/* Program to accompany "Simultaneous confidence intervals for multinomial proportions" by Rick Wicklin. Published on The DO Loop blog http://blogs.sas.com/content/iml/2017/02/15/confidence-intervals-multinomial-proportions.html */ /*******************************************************/ /* The following functions will construct simultaneous */ /* confidence intervals for multinomial proportions */ /* based on several methods of construction. */ /* Values for the variable 'Method' are: */ /* 1 = Quesenberry and Hurst (1964) */ /* 2 = Goodman (1965) */ /* 3 = naive binomial */ /* 4 = Fitzpatrick and Scott */ /* (alpha <= 0.15 only) */ /* 5 = Q & H type and estimated variance */ /* 6 = Goodman type and estimated variance */ /* */ /* Original code: May and Johnson (1997). "A SAS Macro */ /* for Constructing Simultaneous Confidence Intervals*/ /* for Multinomial Proportions," Computer Methods and*/ /* Programs in Biomedicine, p. 153-162. */ /* */ /* Rewritten by Rick Wicklin, Feb 2016 */ /* */ /*******************************************************/ /* Define library of SAS/IML function */ proc iml; /* helper function that computes interval based on ChiSq value */ start MultCIFromChiSquare(Count, chiSq); n = Count[+]; p = Count / n; a = chiSq + n; b = chiSq + 2*n*p; c = n*p##2; root = sqrt(b##2 - 4*a*c); Lower = (b-root) / (2*a); Upper = (b+root) / (2*a); return (p || Lower || Upper); finish; /* helper function that computes interval based on ChiSq value and estimated variance factor */ start MultCIFromChiSquareVar(Count, chiSq); n = Count[+]; p = Count / n; a = sqrt( chiSq * p#(1-p) / n ); Lower = p - a; Upper = p + a; return (p || Lower || Upper); finish; /* Quesenberry and Hurst (1964) */ start MultCI_QH(Count, alpha); k = nrow(Count); chiSq = quantile("ChiSquare", 1-alpha, k-1); return( MultCIFromChiSquare(Count, chiSq) ); finish; /* Quesenberry and Hurst with estimated variance factor */ start MultCI_QHVar(Count, alpha); k = nrow(Count); chiSq = quantile("ChiSquare", 1-alpha, k-1); return( MultCIFromChiSquareVar(Count, chiSq) ); finish; /* Goodman (1965) */ start MultCI_Goodman(Count, alpha); k = nrow(Count); chiSq = quantile("ChiSquare", 1-alpha/k, 1); /*Bonferroni adjustment */ return( MultCIFromChiSquare(Count, chiSq) ); finish; /* Goodman with estimated variance factor */ start MultCI_GoodmanVar(Count, alpha); k = nrow(Count); chiSq = quantile("ChiSquare", 1-alpha/k, 1); /*Bonferroni adjustment */ return( MultCIFromChiSquareVar(Count, chiSq) ); finish; /* naive binomial method */ start MultCI_Binomial(Count, alpha); chiSq = quantile("chisquare", 1-alpha, 1); n = Count[+]; p = Count / n; a = sqrt(chiSq / (4*n)); Lower = p - a; Upper = p + a; return (p || Lower || Upper); finish; /* Fitzpatrick and Scott (1987) */ start MultCI_FS(Count, alpha); if alpha <= 0.016 then c2 = quantile("chisquare", 1-alpha/2, 1); else if alpha > 0.016 && alpha <= 0.15 then c2 = (8/9)*quantile("chisquare", 1-alpha/3, 1); else do; print 'alpha must be less than 0.15 to use the Fitzpatrick and Scott method'; return( j(nrow(Count), 3, .) ); end; n = Count[+]; p = Count / n; a = sqrt(c2 / (4*n)); Lower = p - a; Upper = p + a; return (p || Lower || Upper); finish; /*******************************************************/ /* Driver function for May and Johnson (1997) functions*/ /* Print the CI results for various tests. */ /* Values for the variable 'type' are: */ /* 1 = Quesenberry and Hurst (1964) */ /* 2 = Goodman (1965) [default] */ /* 3 = naive binomial */ /* 4 = Fitzpatrick and Scott */ /* (alpha<=0.15 only) */ /* 5 = Q & H type using observed */ /* proportions as variance */ /* 6 = Goodman type using observed */ /* proportions as variance */ /*******************************************************/ start MultCI(Count, alpha=0.05, Method=2); if Method=1 then return MultCI_QH(Count, alpha); else if Method=2 then return MultCI_Goodman(Count, alpha); else if Method=3 then return MultCI_Binomial(Count, alpha); else if Method=4 then return MultCI_FS(Count, alpha); else if Method=5 then return MultCI_QHVar(Count, alpha); else if Method=6 then return MultCI_GoodmanVar(Count, alpha); finish; start MultCIPrint(Category, Count, alpha=0.05, Method=2); title = { "Method of Quesenberry and Hurst (1964)" "Method of Goodman (1965)" "Naive binomial method" "Method of Fitzpatrick and Scott (1987)" "Method of Quesenberry and Hurst (1964), using Var = p(1-p)" "Method of Goodman (1965), using Var = p(1-p) " }; print (title[Method]); if type(Category)='C' then Cat = Category; else Cat = putn(Category, "Best6."); alphaText = putn(alpha, "Best6."); labl = "Simultaneous Confidence Intervals"; CI = MultCI(Count, alpha, Method); result = Count || CI; onemalpha = strip(putn(1-alpha, "Percent7.")); varNames = {"Count" "Proportion"} || (onemalpha + " Lower") || (onemalpha + " Upper"); print result[Label=labl rowname=Cat colname=varNames format=Best6.]; finish; store module=_all_; quit; /* Example calling syntax: data Psych; input Category $21. Count; datalines; Neurotic 91 Depressed 49 Schizophrenic 37 Personality disorder 43 ; proc iml; load module=(MultCI MultCIPrint); use Psych; read all var {"Category" "Count"}; close; alpha = 0.05; call MultCIPrint(Category, Count, alpha, 2); * Goodman = 2; call MultCIPrint(Category, Count, alpha, 4); * Fitzpatrick and Scott; CI = MultCI(Count, alpha, 2); */