/* Program to accompany 
   "Simultaneous confidence intervals for multinomial proportions"
   by Rick Wicklin. Published on The DO Loop blog 
    http://blogs.sas.com/content/iml/2017/02/15/confidence-intervals-multinomial-proportions.html
*/


/*******************************************************/
/* The following functions will construct simultaneous */
/* confidence intervals for multinomial proportions    */
/* based on several methods of construction.           */
/* Values for the variable 'Method' are:               */
/*   1 = Quesenberry and Hurst (1964)                  */
/*   2 = Goodman (1965)                                */
/*   3 = naive binomial                                */
/*   4 = Fitzpatrick and Scott                         */
/*       (alpha <= 0.15 only)                          */
/*   5 = Q & H type and estimated variance             */
/*   6 = Goodman type  and estimated variance          */
/*                                                     */
/* Original code: May and Johnson (1997). "A SAS Macro */
/*   for Constructing Simultaneous Confidence Intervals*/
/*   for Multinomial Proportions," Computer Methods and*/
/*   Programs in Biomedicine, p. 153-162.              */
/*                                                     */
/* Rewritten by Rick Wicklin, Feb 2016                 */
/*                                                     */
/*******************************************************/

/* Define library of SAS/IML function */
proc iml;
/* helper function that computes interval based on ChiSq value */
start MultCIFromChiSquare(Count, chiSq);
   n = Count[+];
   p = Count / n;
   a = chiSq + n;
   b = chiSq + 2*n*p;
   c = n*p##2;
   root = sqrt(b##2 - 4*a*c);
   Lower = (b-root) / (2*a);
   Upper = (b+root) / (2*a);
   return (p || Lower || Upper);
finish;

/* helper function that computes interval based on ChiSq 
   value and estimated variance factor */
start MultCIFromChiSquareVar(Count, chiSq);
   n = Count[+];
   p = Count / n;
   a = sqrt( chiSq * p#(1-p) / n );
   Lower = p - a;
   Upper = p + a;
   return (p || Lower || Upper);
finish;

/* Quesenberry and Hurst (1964) */
start MultCI_QH(Count, alpha);
   k = nrow(Count);
   chiSq = quantile("ChiSquare", 1-alpha, k-1);
   return( MultCIFromChiSquare(Count, chiSq) );
finish;

/* Quesenberry and Hurst with estimated variance factor */
start MultCI_QHVar(Count, alpha);
   k = nrow(Count);
   chiSq = quantile("ChiSquare", 1-alpha, k-1);
   return( MultCIFromChiSquareVar(Count, chiSq) );
finish;

/* Goodman (1965) */
start MultCI_Goodman(Count, alpha);
   k = nrow(Count);
   chiSq = quantile("ChiSquare", 1-alpha/k, 1); /*Bonferroni adjustment */
   return( MultCIFromChiSquare(Count, chiSq) );
finish;

/* Goodman with estimated variance factor */
start MultCI_GoodmanVar(Count, alpha);
   k = nrow(Count);
   chiSq = quantile("ChiSquare", 1-alpha/k, 1); /*Bonferroni adjustment */
   return( MultCIFromChiSquareVar(Count, chiSq) );
finish;

/* naive binomial method */
start MultCI_Binomial(Count, alpha);
   chiSq = quantile("chisquare", 1-alpha, 1);
   n = Count[+];
   p = Count / n;
   a = sqrt(chiSq / (4*n));
   Lower = p - a;
   Upper = p + a;
   return (p || Lower || Upper);
finish;

/* Fitzpatrick and Scott (1987)                */
start MultCI_FS(Count, alpha);
   if alpha <= 0.016 then
      c2 = quantile("chisquare", 1-alpha/2, 1);
   else if alpha > 0.016 && alpha <= 0.15 then
      c2 = (8/9)*quantile("chisquare", 1-alpha/3, 1);
   else do;
      print 'alpha must be less than 0.15 to use the Fitzpatrick and Scott method';
      return( j(nrow(Count), 3, .) );
   end;
   n = Count[+];
   p = Count / n;
   a = sqrt(c2 / (4*n));
   Lower = p - a;
   Upper = p + a;
   return (p || Lower || Upper);
finish;

/*******************************************************/
/* Driver function for May and Johnson (1997) functions*/
/* Print the CI results for various tests.             */
/* Values for the variable 'type' are:                 */
/*                   1 = Quesenberry and Hurst (1964)  */
/*                   2 = Goodman (1965) [default]      */
/*                   3 = naive binomial                */
/*                   4 = Fitzpatrick and Scott         */
/*                       (alpha<=0.15 only)            */
/*                   5 = Q & H type using observed     */
/*                       proportions as variance       */
/*                   6 = Goodman type using observed   */
/*                       proportions as variance       */
/*******************************************************/
start MultCI(Count, alpha=0.05, Method=2);
   if Method=1 then
      return MultCI_QH(Count, alpha);
   else if Method=2 then
      return MultCI_Goodman(Count, alpha);
   else if Method=3 then 
      return MultCI_Binomial(Count, alpha);
   else if Method=4 then
      return MultCI_FS(Count, alpha);
   else if Method=5 then 
      return MultCI_QHVar(Count, alpha);
   else if Method=6 then
      return MultCI_GoodmanVar(Count, alpha);
finish;


start MultCIPrint(Category, Count, alpha=0.05, Method=2);
   title = {
   "Method of Quesenberry and Hurst (1964)"
   "Method of Goodman (1965)"
   "Naive binomial method"
   "Method of Fitzpatrick and Scott (1987)"
   "Method of Quesenberry and Hurst (1964), using Var = p(1-p)"
   "Method of Goodman (1965), using Var = p(1-p) "
   };

   print (title[Method]);
   if type(Category)='C' then Cat = Category;
   else Cat = putn(Category, "Best6.");
   alphaText = putn(alpha, "Best6.");
   labl = "Simultaneous Confidence Intervals";
   CI = MultCI(Count, alpha, Method);
   result = Count || CI;
   onemalpha = strip(putn(1-alpha, "Percent7."));
   varNames = {"Count" "Proportion"} || (onemalpha + " Lower") ||
              (onemalpha + " Upper");
   print result[Label=labl rowname=Cat
                colname=varNames format=Best6.];
finish;

store module=_all_;
quit;

/* Example calling syntax: 
data Psych;
input Category $21. Count;
datalines;
Neurotic              91
Depressed             49
Schizophrenic         37
Personality disorder  43
;

proc iml;
load module=(MultCI MultCIPrint);
use Psych; read all var {"Category" "Count"}; close;

alpha = 0.05;
call MultCIPrint(Category, Count, alpha, 2);  * Goodman = 2;
call MultCIPrint(Category, Count, alpha, 4);  * Fitzpatrick and Scott;

CI = MultCI(Count, alpha, 2);   
*/