/* Program to accompany "The distribution of colors for plain M&M candies" by Rick Wicklin. Published on The DO Loop blog http://blogs.sas.com/content/iml/2017/02/20/proportion-of-colors-mandms.html */ /********************************************************/ data MandMs; input Color \$7. Count; datalines; Red 108 Orange 133 Yellow 103 Green 139 Blue 133 Brown 96 ; ods graphics / reset; title 'Sample Distribution of Colors for M&Ms'; title2 "N = 712"; proc sgplot data=MandMs noautolegend; styleattrs datacolors=(red orange yellow green blue CX593B18) datacontrastcolors=(black); vbar Color / response=Count group=Color stat=percent datalabel; xaxis discreteorder=data display=(noticks nolabel); yaxis display=(nolabel) grid; *refline 0.13 0.14 0.16 0.2 0.24 / axis=y; run; proc freq data = MandMs order=data; weight Count; tables Color / nocum chisq /* 2008 proportions: red orange yellow green blue brown */ testp=(0.13 0.20 0.14 0.16 0.24 0.13); run; /********************************************************/ /* Read data into SAS/IML vector and run Goodman method */ /* Obtain file conint.sas from */ /* http://blogs.sas.com/content/iml/2017/02/15/confidence-intervals-multinomial-proportions.html */ /********************************************************/ %include "conint.sas"; proc iml; load module=(MultCI MultCIPrint); use MandMs; read all var {"Color" "Count"}; close; alpha = 0.05; call MultCIPrint(Color, Count, alpha, 2); /* Goodman's method */ CI = MultCI(Count, alpha, 2); /* Goodman = 2 */ Estimate = CI[,1]; Lower = CI[,2]; Upper = CI[,3]; create CIs var {"Color" "Estimate" "Lower" "Upper"}; append; close; quit; data dattrs; ID = "MandMs"; input Value \$11.; if Value="Brown" then MarkerColor="CX593B18"; else MarkerColor = Value; datalines; Red Orange Yellow Green Blue Brown ; /* 2008 */ data CIs; set CIs; labl = "m"; char="|"; if Color="Red" then Expected=0.13; else if Color="Orange" then Expected=0.20; else if Color="Yellow" then Expected=0.14; else if Color="Green" then Expected=0.16; else if Color="Blue" then Expected=0.24; else if Color="Brown" then Expected=0.13; run; ods graphics / width=600px height=336px; title 'Observed vs 2008 Proportions for M&M Candies'; title2 '95% Simultaneous Confidence Intervals, N = 712'; proc sgplot data=CIs DATTRMAP=dattrs noautolegend; styleattrs wallcolor=CXDDDDDD; xaxis grid label="Proportion"; yaxis grid display=(nolabel); scatter x=Expected y=Color / group=Color ATTRID=MandMs markerchar=char markercharattrs=(size=23) name="exp" legendlabel="Expected"; scatter x=Estimate y=Color / group=color ATTRID=MandMs xerrorlower=Lower xerrorupper=upper markerattrs=(Size=21 symbol=CircleFilled) errorbarattrs=(thickness=3) noerrorcaps name="obs" legendlabel="Observed"; text x=Estimate y=Color text=labl / textattrs=(size=12 family=arial color=white weight=bold); run; /* M&M/Mars 2017 proportions, CLV Plant: M&M'S MILK AND DARK CHOCOLATE CLV: 20.7% Cyan Blue, 20.5% Orange, 19.8% Green, 13.5% Bright Yellow, 13.1% Red, 12.4% Brown. */ data CIs; set CIs; labl = "m"; char="|"; if Color="Red" then Expected=0.131; else if Color="Orange" then Expected=0.205; else if Color="Yellow" then Expected=0.135; else if Color="Green" then Expected=0.198; else if Color="Blue" then Expected=0.207; else if Color="Brown" then Expected=0.124; run; ods graphics / width=600px height=336px; title 'Observed vs 2017 Proportions (CLV Plant) for M&M Candies'; title2 '95% Simultaneous Confidence Intervals, N = 712'; proc sgplot data=CIs DATTRMAP=dattrs noautolegend; styleattrs wallcolor=CXECECEC; xaxis grid label="Proportion" values=(0.1 to 0.24 by 0.02); yaxis grid display=(nolabel); scatter x=Expected y=Color / group=Color ATTRID=MandMs markerchar=char markercharattrs=(size=23) name="exp" legendlabel="Expected"; scatter x=Estimate y=Color / group=color ATTRID=MandMs xerrorlower=Lower xerrorupper=upper markerattrs=(Size=21 symbol=CircleFilled) errorbarattrs=(thickness=3) noerrorcaps name="obs" legendlabel="Observed"; text x=Estimate y=Color text=labl / textattrs=(size=12 family=arial color=white weight=bold); run; proc freq data = MandMs order=data; weight Count; tables Color / nocum chisq /* 2017 CLV proportions: red orange yellow green blue brown */ testp=(0.131 0.205 0.135 0.198 0.207 0.124); run;