/* SAS program to accompany the article "Quantiles and the Flint water crisis" by Rick Wicklin published 17MAY2017 on The DO Loop blog http://blogs.sas.com/content/iml/2017/05/17/quantiles-flint-water-crisis.html */ /* Values of lead concentration in Flint water samples. Use 0 for "not detectable" Data from the Figure 1 in the article "The murky tale of Flint's deceptive water data," by Robert Langkjaer-Bain, Significance magazine, Apr 2017, pp 17-21. */ data FlintObs; label Lead = "Lead Concentration (ppb)"; input lead @@; Exclude = (Lead=20 | Lead=104); /* MDEQ excluded these two large values */ if Exclude then Pb=Lead; else Pb=.; /* for graphing */ datalines; 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 4 4 5 5 5 5 5 5 5 5 6 6 6 6 7 7 7 8 8 9 10 10 11 13 18 20 21 22 29 43 43 104 ; /* compute 90th pctl (P90) for the full set of 71 measurements */ proc means data=FlintObs N p90; var lead; run; /* compute 90th pctl (P90) after excluding two measurements */ proc means data=FlintObs N p90; where Exclude=0; var lead; run; /* Graph and P90 before excluding two observations */ title2; ods graphics / width=800px height=400px; title "Lead Levels in Flint Water Samples Collected by Flint Officials"; footnote j=L "Jan 2015 - Jun 2015"; proc sgplot data=FlintObs; histogram Lead / binwidth=1 scale=count; yaxis grid offsetmin=0 values=(0 to 13) labelpos=top; xaxis offsetmin=0.02 ranges=(0-45 100-105); refline 15 / axis=X label="15 ppb" labelloc=inside lineattrs=(thickness=2); refline 18 / axis=X label="90th Pctl" lineattrs=GraphData2(thickness=2) labelattrs=GraphData2; run; /* Graph and P90 after excluding two observations */ title "Lead Levels in Flint Water Samples After Excluding Two Values"; footnote j=L "Jan 2015 - Jun 2015"; proc sgplot data=FlintObs; histogram Lead / binwidth=1 scale=count legendlabel="Used"; histogram Pb / binwidth=1 scale=count legendlabel="Excluded";; yaxis grid offsetmin=0 values=(0 to 13) labelpos=top; xaxis offsetmin=0.02 ranges=(0-30 100-105); refline 15 / axis=X label="15 ppb" labelloc=inside lineattrs=(thickness=2); refline 13 / axis=X label="90th Pctl (modified)" lineattrs=GraphData3(thickness=2) labelattrs=GraphData3; run; title; footnote; /* CI for P90 http://blogs.sas.com/content/iml/2013/05/06/compute-confidence-intervals-for-percentiles-in-sas.html */ proc univariate data=FlintObs CIPctlDF; where Exclude=0; var Lead; ods select quantiles; run; /* or use PROC QUANTREG and perform a hypothesis test H0: P90 > 15 http://blogs.sas.com/content/iml/2017/02/22/difference-of-medians-sas.html */ proc quantreg data=FlintObs CI=resampling(nrep=10000); where Exclude=0; model lead = / quantile=0.9 seed=12345; estimate 'P90 > 15' Intercept 1 / upper CL testvalue=15; ods select ParameterEstimates Estimates; run;