/* Program to accompany Rick Wicklin (2016) "Weighted Percentiles", published on The DO Loop blog 29AUG2016 http://blogs.sas.com/content/iml/2016/08/29/weighted-percentiles.html ? */ /*****************************************/ title "Unweighted Percentiles"; /*****************************************/ ods graphics / reset width=300px height=225px; data A; input x wt; datalines; 1 0.25 1.9 0.05 2.2 0.15 3.0 0.25 3.7 0.15 4.1 0.10 5 0.05 ; proc means data=A p20 p40 p60 p80; var x; run; /* You can get an unweighted CDF plot from PROC UNIVARIATE */ ods select cdfplot; proc univariate data=A; cdfplot x / odstitle="Unweighted ECDF"; run; /* Use SAS/IML to calculate percentiles by using the CDF definition. Also create points on the CDF */ /* For information about the BIN function, see http://blogs.sas.com/content/iml/2013/07/15/cut-pts-and-uneven-bins.html */ proc iml; use A; read all var {x}; close; wt = j(nrow(x), 1, 1/nrow(x)); cumWt = cusum(wt); cutPts = 0 // cumWt; /* Use cutpoints to find quantiles */ pctl = do(0.2, 0.8, 0.2); idx = bin(pctl, cutPts); q = x[idx]; print (pctl`)[L="pctl"] q; /* generate data for ECDF */ t = do(0, 0.999, 0.001); idx = bin(t, cutPts); q = x[idx]; *call series(q, t) grid={x y}; create ECDF var {t q x}; append; close; QUIT; title "Unweighted ECDF"; proc sgplot data=ecdf noautolegend; xaxis grid label="x"; yaxis grid offsetmin=0.1 label="Cumulative Proportion"; step x=q y=t; fringe x / lineattrs=(color=black); refline 0 / axis=y; run; /*****************************************/ title "Weighted Percentiles"; /*****************************************/ proc means data=A p20 p40 p60 p80; var x; weight wt; run; /* Same IML code except use weights from data set */ proc iml; use A; read all var {x wt}; close; cumWt = cusum(wt); cutPts = 0 // cumWt; /* Use cutpoints to find quantiles */ pctl = do(0.2, 0.8, 0.2); idx = bin(pctl, cutPts); q = x[idx]; print (pctl`)[L="pctl"] q; /* generate data for WECDF */ t = do(0, 0.999, 0.001); idx = bin(t, cutPts); q = x[idx]; *call series(q, t) grid={x y}; create WECDF var {t q x}; append; close; QUIT; title "Weighted ECDF"; proc sgplot data=wecdf noautolegend; xaxis grid label="x"; yaxis grid offsetmin=0.1 label="Cumulative Proportion"; step x=q y=t; fringe x / lineattrs=(color=black); refline 0 / axis=y; run; /*****************************************/ /* visualize weighted percentiles with bubble plot of weights */ title "Weighted Percentiles"; /*****************************************/ data Bubble; set A; y = 0; radius = sqrt(Wt); run; ods graphics / width = 400px height=120px; proc sgplot data=Bubble noautolegend; refline 0 / axis=y; fringe x / lineattrs=(color=black); bubble x=x y=y size=radius; text x=x y=y text=wt / strip; /* or scatter x=x y=y / markerchar=wt; */ yaxis display=none; run;