/* Program to accompany "A comparison of 9 definitions of sample quantiles" by Rick Wicklin, published 24May2017 on The DO Loop blog, http://blogs.sas.com/content/iml/2017/05/24/definitions-sample-quantiles.html SAS program to compare the nine sample quantile definitions in Hyndman, R. J. and Fan, Y. (1996) "Sample quantiles in statistical packages", American Statistician, 50, 361–365. https://www.amherst.edu/media/view/129116/original/Sample+Quantiles.pdf See also https://en.wikipedia.org/wiki/Quantile */ title; footnote; ods graphics/reset antialiasmax=10000; data Q; input x @@; datalines; 0 1 1 1 2 2 2 4 5 8 ; /* If you prefer, use 100 random uniform observations */ /* data Q; call streaminit(12345); do i = 1 to 100; x = 8*rand('uniform'); output; end; drop i; run; proc sort data=Q; by x; run; */ /* SAS/IML function to compute all nine sample quantiles that are discussed in Hyndman, R. J. and Fan, Y. (1996) and https://en.wikipedia.org/wiki/Quantile */ title "Sample Quantiles"; /* Compare all quantile estimation methods */ proc iml; /* implement sample quantiles in Hyndman and Fan (1996) */ start SampQuantile(y, probs={0.25, 0.5, 0.75}, Type=2); x = colvec(y); p = colvec(probs); if element(Type, {1 2 3 4 6}) then do; SASType = {3 5 2 1 . 4}; /* convert from Type to QNTLDEF */ call qntl(q, x, p, SASType[Type]); return q; end; if any(x=.) then x = x[loc(x ^= .)]; /* remove missing values */ call sort(x); N = nrow(x); if Type=5 then return quantileM(x, p, 0.5, 1/(2*N)); else if Type=7 then return quantileM(x, p, 1-p, 0); else if Type=8 then return quantileM(x, p, (p+1)/3, 2/(3*N+1)); else if Type=9 then return quantileM(x, p, (2*p+3)/8, 5/(8*N+2)); finish; /* The following function assumes x is a sorted column vector and contains nonmissing values. p is a column vector of probabilities. The interpolation is q = (1-g)#x[j] + g#x[j+1]; where j = floor(n*p + m); g = n*p + m - j; and m is a constant determined by the definition. For extreme values of p, the methods return x[1] or x[N]. */ start quantileM(x, p, m, c); N = nrow(x); /* assume nonmissing */ j = floor(N*p + m); g = n*p + m - j; if all(p>=c & p<1-c) then /* usual case */ return (1-g)#x[j] + g#x[j+1]; /* otherwise, handle extreme values of p */ /* When p < c, use x[1]. When p = 1 - c, use x[N] */ q = j(nrow(p),1,.); idx1 = loc(p < c); /* set these quantiles to x[1] */ idxN = loc(p >= 1-c); /* set these quantiles to x[N] */ idx = loc(p > c & p < 1-c); if ncol(idx1)>0 then q[idx1] = x[1]; if ncol(idxN)>0 then q[idxN] = x[N]; if ncol(idx) >0 then do; j = j[idx]; g = g[idx]; q[idx] = (1-g)#x[j] + g#x[j+1]; end; return q; finish; store module=(SampQuantile quantileM); quit; /* Driver SAS/IML program to compare all 9 methods */ proc iml; load module=(SampQuantile); use Q; read all var "x"; close; prob = T(1:999)/1000; * fine mesh of prob values; create Pctls var {"Type" "quantile" "prob" "x"}; do t = 1 to 9; quantile = SampQuantile(x, prob, t); Type = j(nrow(prob), 1, t); append; end; close; quit; title "Sample Quantiles in Statistical Software"; footnote justify=L "Definitions from Hyndman & Fan (TAS, 1996)"; proc sgpanel data=Pctls noautolegend; panelby Type / onepanel rows=3; scatter x=quantile y=prob/ markerattrs=(size=3 symbol=CircleFilled); fringe x; rowaxis offsetmax=0 offsetmin=0.05 grid values=(0 to 1 by 0.1) label="Cumulative Proportion"; refline 0 1 / axis=y; colaxis display=(nolabel); run; footnote; /* optional: overlay two or three methods. For example, overly type=2 and type=7, which are the default methods for SAS and R */ proc format; value QntlDef 2 = "SAS Default (Type=2)" 7 = "R Default (Type=7)"; run; title "Sample Quantiles"; title2 "SAS Default vs. R Default"; proc sgplot data=Pctls; where Type in (2, 7); format Type QntlDef.; scatter x=quantile y=prob/ group=Type markerattrs=(size=5 symbol=CircleFilled) name="ecdf"; fringe x / lineattrs=(thickness=3 color=black); yaxis offsetmax=0 offsetmin=0.05 grid values=(0 to 1 by 0.1) label="Cumulative Proportion"; refline 0 1 / axis=y; xaxis display=(nolabel); keylegend "ecdf" / location=inside position=topleft across=1 type=markercolor; run; title; /* Use random uniform on (0,8) and show that the quantiles are practially the same */