/* Program to accompany the article "The bias-corrected and accelerated (BCa) bootstrap interval" by Rick Wicklin, published 12JUL2017 on The DO Loop blog, http://blogs.sas.com/content/iml/2017/07/12/bca-bootstrap-interval.html Chernick and LaBudde (p. 78) state, "Unfortunately, in small to moderate samples for asymmetric or heavy-tailed distributions, the percentile method is not very good and so modifications are required to improve it." (p. 85) "The BCa method incorporates a parameter that Efron named the 'acceleration constant.' It is based on the third moment and corrects for skewness." (p. 86) "The method has a second-order accuracy property." */ /*****************************************************/ data sample; set sashelp.iris; /* load your data here */ where species = "Setosa"; rename PetalWidth=x; /* following code analyzes 'x' variable */ run; /* Bootstrap BCa CI to explore variation of skewness */ proc iml; /* If x is univariate, you can construct a matrix where each column contains a jackknife sample. Use for univariate column vector x when n <= 20000 See https://blogs.sas.com/content/iml/2017/06/21/jackknife-estimate-standard-error-sas.html */ start JackSampMat(x); n = nrow(x); S = j(n-1, n,0); do i = 1 to n; S[,i] = remove(x, i)`; /* transpose to column vevtor */ end; return S; finish; store module=JackSampMat; QUIT; option linesize = 128; /* based on Chapter 7 and Appendix D MATLAB code by Martinez and Martinez, 2002, Computational Statistics Handbook with MATLAB */ proc iml; load module=(JackSampMat); /* compute bias-correction factor from the proportion of bootstrap estimates that are less than the observed estimate */ start bootBC(bootEst, Est); B = ncol(bootEst)*nrow(bootEst); /* number of bootstrap samples */ propLess = sum(bootEst < Est)/B; /* proportion of replicates less than observed stat */ z0 = quantile("normal", propLess); /* bias correction */ return z0; finish; /* compute acceleration factor, which is related to the skewness of bootstrap estimates. Use jackknife replicates to estimate. */ start bootAccel(x); M = JackSampMat(x); /* each column is jackknife sample */ jStat = EvalStat(M); /* row vector of jackknife replicates */ jackEst = mean(jStat`); /* jackknife estimate */ num = sum( (jackEst-jStat)##3 ); den = sum( (jackEst-jStat)##2 ); ahat = num / (6*den##(3/2)); /* ahat based on jackknife ==> not random */ return ahat; finish; /* Input: matrix where each column of X is a bootstrap sample. Return a row vector of statistics, one for each column. */ start EvalStat(M); return skewness(M); /* <== put your computation here */ finish; alpha = 0.05; B = 5000; /* B = number of bootstrap samples */ use sample; read all var "x"; close; /* read univariate data into x */ call randseed(1234567); Est = EvalStat(x); /* 1. compute observed statistic */ s = sample(x, B // nrow(x)); /* 2. generate many bootstrap samples (N x B matrix) */ bStat = T( EvalStat(s) ); /* 3. compute the statistic for each bootstrap sample */ bootEst = mean(bStat); /* 4. summarize bootstrap distrib, such as mean */ /*------ for comparison, compute Pctl CI -----------------------------------*/ SE = std(bStat); /* standard error */ call qntl(CI, bStat, alpha/2 || 1-alpha/2); /* 95% bootstrap percentile CI */ R = Est || BootEst || SE || CI`; /* combine results for printing */ print R[c={"Obs" "BootEst" "StdErr" "LowerCL" "UpperCL"} format=8.4 L="95% Bootstrap Pctl CI"]; /*--------------------------------------------------------------------------*/ z0 = bootBC(bStat, Est); /* 5. bias-correction factor */ ahat = bootAccel(x); /* 6. ahat = acceleration of std error */ print z0 ahat; /* 7. use z0 and ahat to adjust quantiles for 100*(1-alpha)% bootstrap BCa interval */ zL = z0 + quantile("normal", alpha/2); alpha1 = cdf("normal", z0 + zL / (1-ahat*zL)); zU = z0 + quantile("normal", 1-alpha/2); alpha2 = cdf("normal", z0 + zU / (1-ahat*zU)); call qntl(CI, bStat, alpha1//alpha2); /* BCa CI */ R = Est || BootEst || CI`; /* combine results for printing */ print R[format=8.4 L="95% Bootstrap Bias-Corrected CI (BCa)" c={"Obs" "BootEst" "LowerCL" "UpperCL"}]; /* 8. visualize distribution */ call symputx('Est', round(Est, 1e-4)); call symputx('LowerCL', round(CI[1], 1e-4)); call symputx('UpperCL', round(CI[2], 1e-4)); refStmt = 'refline &Est / axis=x lineattrs=(color=red) name="Est" legendlabel="Observed Statistic = &Est";' + 'refline &LowerCL &UpperCL / axis=x lineattrs=(color=blue) name="CI" legendlabel="95% BCa CI";' + 'keylegend "Est" "CI";'; title "Bootstrap Distribution"; call histogram(bStat) label="Skewness" other=refStmt; QUIT; title;footnote;