/* Program to accompany the article "Compute a bootstrap analysis in SAS/IML" by Rick Wicklin, published 10JUL2017 on The DO Loop blog, http://blogs.sas.com/content/iml/2017/07/10/bootstrap-sasiml.html */ data sample; set sashelp.Iris; /* <== load your data here */ where species = "Setosa"; rename PetalWidth=x; /* <== rename the analyzes variable to 'x' */ run; proc univariate data=sample; var x; histogram x; inset N Skewness (6.3) / position=NE; run; /******************************************************/ /* BOOTSTRAP IN SAS/IML */ /******************************************************/ /* Basic bootstrap percentile CI. The following program is based on Chapter 15 of Wicklin (2013) Simulating Data with SAS, pp 288-289. */ /* Basic bootstrap percentile CI. The following program is based on Chapter 15 of Wicklin (2013) Simulating Data with SAS, pp 288-289. */ proc iml; /* Function to evaluate a statistic for each column of a matrix. Return a row vector of statistics, one for each column. */ start EvalStat(M); return skewness(M); /* <== put your computation here */ finish; alpha = 0.05; B = 5000; /* B = number of bootstrap samples */ use sample; read all var "x"; close; /* read univariate data into x */ call randseed(1234567); Est = EvalStat(x); /* 1. compute observed statistic */ s = sample(x, B // nrow(x)); /* 2. generate many bootstrap samples (N x B matrix) */ bStat = T( EvalStat(s) ); /* 3. compute the statistic for each bootstrap sample */ bootEst = mean(bStat); /* 4. summarize bootstrap distrib, such as mean */ SE = std(bStat); /* standard deviation, */ call qntl(CI, bStat, alpha/2 || 1-alpha/2); /* and 95% bootstrap percentile CI */ R = Est || BootEst || SE || CI`; /* combine results for printing */ print R[format=8.4 L="95% Bootstrap Pctl CI" c={"Obs" "BootEst" "StdErr" "LowerCL" "UpperCL"}]; /* 5. visualize distribution */ call symputx('Est', round(Est, 1e-4)); call symputx('LowerCL', round(CI[1], 1e-4)); call symputx('UpperCL', round(CI[2], 1e-4)); refStmt = 'refline &Est / axis=x lineattrs=(color=red) name="Est" legendlabel="Observed Statistic = &Est";' + 'refline &LowerCL &UpperCL / axis=x lineattrs=(color=blue) name="CI" legendlabel="95% Pctl CI";' + 'keylegend "Est" "CI";'; title "Bootstrap Distribution"; call histogram(bStat) label="Skewness" other=refStmt; QUIT; /******************************************************/ /* BOOTSTRAP IN BASE SAS */ /******************************************************/ /* Use the bootstrap to get a 95% CI for the skewness */ /* In Base SAS, you can use PROC SURVEYSELECT and BY-group processing to create a 95% bootstrap percentile interval. See http://blogs.sas.com/content/iml/2016/08/10/bootstrap-confidence-interval-sas.html */ /* 1. compute value of the statistic on original data: Skewness = 0.366 */ proc means data=sample nolabels Skew; var x; output out=outStat skew=; run; data _null_; set outStat; call symputx('Est', round(x, 1e-4)); run; %put &=Est; %let NumSamples = 5000; /* number of bootstrap resamples */ /* 2. Generate many bootstrap samples */ proc surveyselect data=sample NOPRINT seed=1 out=BootSSFreq(rename=(Replicate=SampleID)) method=urs /* resample with replacement */ samprate=1 /* each bootstrap sample has N observations */ /* OUTHITS option to suppress the frequency var */ reps=&NumSamples; /* generate NumSamples bootstrap resamples */ run; /* 3. Compute the statistic for each bootstrap sample */ proc means data=BootSSFreq noprint; by SampleID; freq NumberHits; var x; output out=OutStats skew=Skewness; /* approx sampling distribution */ run; /* 4. Use approx sampling distribution to make statistical inferences */ proc univariate data=OutStats noprint; var Skewness; output out=Pctl pctlpre =CI95_ pctlpts =2.5 97.5 /* compute 95% bootstrap confidence interval */ pctlname=Lower Upper; run; data _null_; set Pctl; call symputx('LowerCL', round(CI95_Lower, 1e-4)); call symputx('UpperCL', round(CI95_Upper, 1e-4)); run; %put &=LowerCL; %put &=UpperCL; proc print noobs; run; /* 5 Visualize the bootstrap distribution */ title "Bootstrap Distribution"; proc sgplot data=OutStats; label Skewness= ; histogram Skewness; /* Optional: draw reference line at observed value and draw 95% CI */ refline &Est / axis=x lineattrs=(color=red) name="Est" legendlabel="Observed Statistic = &Est"; refline &LowerCL &UpperCL / axis=x lineattrs=(color=blue) name="CI" legendlabel="95% Pctl CI"; keylegend "Est" "CI"; run;