/** SAS/IML Studio program to estimate the probability that Rick Wicklin's grocery bill will be a whole dollar amount. **/ /** In SAS/IML Studio, use the SUBMIT/ENDSUBMIT statements to run the DATA step and procedures. For PROC IML, delete the SUBMIT/ENDSUBMIT statements. **/ submit; /** Three week's worth of grocery items that Rick buys. **/ data prices; input item @@; Last2 = item - floor(item); datalines; 1.18 1.18 3.19 1.99 4.99 1.00 1.19 3.69 6.99 2.50 2.00 9.49 3.15 3.15 2.00 1.89 4.29 1.39 0.99 2.99 2.75 3.99 2.69 3.93 1.89 0.92 2.49 1.00 0.02 3.56 1.19 1.99 1.00 1.67 3.37 1.59 2.99 0.75 2.99 16.22 1.99 2.59 1.49 4.87 4.99 3.99 1.99 1.50 2.99 1.98 3.99 2.11 0.99 0.99 0.70 1.00 1.59 1.19 2.51 1.49 1.79 0.59 2.83 3.95 1.69 1.69 1.00 1.88 1.67 3.36 1.67 3.29 1.69 1.55 0.40 0.63 1.50 1.50 1.19 4.99 5.99 1.99 3.19 0.89 1.00 2.84 2.99 3.00 4.29 2.10 0.89 0.89 0.89 0.54 0.69 1.49 0.77 2.50 1.69 2.75 2.99 0.99 3.19 1.99 0.40 0.35 1.35 2.10 1.44 1.00 1.21 0.50 0.53 2.49 0.63 3.34 13.49 ; run; /** proc freq data=prices order=freq; table Last2 / nocum; run; **/ endsubmit; proc iml; /** read prices of individual items **/ use prices; read all var {"Item"}; close prices; SalesTax = 0.0775; /** sales tax in Wake county, NC **/ numItems = 40; NumResamples = 5000; /** number of receipts to simulate **/ call randseed(4321); /** LOAD or define the SampleReplaceUni module here. **/ start SampleReplaceUni(A, nSamples, k); results = j(nSamples, k); /** allocate result matrix **/ call randgen(results, "Uniform"); /** fill with random U(0,1) **/ n = ncol(A); /** number of elements **/ results = ceil(n*results); /** convert to integers 1,2,...n **/ return(shape(A[results], nSamples)); /** reshape and return from A **/ finish; /** one simulation **/ /** resample from the original data. **/ y = sampleReplaceUni(Item`, NumResamples, NumItems); pretax = y[,+]; /** sum prices to get pre-tax cost of each receipt **/ total = round((1 + SalesTax) # pretax, 0.01);/** add sales tax; round **/ whole = (total=int(total)); /** find receipts that are whole dollars **/ Prop = whole[:]; /** proportion of bills that are whole-dollar amounts **/ print NumItems NumResamples (sum(whole))[label="Num00"] Prop[format=PERCENT7.2 label="Pct"]; /**************************************/ /** Repeat the simulation many times to examine the distribution of the proportion estimate **/ numItems = 40; NumRep = 200; /* number of bootstrap replications */ Prop = j(NumRep,1); do k = 1 to NumRep; y = sampleReplaceUni(Item`, NumResamples, NumItems); pretax = y[,+]; /** sum prices to get pre-tax cost of each receipt **/ total = round((1 + SalesTax) # pretax, 0.01);/** add sales tax; round **/ whole = (total=int(total)); /** find receipts that are whole dollars **/ Prop[k] = whole[:]; /** proportion of bills that are whole-dollar amounts **/ end; meanProp = Prop[:]; /** Qntl: compute quantiles (Defn. 5 from the UNIVARIATE doc) **/ /** Arguments: q upon return, q contains the specified sample quantiles of the data. x is a matrix. The module computes quantiles for each column. p specifies the quantiles. For example, 0.5 specifies the median, whereas {0.25 0.75} specifies the first and third quartiles. This module does not handle missing values in the data. **/ start Qntl(q, x, p); /** definition 5 from UNIVARIATE doc **/ n = nrow(x); /** assume nonmissing data **/ q = j(ncol(p), ncol(x));/** allocate space for return values **/ do j = 1 to ncol(x); /** for each column of x... **/ y = x[,j]; call sort(y,1); /** sort the values **/ do i = 1 to ncol(p); /** for each quantile **/ k = n*p[i]; /** find position in ordered data **/ k1 = int(k); /** find indices into ordered data **/ k2 = k1 + 1; g = k - k1; if g>0 then q[i,j] = y[k2];/** return a data value **/ else /** average adjacent data **/ q[i,j] = (y[k1]+y[k2])/2; end; end; finish; call Qntl(q, Prop, {0.05 0.95}); /** IML 9.22 function; or use Qntl module **/ print numItems NumRep meanProp q[label="90% CI"]; /** In SAS/IML Studio, create the histogram and plot the mean of the sampling distribution. In PROC IML, write the data to a data set and call SGPLOT. **/ declare Histogram hist; hist = Histogram.Create("Dist of mean", Prop); hist.SetAxisLabel(XAXIS, "Proportion of Simulated Receipts That Are Whole-Dollar Amounts"); title = tranwrd("Distribution of Proportions (%d Simulations)", "%d", strip(char(NumRep))); hist.SetTitleText(title, true); hist.ShowPercentage(); hist.DrawUseDataCoordinates(); hist.DrawSetPenAttributes(BLACK, DASHED, 1); hist.DrawLine(meanProp, 0, meanProp, 100); /** bootstrap mean **/ hist.ReBin(0.01, 0.001);