/* SAS program to accompany the article "An easier way to run thousands of regressions" by Rick Wicklin, published 25APR2018 on The DO Loop blog: https://blogs.sas.com/content/iml/2018/04/25/sweep-thousands-of-regressions.html This program shows how to run thousands of regressions by using the SWEEP operator in SAS/IML. You can also use PROC REG, as shown in https://blogs.sas.com/content/iml/2017/02/13/run-1000-regressions.html */ /* Simulate wide data with variables Y, X1, X2, X3, .... See http://blogs.sas.com/content/iml/2017/01/25/simulate-regression-model-sas.html */ %let nCont = 1000; /* <== Specify the number of continuous variables */ %let N = 50; /* Specify sample size */ data Wide(keep= Y x:); call streaminit(54321); /* set the random number seed */ array x[&nCont]; /* explanatory vars are named x1-x&nCont */ /* 1. Specify model coefficients. You can hard-code values such as array beta[0:&nCont] _temporary_ (-4 2 -1.33 1 -0.8 0.67 -0.57 0.5 -0.44 0.4 -0.36); or you can use a formula such as the following */ array beta[0:&nCont] _temporary_; do j = 0 to &nCont; beta[j] = 4 * (-1)**(j+1) / (j+1); /* formula for beta[j] */ end; do i = 1 to &N; /* for each observation in the sample */ do j = 1 to dim(x); x[j] = rand("Normal"); /* 2. Simulate explanatory variables */ end; eta = beta[0]; /* model = intercept term */ do j = 1 to &nCont; eta = eta + beta[j] * x[j]; /* + sum(beta[j]*x[j]) */ end; epsilon = rand("Normal", 0, 1.5); /* 3. Specify error distrib */ Y = eta + epsilon; /* 4. Y = model + error */ output; end; run; /* Begin analysis. We want to compute the parameter estimates for many regression models of the form Y=X_i. You can use the SWEEP operator */ /* program to compute parameter estimates for Y = b0 + b1*X_k, k=1,2,... */ proc iml; XVarNames = "x1":"x&nCont"; /* names of explanatory variables */ p = ncol(XVarNames ); /* number of X variables, excluding intercept */ varNames = XVarNames || "Y"; /* name of all data variables */ use Wide; read all var varNames into M; close; A = j(nrow(M), 3, 1); /* columns for intercept, X_k, Y */ A[ ,3] = M[ ,ncol(M)]; /* put Y in 3rd col */ ParamEst = j(2, p, .); /* allocate 2 x p matrix */ do k = 1 to &nCont; A[ ,2] = M[ ,k]; /* X_k in 2nd col */ S1 = sweep(A`*A, {1 2}); /* sweep in intercept and X_k */ ParamEst[ ,k] = S1[{1 2}, 3]; /* estimates for model Y = b0 + b1*X_k */ end; print (paramEst[,1:6])[r={"Intercept" "Slope"} c=XVarNames]; quit; /* Now solve thousands of regression problems of the form Y_i = b0 + b1*X */ /* Create a regression model */ proc reg data=sashelp.iris plots=none; where Species="Virginica"; model SepalLength = SepalWidth; run; /* The prameter estimates are RMSE = 5.71; b0 = 39.07; b1 = 0.902; Use these values to simulate 1,000 new responses for the same fixed values of the explanatory variable. For more details, see Wicklin (2013, p. 203) _Simulating Data with SAS_ */ %let NumSim = 1000; /* number of Y variables */ data RegSim(drop=RMSE b0 b1 k Species); set Sashelp.Iris(where =(Species="Virginica") keep=Species SepalWidth rename=(SepalWidth=X)); /* change name to X */ RMSE = 5.71; b0 = 39.07; b1 = 0.902; /* param estimates */ array Y[&NumSim]; call streaminit(321); do k = 1 to &NumSim; Y[k] = b0 + b1*X + rand("Normal", 0, RMSE); /* simulate responses */ end; output; run; /* You can solve this problem very easily in PROC REG because the procedure supports multiple responses */ proc reg data=RegSim noprint outest=PE; model Y1-Y&numSim = X; run; quit; /* Alternately, you can use a single call to the SWEEP function to compute parameter estimates for Y_k = b0 + b1*X, k=1,2,... */ proc iml; YVarNames = "Y1":"Y&numSim"; /* names of explanatory variables */ varNames = "X" || YVarNames; /* name of all data variables */ use RegSim; read all var varNames into M; close; M = j(nrow(M), 1, 1) || M; /* add intercept column */ S1 = sweep(M`*M, {1 2}); /* sweep in intercept and X */ ParamEst = S1[{1 2}, 3:ncol(M)]; /* estimates for models Y_i = X */ title "Parameter Estimates for 1,000 Simulated Responses"; call scatter(ParamEst[1,], ParamEst[2,]) label={"Intercept" "X"} other="refline 39.07/axis=x; refline 0.902/axis=y;"; quit;