/* Program to accompany
   "An easy way to run thousands of regressions in SAS"
   by Rick Wicklin. Published on The DO Loop blog
   http://blogs.sas.com/content/iml/2017/02/13/run-1000-regressions.html
*/

/* Simulate wide data (variables Y, x1, x2, x3, ...) that fits a linear
   regression model. See
   http://blogs.sas.com/content/iml/2017/01/25/simulate-regression-model-sas.html
*/
%let nCont = 100;   /* <== Specify the number of continuous variables */
%let N     = 50;    /* Specify sample size */

data Wide(keep= Y x:);
   array x[&nCont];                    /* explanatory vars: x1-x&nCont */
   /* 1. Model coefficients; beta[0] is the intercept.
         Values could instead be hard-coded, for example
            array beta[0:&nCont] _temporary_
               (-4 2 -1.33 1 -0.8 0.67 -0.57 0.5 -0.44 0.4 -0.36);
         Here they are generated from a formula. */
   array beta[0:&nCont] _temporary_;
   call streaminit(54321);             /* fixed seed => reproducible data */

   do k = 0 to hbound(beta);
      beta[k] = 4 * (-1)**(k+1) / (k+1);   /* formula for beta[k] */
   end;

   do obs = 1 to &N;                   /* one iteration per observation */
      linPred = beta[0];               /* start from the intercept term */
      do k = 1 to dim(x);
         x[k] = rand("Normal");        /* 2. Simulate explanatory variable */
         linPred = linPred + beta[k] * x[k];   /* accumulate beta[k]*x[k] */
      end;
      noise = rand("Normal", 0, 1.5);  /* 3. Error term: N(0, 1.5) */
      Y = linPred + noise;             /* 4. Response = model + error */
      output;
   end;
   /* k, obs, linPred and noise are scratch variables; the KEEP= option
      above leaves only Y and the x variables in Wide. */
run;

/* Begin analysis. The goal is to compute the parameter estimates for many
   regression models of the form Y = X_i. You can use the SLOW way or
   the BY way. */

/* The SLOW WAY to compute many regressions:
   Use a macro loop and call PROC REG many times, computing the model
   Y = X&i on each call.
   For a discussion of why you should NOT use this code, see
   http://blogs.sas.com/content/iml/2012/07/18/simulation-in-sas-the-slow-way-or-the-by-way.html
*/
/****** DO NOT MIMIC THIS CODE: INEFFICIENT!
******/
%macro RunReg(DSName, NumVars);
   /* Fit the one-regressor model Y = x&i separately for i = 1..NumVars
      by calling PROC REG once per variable, appending each set of
      parameter estimates to the data set AllStats.
        DSName  : input data set; must contain Y and x1-x<NumVars>
        NumVars : number of x variables to loop over
      Side effects: deletes and rebuilds AllStats; overwrites PE on
      every iteration. */
   %local i savedNotes;   /* keep loop index and saved option out of outer scopes */
   %let savedNotes = %sysfunc(getoption(notes));   /* NOTES or NONOTES */
   options nonotes;       /* prevents the SAS log from overflowing */

   proc datasets nolist;
      delete AllStats;    /* delete this data set if it exists */
   quit;                  /* end PROC DATASETS explicitly */

   %do i = 1 %to &NumVars;                 /* repeat for each x&i */
      /* save parameter estimates; rename the slope column to a common
         name so that all iterations can be stacked */
      proc reg data=&DSName noprint outest=PE(rename=(x&i=Value));
         model Y = x&i;                    /* model Y = x_i */
      quit;

      /* use PROC APPEND to accumulate statistics */
      proc append base=AllStats data=PE;
      run;
   %end;

   options &savedNotes;   /* restore the NOTES setting found on entry */
%mend;

%RunReg(Wide, &nCont)
/****** END OF INEFFICIENT CODE ******/


/* A MORE EFFICIENT WAY to compute many regressions of the form Y = x_i */
/* 1. Transpose from wide to long. Create a BY-group variable that
      identifies each model.
   2. Sort by the BY-group variable.
   3. Call PROC REG and use the BY statement to compute all regressions.
*/

/* 1. transpose from wide (Y, X1 ,...,X100) to long (varNum VarName Y Value) */
data Long;
   set Wide;
   array x [*] x1-x&nCont;        /* <== specify explanatory variables HERE */
   /* Declare VarName explicitly: an undeclared target of VNAME() gets
      length 200, but a SAS variable name never exceeds 32 characters.
      varNum is listed first so the column order stays
      varNum, VarName, Value. */
   length varNum 8 VarName $32;
   do varNum = 1 to dim(x);
      VarName = vname(x[varNum]); /* variable name in char var */
      Value = x[varNum];          /* value for each variable for each obs */
      output;                     /* one output row per (obs, variable) pair */
   end;
   drop x:;                       /* the wide columns are no longer needed */
run;

/* 2. Sort by BY-group variable */
proc sort data=Long;
   by VarName;
run;

/* 3. Call PROC REG and use BY statement to compute all regressions */
proc reg data=Long noprint outest=PE;
   by VarName;
   model Y = Value;
quit;

/* Look at the structure of the results. Note that they are sorted by
   variable name (character order), which might differ from the order of
   the variables in the Wide data set. */
proc print data=PE(obs=5);
   var VarName Intercept Value;
run;