/* Program to accompany Wicklin (2017) "Simulate data for a linear regression model" published 25JAN2017 on The DO Loop blog: http://blogs.sas.com/content/iml/2017/01/25/simulate-regression-model-sas.html */ /* SAS program to simulate a random sample that has p=&nCont explanatory variables and &N observations. The explanatory variables are independent and x[i] ~ N(0,1). The error term is epsilon ~ N(0,1.5). The response variable Y satisfies the linear regression model Y = beta[0] + sum( beta[i]*x[i] ) + epsilon */ %let N = 50; /* 1. Specify sample size */ %let nCont = 10; /* 2. Specify the number of continuous variables */ data SimReg1(keep= Y x:); call streaminit(54321); /* set the random number seed */ array x[&nCont]; /* explanatory vars are named x1-x&nCont */ /* 3. Specify model coefficients. You can hard-code values such as array beta[0:&nCont] _temporary_ (-4 2 -1.33 1 -0.8 0.67 -0.57 0.5 -0.44 0.4 -0.36); or you can use a formula such as the following */ array beta[0:&nCont] _temporary_; do j = 0 to &nCont; beta[j] = 4 * (-1)**(j+1) / (j+1); /* formula for beta[j] */ end; do i = 1 to &N; /* for each observation in the sample */ do j = 1 to dim(x); x[j] = rand("Normal"); /* 4. Simulate explanatory variables */ end; eta = beta[0]; /* model = intercept term */ do j = 1 to &nCont; eta = eta + beta[j] * x[j]; /* + sum(beta[j]*x[j]) */ end; epsilon = rand("Normal", 0, 1.5); /* 5. Specify error distrib */ Y = eta + epsilon; /* 6. Y = model + error */ output; end; run; /* Test the simulation. Estimates should be close to parameters */ proc reg data=SimReg1 plots=none; model Y = x: / CLB; ods output ParameterEstimates=PE; ods select FitStatistics ParameterEstimates; quit; /* Visualize whether estimates are close to parameters. Create plot of estimates and 95% CIs. Overlay parameters. */ data Coeffs; do j = 0 to &nCont; Parameter = (-1)**(j+1) * 4 / (j+1); output; end; run; data PE; merge PE Coeffs; run; title "Parameters and Estimates for Simulated Data"; title2 "True Model, N=&N"; proc sgplot data=PE; scatter x=Estimate Y=Variable / XErrorLower=LowerCL XErrorUpper=UpperCL markerattrs=(symbol=CircleFilled) legendlabel="Estimate"; scatter x=Parameter Y=Variable / markerattrs=(symbol=X size=12) legendlabel="Parameter"; refline 0 / axis=x; xaxis discreteorder=data; yaxis grid reverse; run;