/* SAS program to accompany the article "Simulate correlations by using the Wishart distribution" by Rick Wicklin. Published 11OCT2017 on The DO Loop blog https://blogs.sas.com/content/iml/2017/10/11/simulate-correlations-wishart-distribution.html */ /* Simulate bivariate normal samples of size N with correlation rho */ /* https://blogs.sas.com/content/iml/2014/11/26/the-wishart-distribution.html */ %let rho = 0.8; /* correlation for bivariate normal distribution */ %let N = 20; /* sample size */ %let NumSamples = 2500; /* number of simulated samples */ /* Simulate sample correlation coefficients in three steps: 1. Simulate B samples of size N from a bivariate normal distrib w/ correlation rho. 2. Use PROC CORR to compute the sample correlation matrix for each of the B samples. 3. Use the DATA step to extract only the off-diagonal elements */ /* 1. Simulate B samples of size N from a bivariate normal distrib w/ correlation rho. */ proc iml; N = &N; /* sample size */ NumSamples = &NumSamples; call randseed(123456); Sigma = {1 &rho, &rho 1}; Z = randnormal(N*NumSamples, {0 0}, Sigma); x = Z[,1]; y = Z[,2]; ID = colvec( repeat( T(1:NumSamples), 1, N) ); create SimCorr var {ID X Y}; /* define 4 numerical output vars */ append; close; quit; /* 2. Use PROC CORR to compute the sample correlation matrix for each of the B samples. */ proc corr data=SimCorr noprint outP=OutCorr(where=(_TYPE_="CORR") rename=(y=Corr)); by ID; var x y; run; /* 3. Use the DATA step to extract only the off-diagonal elements */ data NormalCorr; /* Retain R[1,2] cell only */ set OutCorr; if mod(_N_,2)=1; /* Keep odd-numbered rows; delete even rows */ label Corr = "Sample Correlation (r)"; drop _NAME_ x; run; /* title "Distribution of r (rho = &rho)"; title2 "X ~ Bivariate MVN, r = corr(X)"; proc sgplot data=DistribCorr; histogram Corr; run; */ /**************************/ /* ALTERNATIVE SIMULATION */ /* generate sample correlation coefficients by using Wishart distribution */ proc iml; call randseed(12345); NumSamples = &NumSamples; DF = &N - 1; /* X ~ N obs from MVN(0, Sigma) */ Sigma = {1 &rho, /* covariance for MVN samples */ &rho 1 }; S = RandWishart(NumSamples, DF, Sigma); /* each row is 2x2 matrix */ Corr = j(NumSamples, 1); /* allocate vector for correlation estimates */ do i = 1 to nrow(S); /* convert to correlation; extract off-diagonal */ Corr[i] = cov2corr( shape(S[i,], 2, 2) )[1,2]; end; title "Distribution of r (rho = &rho)"; title2 "r ~ Wishart(Sigma, DF=19)"; call histogram(Corr) label="Correlation Estimate (r)"; create Wishart var {Corr}; append; close; quit; /* Combine the simulated correlations and construct a comparative histogram. See https://blogs.sas.com/content/iml/2016/03/09/comparative-panel-overlay-histograms-sas.html https://blogs.sas.com/content/iml/2014/08/25/bins-for-histograms.html */ data All; set NormalCorr(keep=Corr) Wishart(in=w); if w then Group = "Wishart"; else Group = "Normal "; run; title "Distribution of r (rho = &rho)"; proc univariate data=All; class Group; histogram Corr / outhist=outhist endpoints=(0.28 to 1 by 0.03) odstitle=title; ods select histogram; run; /* If desired, run nonparametric tests that compare the two distributions */ proc npar1way data=All; class Group; var Corr; run;