/* SAS program to accompany the article
   "Simulate correlations by using the Wishart distribution"
   by Rick Wicklin. Published 11OCT2017 on The DO Loop blog
   https://blogs.sas.com/content/iml/2017/10/11/simulate-correlations-wishart-distribution.html
*/


/* Simulate bivariate normal samples of size N with correlation rho */
/* https://blogs.sas.com/content/iml/2014/11/26/the-wishart-distribution.html */
%let rho = 0.8;           /* correlation for bivariate normal distribution */
%let N = 20;              /* sample size */
%let NumSamples = 2500;   /* number of simulated samples */

/* Simulate sample correlation coefficients in three steps:
   1. Simulate B samples of size N from a bivariate normal distrib w/ correlation rho.
   2. Use PROC CORR to compute the sample correlation matrix for each of the B samples.
   3. Use the DATA step to extract only the off-diagonal elements
*/

/*    1. Simulate B samples of size N from a bivariate normal distrib w/ correlation rho.
*/
proc iml;
N = &N;                   /* sample size */
NumSamples = &NumSamples;
call randseed(123456);
Sigma = {1     &rho,
         &rho  1};
Z = randnormal(N*NumSamples, {0 0}, Sigma);
x = Z[,1]; y = Z[,2]; 
ID = colvec( repeat( T(1:NumSamples), 1, N) );
create SimCorr var {ID X Y};  /* define 4 numerical output vars */
append; close;
quit;

/*   2. Use PROC CORR to compute the sample correlation matrix for each of the B samples.
*/
proc corr data=SimCorr noprint
          outP=OutCorr(where=(_TYPE_="CORR") rename=(y=Corr));
   by ID;
   var x y;
run;

/*   3. Use the DATA step to extract only the off-diagonal elements
*/
data NormalCorr;      /* Retain R[1,2] cell only */
   set OutCorr;
   if mod(_N_,2)=1;   /* Keep odd-numbered rows; delete even rows */
   label Corr = "Sample Correlation (r)";
	drop _NAME_ x;
run;

/*
title "Distribution of r (rho = &rho)";
title2 "X ~ Bivariate MVN, r = corr(X)";
proc sgplot data=DistribCorr;
histogram Corr;
run;
*/

/**************************/
/* ALTERNATIVE SIMULATION */
/* generate sample correlation coefficients by using Wishart distribution */
proc iml;
call randseed(12345);
NumSamples = &NumSamples;
DF = &N - 1;              /* X ~ N obs from MVN(0, Sigma) */
Sigma = {1     &rho,      /* covariance for MVN samples */
         &rho   1  };
S = RandWishart(NumSamples, DF, Sigma); /* each row is 2x2 matrix */
Corr = j(NumSamples, 1);  /* allocate vector for correlation estimates */
do i = 1 to nrow(S);      /* convert to correlation; extract off-diagonal */
   Corr[i] = cov2corr( shape(S[i,], 2, 2) )[1,2];
end;

title "Distribution of r (rho = &rho)";
title2 "r ~ Wishart(Sigma, DF=19)";
call histogram(Corr) label="Correlation Estimate (r)";

create Wishart var {Corr}; append; close;
quit;

/* Combine the simulated correlations and construct a comparative histogram. See
   https://blogs.sas.com/content/iml/2016/03/09/comparative-panel-overlay-histograms-sas.html
   https://blogs.sas.com/content/iml/2014/08/25/bins-for-histograms.html
*/
data All;
set NormalCorr(keep=Corr) Wishart(in=w);
if w then Group = "Wishart";
else      Group = "Normal ";
run;

title "Distribution of r (rho = &rho)";
proc univariate data=All;
  class Group;
  histogram Corr / outhist=outhist endpoints=(0.28 to 1 by 0.03) odstitle=title;
  ods select histogram;
run;

/* If desired, run nonparametric tests that compare the two distributions */
proc npar1way data=All;
class Group;
var Corr;
run;