/* Program to accompany the article
   "7 ways to view correlation"
   by Rick Wicklin
   published on The DO Loop blog, 05SEP2017
   http://blogs.sas.com/content/iml/2017/09/05/7-ways-to-view-correlation.html

   Based on Rodgers and Nicewander (1988),
   "Thirteen ways to look at the correlation coefficient", TAS
   http://www.jstor.org/stable/2685263?seq=1#page_scan_tab_contents

   Every section below computes (or displays) the same quantity: the
   Pearson correlation between Height and Weight in sashelp.class.
*/

/* 1. Graphically */
ods graphics / width=400px height=300px;
title "Weight vs. Height for 19 Students";
proc sgplot data=sashelp.class;
   scatter x=Height y=Weight / markerattrs=(symbol=CircleFilled size=9);
run;

/* Compute the correlation by using PROC CORR.
   ODS SELECT restricts the displayed output to the PearsonCorr table. */
proc corr data=sashelp.class plots=scatter;
   var Height Weight;
   ods select PearsonCorr;
run;

/* implement the computations */
proc iml;
use sashelp.class;
   read all var "Height" into x;
   read all var "Weight" into y;
close;

/* 2. Sum of crossproducts */
xC = x - mean(x);                  /* center data */
yC = y - mean(y);
ProdMoment = sum( xC#yC ) / sqrt( ssq(xC)*ssq(yC) );
print ProdMoment;

/* 3. Inner product of standardized vectors.
      Reuse the centered vectors from step 2; dividing by the vector norm
      gives unit-length vectors. */
u = xC / norm(xC);
v = yC / norm(yC);
InnerProd = u` * v;
print InnerProd;

/* 4. The angle between two vectors */
theta = arcos( u` * v );
print theta[L="angle(radians)"] (cos(theta))[L="cos(theta)"];

/* 5. The standardized covariance */
xStd = standard(x);
yStd = standard(y);
Cov = cov( xStd || yStd );
print Cov;

/* 6. The slope of the regression line between two standardized variables.
      No-intercept least squares fit. The normal equations are solved with
      SOLVE rather than by forming an explicit inverse. */
b = solve( xStd`*xStd, xStd`*yStd );
print b;
/* store the rounded slope in a macro variable for the plot after PROC IML */
call symputx("stdSlope", round(b,1e-4));

/* 7. Geometric mean of regression slopes */
Xd = j(nrow(x), 1, 1) || x;        /* add intercept term */
b_x = solve( Xd`*Xd, Xd`*y );      /* regress y onto x */
Yd = j(nrow(y), 1, 1) || y;        /* add intercept term */
b_y = solve( Yd`*Yd, Yd`*x );      /* regress x onto y */
/* Slopes are the second components. The two slopes always share a sign,
   so their product is nonnegative. The geometric mean is only defined for
   nonnegative values, so take the square root of the product and restore
   the sign explicitly; this also handles negatively correlated data. */
slopeYX = b_x[2];
slopeXY = b_y[2];
geomMean = sign(slopeYX) * sqrt( slopeYX * slopeXY );
print geomMean;

/* write the standardized variables for plotting */
create CorrViz var {"xStd" "yStd"};
append;
close;
quit;

/* 6. The slope of the regression line between two standardized variables (plot) */
title "Slope of Regression Line between Standardized Variables";
title2 "Slope = &stdSlope";
proc sgplot data=CorrViz noautolegend;
   scatter x=xStd y=yStd / markerattrs=(symbol=CircleFilled size=9);
   lineparm x=0 y=0 slope=&stdSlope;
   xaxis grid;
   yaxis grid;
run;