/* SAS/IML program to draw prediction ellipses under the assumption of bivariate normality. Author: Rick Wicklin Blog: "Computing prediction ellipses from a covariance matrix" http://blogs.sas.com/content/iml/2014/07/22/prediction-ellipses-from-covariance/ */ proc iml; /* Compute prediction ellipses centered at m from the 2x2 covariance matrix S. The return matrix is a matrix with three columns. Col 1: The X coordinate of the ellipse for the confidence level. Col 2: The Y coordinate of the ellipse for the confidence level. Col 3: The confidence level. Use this column to extract a single ellipse. Input parameters: m a 1x2 vector that specifies the center of the ellipse. This can be the sample mean or median. S a 2x2 symmetric positive definite matrix. This can be the sample covariance matrix or a robust estimate of the covariance. n The number of nonmissing observations in the data. ConfLevel a 1 x k vector of (1-alpha) confidence levels that determine the ellipses. Each entry is 0 < ConfLevel[i] < 1. For example, 0.95 produces the 95% confidence ellipse for alpha=0.05. nPts the number of points used to draw the ellipse. The default is 0.95. */ start PredEllipseFromCov(m, S, n, ConfLevel=0.95, nPts=128); /* parametrize standard ellipse in coords of eigenvectors */ call eigen(lambda, evec, S); /* eigenvectors are columns of evec */ t = 2*constant("Pi") * (0:nPts-1) / nPts; xy = sqrt(lambda[1])*cos(t) // sqrt(lambda[2])*sin(t); stdEllipse = T( evec * xy ); /* transpose for convenience */ /* scale the ellipse; see documentation for PROC CORR */ c = 2 * (n-1)/n * (n+1)/(n-2); /* adjustment for finite sample size */ p = rowvec(ConfLevel); /* row vector of confidence levels */ F = sqrt(c * quantile("F", p, 2, n-2)); /* p = 1-alpha */ ellipse = j(ncol(p)*nPts, 3); /* 3 cols: x y p */ startIdx = 1; /* starting index for next ellipse */ do i = 1 to ncol(p); /* scale and translate */ idx = startIdx:startIdx+nPts-1; ellipse[idx, 1:2] = F[i] # stdEllipse + m; ellipse[idx, 3] = p[i]; /* use confidence level as ID */ startIdx = startIdx + nPts; end; return( ellipse ); finish; store module=PredEllipseFromCov; quit; /***************************************************/ /* Example 1: Compare a classical and robust prediction ellipse See SAS/IML documentation http://support.sas.com/documentation/cdl/en/imlug/66845/HTML/default/viewer.htm#imlug_robustregexpls_sect011.htm /***************************************************/ proc iml; load module=(PredEllipseFromCov); /* Stackloss data example from SAS/IML doc */ /* classical mean and covariance for stackloss data */ /* Rate AcidConcentration */ N = 21; mean = {60.428571 86.285714}; cov = {84.057143 24.571429, 24.571429 28.714286}; classicalEllipse = PredEllipseFromCov(mean, cov, N, 0.9); /* robust estimates of location and scatter for stackloss data */ /* Rate AcidConcentration */ N = 17; robMean = {56.7 85.5}; robCov = {23.5 16.1, 16.1 32.4}; robEllipse = PredEllipseFromCov(robMean, robCov, N, 0.9); E = classicalEllipse || robEllipse[,1:2]; create Ellipse from E[c={"classX" "classY" "ConfLevel" "robX" "robY"}]; append from E; close Ellipse; quit; data Stackloss; input Rate Temperature AcidCon @@; datalines; 80 27 89 80 27 88 75 25 90 62 24 87 62 22 87 62 23 87 62 24 93 62 24 93 58 23 87 58 18 80 58 18 89 58 17 88 58 18 82 58 19 93 50 18 89 50 18 86 50 19 72 50 19 79 50 20 80 56 20 82 70 20 91 ; data All; set Stackloss Ellipse; run; title "Classical and Robust Prediction Ellipses"; proc sgplot data=All; scatter x=Rate y=AcidCon / transparency=0.5 markerattrs=(symbol=CircleFilled size=12); polygon x=classX y=classY id=ConfLevel / lineattrs=GraphData1 name="classical" legendlabel="Classical 90% Ellipse"; polygon x=robX y=robY id=ConfLevel / lineattrs=GraphData2 name="robust" legendlabel="Robust 90% Ellipse"; xaxis grid; yaxis grid; keylegend "classical" "robust"; run; /***************************************************/ /* Example 2: Overlay 95% prediction ellipse for each species in the sashelp.iris data. /***************************************************/ proc iml; load module=(PredEllipseFromCov); /* iris data */ use sashelp.iris; read all var {PetalLength PetalWidth} into X; read all var {Species}; close sashelp.iris; /* draw 95% prediction ellipses for each species in the iris data by using the classical mean and covariance */ E = j(128, 7, 0.95); u = unique(Species); do i = 1 to ncol(u); idx = loc(Species=u[i]); mean = mean( X[idx,] ); cov = cov( X[idx,] ); PE = PredEllipseFromCov(mean, cov, ncol(idx)); E[,2*i:2*i+1] = PE[,1:2]; end; varNames = {"ConfLevel" "L_Set" "W_Set" "L_Virg" "W_Virg" "L_Vers" "W_Vers"}; create Ellipse from E[c=varNames]; append from E; close Ellipse; quit; data All; set sashelp.iris Ellipse; run; title "95% Prediction Ellipses for Each Group"; proc sgplot data=All; scatter x=PetalLength y=PetalWidth / jitter group=Species name="scatter" nomissinggroup transparency=0.5 markerattrs=(symbol=CircleFilled size=12); polygon x=L_Set y=W_Set id=ConfLevel / lineattrs=GraphData1; polygon x=L_Virg y=W_Virg id=ConfLevel / lineattrs=GraphData2; polygon x=L_Vers y=W_Vers id=ConfLevel / lineattrs=GraphData3; keylegend "scatter" / title="Species:"; xaxis grid label="Petal Length (mm)"; yaxis grid label="Petal Width (mm)"; run; /***************************************************/ /* Example 3: Simulate bivariate normal data. /***************************************************/ proc iml; load module=(PredEllipseFromCov); /* population parameters */ mean = {0 0}; cov = {4 3, 3 9}; /* generate random sample of size n from MVN distribution */ n = 100; call randseed(1); X = randNormal(n, mean, cov); create Pts from X[c={x y}]; append from X; close Pts; /* compute prediction ellipses from n and estimates of mean & cov */ mCenter = mean(X); mCov = cov(X); p = {0.5 0.75 0.9}; /* confidence levels */ ellipse = PredEllipseFromCov(mCenter, mCov, n, p); create Ellipse from ellipse[c={"ex" "ey" "ConfLevel" }]; append from ellipse; close Ellipse; quit; data All; set Pts Ellipse; run; title "Prediction Ellipses: Computed by using SAS/IML"; proc sgplot data=All; format ConfLevel percent6.; scatter x=x y=y / transparency=0.6 markerattrs=(symbol=CircleFilled); /* to verify computation: use ELLIPSE statement */ ellipse x=x y=y / alpha=0.5; ellipse x=x y=y / alpha=0.25; ellipse x=x y=y / alpha=0.1; /* overlay SAS/IML computation */ polygon x=ex y=ey id=ConfLevel / group=ConfLevel; xaxis grid; yaxis grid; run;