/* SAS program to accompany the article "Kernel regression in SAS" by Rick Wicklin, published 29AUG2018 on The DO Loop blog: https://blogs.sas.com/content/iml/2018/08/29/kernel-regression-in-sas.html This program shows how to implement a simple kernel regression smoother in SAS by using the SAS/IML matrix language. */ ods graphics / reset; title; data gas; label NOx = "Nitric oxide and nitrogen dioxide"; label E = "Air/Fuel Ratio"; input NOx E @@; datalines; 4.818 0.831 2.849 1.045 3.275 1.021 4.691 0.97 4.255 0.825 5.064 0.891 2.118 0.71 4.602 0.801 2.286 1.074 0.97 1.148 3.965 1 5.344 0.928 3.834 0.767 1.99 0.701 5.199 0.807 5.283 0.902 3.752 0.997 0.537 1.224 1.64 1.089 5.055 0.973 4.937 0.98 1.561 0.665 ; ods graphics / width=400px height=300px; title "Nitrogen Oxides versus Air/Fuel Ratio"; proc sgplot data=gas; scatter x=E y=NOx / markerattrs=(symbol=CircleFilled); xaxis grid; yaxis grid; run; /****************************************************************/ /* implement kernel regression (weighted polynomial regression) */ /****************************************************************/ proc iml; /* First, define the PolyRegEst and PolyRegScore modules https://blogs.sas.com/content/iml/2016/10/05/weighted-regression.html */ /* weighted polynomial regression for one regressor. Y, X, and w are col vectors */ start PolyRegEst(Y, X, w, deg); Yw = sqrt(w)#Y; /* mult rows of Y by sqrt(w) */ XDesign = j(nrow(X), deg+1, 1); do j = 1 to deg; /* design matrix for polynomial regression */ Xdesign[,j+1] = X##j; end; Xw = sqrt(w)#Xdesign; /* mult rows of X by sqrt(w) */ return solve(Xw`*Xw, Xw`*Yw); /* solve normal equation: (Xw`*Xw)b = Xw`*Yw */ finish; /* Score regression fit at x0 (which can be a column vector) */ start PolyRegScore(x, coef); p = nrow(coef); y = j(nrow(x), 1, coef[p]); /* initialize to coef[p] */ do j = p-1 to 1 by -1; y = y # x + coef[j]; end; return(y); finish; /* Interpret H as "proportion of range" so that the bandwidth is h=H*range(X) The weight of an observation at x when fitting the regression at x0 is given by the f(x; x0, h), which is the density function of N(x0, h) at x. This is equivalent to (1/h)*pdf("Normal", (X-x0)/h) for the standard N(0,1) distribution. */ start GetKernelWeights(X, x0, H); return pdf("Normal", X, x0, H*range(X)); /* bandwidth h = H*range(X) */ finish; /* Kernel regression module. (X,Y) are column vectors that contain the data. H is the proportion of the data range. It determines the bandwidth for the kernel regression as h = H*range(X) t is a vector of values at which to evaluate the fit. By defaul t=X. */ start KernelRegression(X, Y, H, _t=X, deg=1); t = colvec(_t); pred = j(nrow(t), 1, .); do i = 1 to nrow(t); /* compute weighted regression model estimates, degree deg */ W = GetKernelWeights(X, t[i], H); b = PolyRegEst(Y, X, W, deg); pred[i] = PolyRegScore(t[i], b); /* score model at t[i] */ end; return pred; finish; /* example of calling the kernel regression modules */ use gas; read all var {E NOx} into Z; close; /* read data */ call sort(Z); /* for graphing, sort by X */ X = Z[,1]; Y = Z[,2]; H = 0.1; /* choose bandwidth as proportion of range */ deg = 1; /* degree of regression model */ x0 = 1.0; /* evaluate at point x0 in range(X) */ W = GetKernelWeights(X, x0, h); /* get weights of data close to x0 */ b = PolyRegEst(Y, X, W, deg); /* weighted regression model (polynomial of degree d) */ y0 = PolyRegScore(x0, b); /* score model */ print x0 y0; H = 0.1; /* choose bandwidth as proportion of range */ deg = 1; /* degree of regression model */ XMin = min(X); XMax = max(X); N = 201; t = T( do(XMin, XMax, (XMax-XMin)/(N-1)) ); /* evenly spaced x values */ pred = KernelRegression(X, Y, H, t); create KerOut var {t pred}; append; close; /* create graph that overlays data and kernel smoother */ submit H deg; data All; set gas KerOut; run; title "Kernel Regression Fit"; title2 "h = &H*Range(X), deg = °"; proc sgplot data=All cycleattrs noautolegend; scatter x=E y=NOx / markerattrs=(symbol=CircleFilled); series x=t y=Pred / name="ker" legendlabel="Kernel Regression" lineattrs=(thickness=2); xaxis grid; yaxis grid; run; endsubmit; QUIT; /********************************************************/ /* special case of kernel regression (weighted average) */ /********************************************************/ proc iml; /* Nadaraya-Watson kernel estimator, which is a locally weighted average.*/ start NWKerReg(Y, X, x0, H); K = colvec( pdf("Normal", X, x0, H*range(X)) ); return( K`*Y / sum(K) ); finish; /* Nadaraya-Watson kernel smoother module. (X,Y) are column vectors that contain the data. H is the proportion of the data range. It determines the bandwidth for the kernel regression as h = H*range(X) t is a vector of values at which to evaluate the fit. By defaul t=X. */ start NWKernelSmoother(X, Y, H, _t=X); t = colvec(_t); pred = j(nrow(t), 1, .); do i = 1 to nrow(t); pred[i] = NWKerReg(Y, X, t[i], H); end; return pred; finish; /* example of calling the kernel regression modules */ use gas; read all var {E NOx} into Z; close; call sort(Z); /* for graphing, sort by X */ X = Z[,1]; Y = Z[,2]; H = 0.1; /* bandwidth as proportion of range */ x0 = 1.0; /* evaluate at point x0 in range(X) */ XMin = min(X); XMax = max(X); N = 201; t = T( do(XMin, XMax, (XMax-XMin)/(N-1)) ); pred = NWKernelSmoother(X, Y, H, t); create NWKerOut var {t pred}; append; close; /* create graph that overlays data and kernel smoother */ submit H deg; data NWAll; set gas NWKerOut; run; title "Nadaraya-Watson Kernel Estimator"; title2 "h = &H*Range(X)"; proc sgplot data=NWAll cycleattrs noautolegend; scatter x=E y=NOx / markerattrs=(symbol=CircleFilled); series x=t y=Pred / name="ker" legendlabel="N-W Smoother" lineattrs=(thickness=2); xaxis grid; yaxis grid; run; endsubmit; QUIT; /* compare degree-1 kernel regression with loess */ title "Kernel Regression Fit (deg=1) vs Loess"; proc sgplot data=All cycleattrs noautolegend; scatter x=E y=NOx / markerattrs=(symbol=CircleFilled); series x=t y=Pred / name="ker" legendlabel="Kernel Regression" lineattrs=(thickness=2); loess x=E y=NOx / name="L" legendlabel="Loess" lineattrs=(thickness=2) nomarkers; keylegend "ker" "L"; xaxis grid; yaxis grid; run; /* Notice that the two kernel smoothers primarily differ at the edges of the data range. */ data Comp; Merge KerOut(rename=(pred=RegPred)) NWKerOut(rename=(pred=NWPred)); by t; diff = RegPred - NWPred; run; /* difference between degreee-1 kernel regression and N-W estimate */ proc sgplot data=Comp; series x=t y=diff; refline 0 / axis=y; run;