/* Program from "Designing a quantile bin plot" by Rick Wicklin
   http://blogs.sas.com/content/iml/2014/09/24/quantile-bin-plot/

   Outline:
     1. PROC IML: bin two variables of sashelp.bweight by their deciles,
        then compute the count and the mean position of each occupied bin.
     2. PROC TEMPLATE / PROC SGRENDER: scatter plot of the bin means
        overlaid on a heat map of the bin counts.
     3. PROC SGPLOT: bubble plot of the bin means, sized by bin count.
*/
ods graphics / reset;

proc iml;
/* read the two birthweight variables into a two-column matrix */
use sashelp.bweight;
read all var {momwtgain weight} into xy;
close sashelp.bweight;

/* bin2D: assign each row of u to a cell of a 2-D grid.
   u     - matrix; column 1 is binned by cutX, column 2 by cutY
   cutX  - row vector of cut points for column 1 (defines kx bins)
   cutY  - row vector of cut points for column 2 (defines ky bins)
   Returns cell numbers 1,2,...,kx*ky; cells are numbered along the
   X direction first, then along the Y direction.                     */
start bin2D(u, cutX, cutY);
   nBinsX = ncol(cutX) - 1;           /* kx = number of X intervals   */
   iCol   = bin(u[,1], cutX);         /* X bin of each row: 1,...,kx  */
   iRow   = bin(u[,2], cutY);         /* Y bin of each row: 1,...,ky  */
   return( iCol + nBinsX*(iRow - 1) );
finish;

nGroups = 10;                          /* 10 groups ==> deciles        */
p = (1:(nGroups-1)) / nGroups;         /* 0.1, 0.2, ..., 0.9           */
call qntl(xQ, xy[,1], p);              /* empirical quantiles of X     */
call qntl(yQ, xy[,2], p);              /* empirical quantiles of Y     */

/* cut points for binning: the quantiles, with .M and .P as the
   outermost cut points on either side                                */
xEdges = .M || xQ` || .P;
yEdges = .M || yQ` || .P;
cellId = bin2D(xy, xEdges, yEdges);    /* cell number of every obs     */

/* count and mean position for every cell that contains data */
occupied = unique(cellId);             /* distinct cell numbers        */
nCells   = ncol(occupied);
centroid = j(nCells, 2, .);            /* one (mean X, mean Y) per cell */
nObs     = j(nCells, 1, .);            /* one count per cell           */
do c = 1 to nCells;
   members      = loc(cellId = occupied[c]);  /* obs in the c_th cell  */
   nObs[c]      = ncol(members);
   centroid[c,] = mean( xy[members,] );       /* column means          */
end;

/* for plotting, the outermost edges become the data minimum/maximum */
xEdges = min(xy[,1]) || xQ` || max(xy[,1]);
yEdges = min(xy[,2]) || yQ` || max(xy[,2]);
xList  = rowcat(char(xEdges) + " ");   /* blank-separated edge values  */
yList  = rowcat(char(yEdges) + " ");

/* scatter plot of the cell means with reference lines at the edges */
title "Scatter Plot of Mean Values in Each Bin";
xRefStmt = "refline " + xList + " / axis=x;";
yRefStmt = "refline " + yList + " / axis=y;";
call scatter(centroid[,1], centroid[,2])
     other=(xRefStmt + yRefStmt)
     label={"Mean Weight Gain for Mother (kg)" "Mean Weight of Child (g)"};

/* hand the edge lists and REFLINE statements to the later steps
   through macro variables                                            */
call symput("XBound", xList);
call symput("YBound", yList);
call symput("RefX", xRefStmt);
call symput("RefY", yRefStmt);

/* write one row per occupied cell: Count, MeanX, MeanY */
result = colvec(nObs) || centroid;
create Counts from result[colname={"Count" "MeanX" "MeanY"}];
append from result;
close Counts;
quit;

/********************************************************/
/* Scatter plot of means overlaid on heat map of counts */
/********************************************************/

/* attach axis labels to the mean variables */
data Counts;
   set Counts;
   label MeanX = "Mean Weight Gain for Mother (kg)"
         MeanY="Mean Weight of Child (g)";
run;

/* GTL template: heat map colored by Count with the means on top.
   &xbound and &ybound are the edge lists saved by the IML step.     */
proc template;
define statgraph QuantileBinPlot;
   begingraph;
      entrytitle "Scatter Plot of Mean Values in Each Bin";
      entrytitle "Overlaid on Heat Map of Bin Count";
      layout overlay;
         heatmapparm x=meanX y=meanY colorresponse=Count /
            xvalues=leftpoints xendlabels=true
            yvalues=leftpoints yendlabels=true
            xboundaries=(&xbound) yboundaries=(&ybound)
            name="heatmap" primary=true
            xbinaxis=false ybinaxis=false;
         scatterplot x=meanX y=meanY;
         continuouslegend "heatmap" / title="Count";
      endlayout;
   endgraph;
end;
run;

proc sgrender data=Counts template=QuantileBinPlot;
run;

/********************************************************/
/* Bubble plot of means, where the size of each bubble  */
/* indicates the number of observations in the bin      */
/********************************************************/

/* Trick: add a fake obs with Count=0 so the size scale is [0,max] */
data Fake;
   MeanX = .;
   MeanY = .;
   Count = 0;
run;

data Counts;
   set Counts Fake;
run;

/* &RefX and &RefY expand to the REFLINE statements built in IML */
proc sgplot data=Counts;
   bubble x=MeanX y=MeanY size=Count /
      /* disabled options, marked "SAS 9.4m2" by the author:
         colorresponse=Count colormodel=ThreeColorRamp */
      datalabel=Count datalabelpos=center
      datalabelattrs=(size=8 color=black)
      BradiusMin=0 BradiusMax=10pt
      transparency=0.6
      name="bubble" legendlabel="Count";
   &RefX;
   &RefY;
   keylegend "bubble" / location=inside position=bottomright;
run;