/* Program to accompany the article
   "How to create and interpret a weighted histogram"
   by Rick Wicklin. Published 04OCT2017 on The DO Loop blog
   https://blogs.sas.com/content/iml/2017/10/04/create-interpret-weighted-histogram.html

   This program creates a weighted data set from the SAS documentation
   http://go.documentation.sas.com/?docsetId=proc&docsetTarget=n1xkqt7u5ylr2kn11174pq11od76.htm&docsetVersion=9.4&locale=en#n1934j4pmu3328n1hcczxemu170p

   You can use PROC SGPLOT to compare the weighted and unweighted
   histograms. The weighted version estimates the true parameter
   value better:
   (1) The weighted mean is more accurate
   (2) The weighted variance is smaller than the unweighted variance

   The program also contains a DATA step and a SAS/IML program that
   each compute the heights of the bars in a weighted histogram for
   these data.
*/

ods graphics / reset;

/* Each record is a (Distance, ObjectSize) pair; the trailing @@ lets
   several pairs share one data line. */
data Size;
   input Distance ObjectSize @@;
   Wt = 1 / Distance;                /* precision */
   x = ObjectSize;
   label x = "Estimate of Size";
   datalines;
1.5 30 1.5 20 1.5 30 1.5 25
3   43 3   33 3   25 3   30
4.5 25 4.5 36 4.5 48 4.5 33
6   43 6   36 6   23 6   48
7.5 30 7.5 25 7.5 50 7.5 38
;

/* Note that SCALE=COUNT is MEANINGLESS for a weighted histogram.
   You can multiply Wt by any positive constant and the histogram
   will look the same. Use SCALE=PERCENT or SCALE=PROPORTION. */

title "Unweighted Histogram of Size Estimates";
proc sgplot data=Size noautolegend;
   histogram x / scale=proportion datalabel binwidth=5;
   yaxis grid;
   refline 30 / axis=x lineattrs=(pattern=dash);
run;

/* unweighted descriptive statistics for x */
proc means data=Size mean stddev var sum;
   var x;
run;

title "Weighted Histogram of Size Estimate";
proc sgplot data=Size noautolegend;
   histogram x / weight=Wt scale=proportion datalabel binwidth=5;
   fringe x / lineattrs=(thickness=2 color=black) transparency=0.6;
   yaxis grid offsetmin=0.05 label="Weighted Proportion";
   refline 30 / axis=x lineattrs=(pattern=dash);
run;

/* same statistics, with each observation weighted by Wt */
proc means data=Size mean stddev var sum;
   weight Wt;
   var x;
run;

/* DATA step to compute the proportion of (standardized) weights in
   each bin, which is the height of each bar. The bins are
   [17.5, 22.5), [22.5, 27.5), ..., [47.5, 52.5). One observation is
   written (at end of input) that contains height1-height7. */
data BinHeights(keep=height:);
   array EndPt[8] _temporary_;
   binStart = 17.5; binWidth = 5;         /* bin anchor and width */
   do i = 1 to dim(EndPt);                /* define endpoints of bins */
      EndPt[i] = binStart + (i-1)*binWidth;
   end;
   array height[7];                       /* height of each bin */

   set Size end=eof;                      /* for each observation ... */
   sumWt + Wt;                            /* sum statement: running sum of weights */
   Found = 0;
   do i = 1 to dim(EndPt)-1 while (^Found);   /* find bin for each obs */
      Found = (EndPt[i] <= x < EndPt[i+1]);
      if Found then height[i] + Wt;       /* increment bin height by weight */
   end;

   if eof then do;                        /* after the last observation ... */
      do i = 1 to dim(height);            /* scale heights by sum of weights */
         height[i] = height[i] / sumWt;
      end;
      output;
   end;
run;

/* The tables that follow are bar heights, not a plot, so replace the
   title left over from the weighted PROC SGPLOT step. */
title "Heights of Bars in the Weighted Histogram";
proc print noobs data=BinHeights;
run;

/* SAS/IML program to compute the proportion of (standardized)
   weights in each bin */
proc iml;
use Size;  read all var {x Wt};  close;
sumWt = sum(Wt);

EndPt = do(17.5, 52.5, 5);           /* define endpoints of bins */
idx = bin(x, EndPt);                 /* idx[i] contains bin for x[i] */
heights = j(1, ncol(EndPt)-1, 0);    /* initialize heights to 0 */
do i = 1 to ncol(heights);
   jdx = loc(idx = i);               /* find obs numbers in i_th bin */
   if ncol(jdx)>0 then               /* an empty bin keeps height 0 */
      heights[i] = sum( Wt[jdx] ) / sumWt;   /* sum of standardized weights */
end;
print heights[format=BEST6.];

/**************************************/