/* Program to accompany the article
   "Lasagna plots in SAS: When spaghetti plots aren't sufficient"
   by Rick Wicklin, published 08JUN2016
   http://blogs.sas.com/content/iml/2016/06/08/lasagna-plots-in-sas.html

   Average life expectancy data from World Bank
   http://data.worldbank.org/indicator/SP.DYN.LE00.IN
   downloaded 20May2016.
*/

/* Step 1: Download two CSV files
   The life expectancy data:
   http://blogs.sas.com/content/iml/files/2016/06/LE2.csv
   Country information:
   http://blogs.sas.com/content/iml/files/2016/06/LifeExpectancyCountries.csv
*/

/* Folder that holds the two downloaded CSV files.
   Edit this single line to match the location on your machine. */
%let LEdir = C:\Users\frwick\Downloads\LifeExpectancy;

/* Step 2: Import to SAS data sets */
PROC IMPORT OUT=WORK.LifeExpectancy
            DATAFILE="&LEdir.\LE2.csv"
            DBMS=CSV REPLACE;
   GETNAMES=YES;
   DATAROW=2;
RUN;

PROC IMPORT OUT=WORK.CountryCodes
            DATAFILE="&LEdir.\LifeExpectancyCountries.csv"
            DBMS=CSV REPLACE;
   GETNAMES=YES;
   DATAROW=2;
RUN;

/* Step 3: Prepare data for plotting. Add formats. Convert from Wide to Long form */

/* Sort by country code so the life expectancy data can be merged
   with the country information below */
proc sort data=LifeExpectancy(drop=Indicator_Code Indicator_Name) out=LEsort;
   by Country_Code;
run;

/* Display labels for the numeric Income codes assigned in the LL2 step */
proc format;
   value IncomeFmt 1='High OECD'
                   2='High nonOECD'
                   3='Upper Middle'
                   4='Lower Middle'
                   5='Low';
run;

/* Merge the yearly values with the country information and encode
   IncomeGroup as an ordered numeric variable (1=High OECD, ..., 5=Low).
   Observations whose IncomeGroup matches none of the five categories
   are deleted. */
data LL2;
   merge LEsort CountryCodes(drop=SpecialNotes);
   by Country_Code;
   if      IncomeGroup="High income: OECD"    then Income=1;
   else if IncomeGroup="High income: nonOECD" then Income=2;
   else if IncomeGroup="Upper middle income"  then Income=3;
   else if IncomeGroup="Lower middle income"  then Income=4;
   else if IncomeGroup="Low income"           then Income=5;
   else delete;
   format Income IncomeFmt.;
run;

/* Order by income group, then alphabetically by country within each group */
proc sort data=LL2;
   by Income Country_Name;
run;

/* transpose from wide to long: one observation per country and year */
data LE;
   set LL2;
   array Yr[*] Y1960-Y2014;
   do i = 1 to dim(Yr);
      Year = 1960 + (i-1);      /* Yr[1] is Y1960, so element i is year 1960+(i-1) */
      Expected = Yr[i];
      output;
   end;
   label Expected = "Life Expectancy at Birth (years)";
   /* NOTE(review): the DROP list runs through Y2015 although the array stops
      at Y2014; presumably the download contains an unused Y2015 column that
      should not be carried along. Confirm against the CSV file. */
   drop i Y1960-Y2015;
run;

ods graphics/reset;
ods graphics / ANTIALIASMAX=13700 imagemap TIPMAX=11800;   /* enable data tips */

/* conventional spaghetti plot is not very useful */ 
/* Restore the default ODS Graphics options before plotting.
   NOTE(review): with the options reset and the next statement commented out,
   image maps are not requested here, so the TIP= option on the SERIES
   statement below presumably has no visible effect. Un-comment the statement
   to get data tips. */
ods graphics / reset;
*ods graphics / ANTIALIASMAX=13700 imagemap=ON TIPMAX=11800;   /* enable data tips */

/* Spaghetti plot: one line per low-income country */
title "Life Expectancy at Birth";
title2 "Low-Income Countries";
proc sgplot data=LE;
   where income=5;              /* extract the "low income" countries */
   format Country_Name $10.;    /* truncate country names */
   series x=Year y=Expected / group=Country_name break curvelabel
                              lineattrs=(pattern=solid)
                              tip=(Country_Name Region Year Expected);
run;

/***********************************************/
/* Lasagna plots                               */
/***********************************************/

/* More readable: heat map of countries, colored by response variable */

/* 1. Unsorted list of countries */
/* use COLORMODEL=(color-list) to change colors */
ods graphics/ width=500px height=600px discretemax=10000;
title "Life Expectancy in Low Income Countries";
proc sgplot data=LE;
   where Income=5;              /* extract the "low income" countries */
   format Country_Name $10.;    /* truncate country names */
   heatmap x=Year y=Country_Name/ colorresponse=Expected discretex
                                  colormodel=TwoColorRamp;
   /* NOTE(review): LABELATTRS styles the axis label, which DISPLAY=(NOLABEL)
      suppresses on both axes; VALUEATTRS (tick values) may have been
      intended. Confirm before changing the appearance. */
   yaxis display=(nolabel) labelattrs=(size=6pt) fitpolicy=thin reverse;
   xaxis display=(nolabel) labelattrs=(size=8pt) fitpolicy=thin;
run;
title;

/* switch to IML */
ods graphics / width=500px height=650px;
proc iml;
varName = "Y1960":"Y2014";
use LL2 where (Income=4);       /* read the "lower middle income" countries */
read all var varName into X[rowname=Country_Name];   /* rows=countries, columns=years */
close LL2;

Names = putc(Country_Name, "$15.");                  /* truncate names */
/* white followed by a four-color palette from colorbrewer.org; the matrix is
   named "ramp" so that it does not share a name with the PALETTE function */
ramp = "CXFFFFFF" || palette("YLORRD", 4);

/* 2. Order countries by name */
call heatmapcont(X) xvalues=1960:2014 yvalues=Names
     displayoutlines=0 colorramp=ramp
     title="Life Expectancy in Lower-Middle Income Countries";

/* 3. order rows by average LE from 1960-2014 */
mean = X[,:];                        /* row means: average over the years for each country */
call sortndx(idx, mean, 1, 1);       /* by first column, descending */
Sort1 = X[idx,];
Names1 = Names[idx,];
call heatmapcont(Sort1) xvalues=1960:2014 yvalues=Names1
     displayoutlines=0 colorramp=ramp
     title="Life Expectancy Sorted by Average";

/* 4. order each year to see how LE has changed.
      Every column is sorted independently, so a row no longer corresponds
      to a single country; that is why no YVALUES= are supplied. */
Sort2 = X;
do i = 1 to ncol(X);
   v = X[,i];
   call sort(v, 1, 1);               /* sort i_th column descending */
   Sort2[,i] = v;
end;
call heatmapcont(Sort2) xvalues=1960:2014
     displayoutlines=0 colorramp=ramp
     title="Life Expectancy Sorted for each Year";
quit;

/* 5. Box plots of life expectancy vs year */
title "Distribution of Life Expectancy by Year";
title2 "Lower-Middle Income Countries";
proc sgplot data=LE;
   where Income=4;              /* extract the "lower middle income" countries */
   vbox Expected / category=Year;
   xaxis display=(nolabel) labelattrs=(size=8pt) fitpolicy=thin;
run;
title;