/* Program to accompany the article
   "Create spaghetti plots in SAS"
   by Rick Wicklin, published 02JUN2016
   http://blogs.sas.com/content/iml/2016/06/02/create-spaghetti-plots-in-sas.html

   Average life expectancy data from World Bank
   http://data.worldbank.org/indicator/SP.DYN.LE00.IN
   downloaded 20May2016.
*/

/* Step 1: Download two CSV files
   The life expectancy data:
      http://blogs.sas.com/content/iml/files/2016/06/LE2.csv
   Country information:
      http://blogs.sas.com/content/iml/files/2016/06/LifeExpectancyCountries.csv
*/

/* Folder that holds the two downloaded CSV files. Both PROC IMPORT steps
   read from here, so this is the only line to edit when the files live
   somewhere else. */
%let DataDir = C:\Users\frwick\Downloads\LifeExpectancy;

/* Step 2: Import to SAS data sets */
proc import out=WORK.LifeExpectancy
      datafile="&DataDir.\LE2.csv"
      dbms=csv replace;
   getnames=yes;
   datarow=2;
run;

proc import out=WORK.CountryCodes
      datafile="&DataDir.\LifeExpectancyCountries.csv"
      dbms=csv replace;
   getnames=yes;
   datarow=2;
run;

/* Step 3: Prepare data for plotting. Add formats.
   Convert from Wide to Long form */

/* A MERGE with a BY statement requires every input to be ordered by the
   BY variable, so both tables are sorted by Country_Code first. */
proc sort data=LifeExpectancy(drop=Indicator_Code Indicator_Name) out=LEsort;
   by Country_Code;
run;

proc sort data=CountryCodes(drop=SpecialNotes) out=CCsort;
   by Country_Code;
run;

/* Display labels for the numeric Income codes assigned below */
proc format;
   value IncomeFmt
      1='High OECD'
      2='High nonOECD'
      3='Upper Middle'
      4='Lower Middle'
      5='Low';
run;

/* Combine the yearly values with the country information and recode the
   IncomeGroup text as an ordered numeric Income category (1-5).
   Observations whose IncomeGroup is not one of the five listed values
   are deleted. */
data LL2;
   merge LEsort CCsort;
   by Country_Code;
   if IncomeGroup="High income: OECD" then Income=1;
   else if IncomeGroup="High income: nonOECD" then Income=2;
   else if IncomeGroup="Upper middle income" then Income=3;
   else if IncomeGroup="Lower middle income" then Income=4;
   else if IncomeGroup="Low income" then Income=5;
   else delete;
   format Income IncomeFmt.;
run;

proc sort data=LL2;
   by Income Country_Code;
run;

/* transpose from wide to long: one output observation per country and
   year, with the year in Year and that year's value in Expected */
data LE;
   set LL2;
   array Yr[*] Y1960-Y2014;
   do i = 1 to dim(Yr);
      Year = 1960 + (i-1);        /* Yr[1] is Y1960, so index i maps to 1960+(i-1) */
      Expected = Yr[i];
      output;
   end;
   label Expected = "Life Expectancy at Birth (years)";
   /* NOTE(review): the DROP range runs through Y2015 although the array
      stops at Y2014; presumably the input has a Y2015 column that is not
      plotted -- confirm against the CSV. */
   drop i Y1960-Y2015;
run;

/* Step 4: Create various spaghetti plots */
ods graphics / reset;
/* Raise the anti-aliasing and data-tip limits and turn on the image map */
ods graphics / ANTIALIASMAX=13700 imagemap TIPMAX=11800;   /* enable data tips */

/* Line charts versus spaghetti charts */
title "Life Expectancy at Birth";
proc sgplot data=LE;
   where Country_Name in ("China" "Chad" "Croatia" "Israel" "Kosovo" "Kuwait"
                          "India" "Peru" "Sudan" "United States");
   series x=Year y=Expected / group=Country_Name break curvelabel;
run;

/* Spaghetti plot in SAS, colored by World Bank wealth category */
title "Life Expectancy at Birth for 207 Countries";
proc sgplot data=LE;
   /* one line per country (group=), line color taken from Income (grouplc=) */
   series x=Year y=Expected / group=Country_Name grouplc=Income break
          transparency=0.7 lineattrs=(pattern=solid)
          tip=(Country_Name Income Region);
   xaxis display=(nolabel);
   keylegend / type=linecolor title="";
run;

/* Paneled spaghetti plots, colored by geography */
/* No new TITLE statement is issued here, so the title set for the
   previous plot remains in effect. */
proc sgpanel data=LE;
   panelby Income / columns=3 onepanel sparse;
   series x=Year y=Expected / group=Country_Name break
          transparency=0.5 grouplc=region lineattrs=(pattern=solid)
          tip=(Country_Name Region);
   colaxis grid display=(nolabel) offsetmin=0.05 fitpolicy=stagger;
   keylegend / type=linecolor title="";
run;