/*
   SAS Program by Rick Wicklin.
   http://blogs.sas.com/content/iml/2015/03/30/visualizing-airline-crashes/

   Airline crash data 1993-Mar 2015
   Downloaded from http://www.informationisbeautiful.net/visualizations/plane-truth/
   Save data as a CSV file called "FlightRisk.csv"
   Then run the following program.

   Outline:
     1. Import the CSV into WORK.CrashRaw.
     2. Clean the Phase and Cause values and add numeric ordering keys (data set Crash).
     3. Plot the number of crashes per year (series plot, then bubble plot).
     4. Show Cause by Phase as a mosaic plot.
*/

/* CHANGE the next line to point to the location of the FlightRisk.csv file.
   The path is substituted inside double quotes below, so it must not contain
   unintended macro triggers (& or %). */
%let CrashCsv = C:\Users\frwick\Documents\Blog\images\FlightRisk.csv;

PROC IMPORT OUT= WORK.CrashRaw
            DATAFILE= "&CrashCsv"
            DBMS=CSV REPLACE;
   GETNAMES=YES;   /* take variable names from the file */
   DATAROW=3;      /* start reading data records at row 3 */
RUN;

/* Do some data cleansing */
data Crash;
   label Phase="Flight Phase" Cause="Cause of Crash";
   /* keep the original cause text as LongCause; the "meta" column becomes Cause */
   set CrashRaw(rename=(cause=LongCause meta=Cause));
   Year = Year(Date);

   /* collapse spelling variants of Phase into a small set of categories */
   if Phase="ap"
      or substr(upcase(Phase),1,8)="APPROACH"
      or Phase="Landing" then Phase="landing";
   if Phase="grounded" then Phase="standing";
   /* NOTE(review): "initial takeo" looks like a truncated "initial takeoff";
      presumably the imported Phase variable is 13 characters long -- confirm
      against the CSV before changing the import options. */
   if Phase="initial takeo"
      or Phase="take off"
      or Phase="Takeoff"
      or Phase="takeoff"
      or Phase="initial_climb" then Phase="take-off";
   if Phase="en_route" then Phase="en route";
   if Cause="human_error" then Cause="human error";

   /* order by frequency */
   /* Values that match none of the branches leave PhaseN / CauseN missing. */
   if      Phase="landing"  then PhaseN=1;
   else if Phase="en route" then PhaseN=2;
   else if Phase="take-off" then PhaseN=3;
   else if Phase="standing" then PhaseN=4;
   else if Phase="unknown"  then PhaseN=5;

   if      Cause="criminal"    then CauseN=1;
   else if Cause="unknown"     then CauseN=2;
   else if Cause="mechanical"  then CauseN=3;
   else if Cause="weather"     then CauseN=4;
   else if Cause="human error" then CauseN=5;
run;

/* data are already sorted by date */
/* count the crashes in each year; FreqOut has one row per Year with Count and Percent */
proc freq data=Crash;
   tables Year / out=FreqOut;
run;

/* create basic time series plot of crashes per year */
ods graphics / width=600 height=400;
title "Commercial Airline Crashes by Year";
title2 "1993 - 2014";
proc sgplot data=FreqOut;
   where Year<2015;          /* exclude 2015 from the plot */
   series x=Year y=Count / Markers;
   xaxis grid;
   yaxis grid min=0;
run;

/* Optional: In Significance magazine, McCandless shows series plot where
   size of marker is "average fatalities per crash"
   http://onlinelibrary.wiley.com/doi/10.1111/j.1740-9713.2015.00795.x/full#sign795-sec-0070 */
/* weighting by fat makes Count in FreqOut2 the total fatalities per year */
proc freq data=Crash noprint;
   tables Year / out=FreqOut2;
   weight fat;               /* fatalities */
run;

/* combine crashes per year (Count) with fatalities per year (Fatalities) */
data FreqOut2;
   merge FreqOut(drop=Percent)
         FreqOut2(drop=Percent rename=(Count=Fatalities));
   by year;
   AvgFat = round(Fatalities/Count);   /* average fatalities per crash, rounded */
run;

ods graphics / reset;
/* the first title line set above is still in effect; only the second line changes */
title2 "Marker Size Indicates Average Fatalities per Crash";
proc sgplot data=FreqOut2 noautolegend;
   where Year<2015;
   series x=Year y=Count;
   bubble x=Year y=Count size=AvgFat /
          BRadiusMax=15px datalabel=AvgFat datalabelpos=center;
   xaxis grid;
   yaxis grid min=0;
run;

/* clear the time-series titles so they do not appear on the output that follows */
title;

/* Use a mosaic plot instead of flow diagram */
/* sort by the numeric keys so that ORDER=DATA below lists the categories in that order */
proc sort data=Crash;
   by PhaseN CauseN;
run;

ods graphics / width=600px height=600px;
proc freq data=Crash order=data;
   tables Cause*Phase / nocum norow nocol plots=(mosaic(square));
run;

/* If you want to augment the plot (change titles, add percentages) you can
   create an editable graphic. See
   http://blogs.sas.com/content/iml/2012/08/06/change-a-plot-title-by-using-the-ods-graphics-editor/ */
ods html sge=on;
proc freq data=Crash order=data;
   tables Cause*Phase / nocum plots=(mosaic(square)) out=Out;
run;
ods html sge=off;