/* Program to accompany
   "Wealth and winning in NC high school athletics"
   by Rick Wicklin, published July 26, 2015, The DO Loop blog:
   http://blogs.sas.com/content/iml/2015/07/26/wealth-and-winning.html
*/

/* Read data from the CSV file into WORK.NCHighSchools2013.
   The DATAFILE= path is machine-specific; adjust it to the local
   location of the CSV file before running. */
PROC IMPORT OUT= WORK.NCHighSchools2013
            DATAFILE= "C:\Users\frwick\Documents\Blog\freereduced13-14.csv"
            DBMS=CSV REPLACE;
   GETNAMES=YES;        /* first row holds the variable names       */
   DATAROW=2;           /* data values start on the second row      */
   GUESSINGROWS=500;    /* rows scanned to determine variable types */
RUN;

/* Create a histogram of the distribution of the free/reduced meal rate */
proc univariate data=NCHighSchools2013;
   label Needy = "Proportion on Free or Reduced Lunch";
   var Needy;
   histogram Needy / endpoints=(0 to 1 by 0.1)
                     odstitle="Distribution of Free and Reduced Rates (2013)";
run;

/* Summarize the rate into the five categories used in the article.
   Each range excludes its upper endpoint except the last one,
   which includes 1.0. */
proc format;
   value FRRate
      0.0 -< 0.2 = '[0, 20%)'
      0.2 -< 0.4 = '[20%, 40%)'
      0.4 -< 0.6 = '[40%, 60%)'
      0.6 -< 0.8 = '[60%, 80%)'
      0.8 -  1.0 = '[80%, 100%]';
run;

/* Frequency of schools in each formatted category */
proc freq data=NCHighSchools2013;
   format Needy FRRATE.;
   tables Needy / nocum;
run;

/* Create simple summary tables.
   Each FRRate value below falls inside one of the FRRATE. ranges, so the
   format maps every record to one of the five categories. */
data Titles;
   label FRRate  = "Free/Reduced Rate"
         PctWon  = "Percent Titles Won"
         PctInNC = "Percent in NC";
   input FRRate NumTitles PctInNC;
   Total = 471;                                  /* equals the sum of NumTitles in the data lines */
   PctWon = NumTitles / Total;                   /* share of all titles for this category         */
   Deviation = (PctWon - PctInNC) / PctInNC;     /* relative difference of PctWon from PctInNC    */
   /* In-stream data must start on the line after DATALINES, one record per
      line, and end with a semicolon on a line by itself. */
   datalines;
0.1 168 0.05
0.3 157 0.21
0.5  85 0.46
0.7  56 0.20
0.9   5 0.08
;

/* Table 1: percentage of titles won by category */
proc print data=Titles noobs label;
   format FRRate FRRATE.;
   format PctInNC PctWon Deviation PERCENTN9.2;
   var FRRate PctWon;
run;

/* Table 2: adds the category's PctInNC value and the relative deviation */
proc print data=Titles noobs label;
   format FRRate FRRATE.;
   format PctInNC PctWon Deviation PERCENTN9.2;
   var FRRate PctWon PctInNC Deviation;
run;

/* Chi-square test of the title counts (WEIGHT NumTitles) against the
   proportions in TESTP=, which are the same values as the PctInNC column. */
proc freq data=Titles;
   format FRRate FRRATE.;
   weight NumTitles;
   tables FRRate / nocum chisq expected deviation
                   testp=(0.05 0.21 0.46 0.20 0.08);
run;