options extendobscounter=no; /* In Jan 2015, I switched from using the individual-year Excel files, to using the single (all years) csv file... http://www.fueleconomy.gov/feg/download.shtml http://www.fueleconomy.gov/feg/epadata/vehicles.csv.zip Ran the following code in DMS first... PROC IMPORT OUT=foo DATAFILE="vehicles.csv" DBMS=CSV REPLACE; GETNAMES=YES; DATAROW=2; RUN; And then did a Run->Recall_last_submit to get the actual code, which I copy-n-pasted here, and then modified slightly. */ data raw_data; infile 'vehicles.csv' delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2; informat barrels08 best32. ; informat barrelsA08 best32. ; informat charge120 best32. ; informat charge240 best32. ; informat city08 best32. ; informat city08U best32. ; informat cityA08 best32. ; informat cityA08U best32. ; informat cityCD best32. ; informat cityE best32. ; informat cityUF best32. ; informat co2 best32. ; informat co2A best32. ; informat co2TailpipeAGpm best32. ; informat co2TailpipeGpm best32. ; informat comb08 best32. ; informat comb08U best32. ; informat combA08 best32. ; informat combA08U best32. ; informat combE best32. ; informat combinedCD best32. ; informat combinedUF best32. ; informat cylinders best32. ; informat displ best32. ; informat drive $26. ; informat engId best32. ; informat eng_dscr $50. ; /* increased from 12 to 50 */ informat feScore best32. ; informat fuelCost08 best32. ; informat fuelCostA08 best32. ; informat fuelType $7. ; informat fuelType1 $16. ; informat ghgScore best32. ; informat ghgScoreA best32. ; informat highway08 best32. ; informat highway08U best32. ; informat highwayA08 best32. ; informat highwayA08U best32. ; informat highwayCD best32. ; informat highwayE best32. ; informat highwayUF best32. ; informat hlv best32. ; informat hpv best32. ; informat id best32. ; informat lv2 best32. ; informat lv4 best32. ; informat make $50. ; /* increased from 10 to 50 */ informat model $50. ; /* increased from 19 to 50 */ informat mpgData $1. ; informat phevBlended $5. ; informat pv2 best32. ; informat pv4 best32. ; informat range best32. ; informat rangeCity best32. ; informat rangeCityA best32. ; informat rangeHwy best32. ; informat rangeHwyA best32. ; informat trany $25. ; /* increased to 25 */ informat UCity best32. ; informat UCityA best32. ; informat UHighway best32. ; informat UHighwayA best32. ; informat VClass $50. ; /* increased to 50 */ informat year best32. ; informat youSaveSpend best32. ; informat guzzler $1. ; informat trans_dscr $11. ; informat tCharger $1. ; informat sCharger $1. ; informat atvType $20. ; /* lengthened to 20 */ informat fuelType2 $1. ; informat rangeA $1. ; informat evMotor $1. ; informat mfrCode $1. ; informat c240Dscr $1. ; informat charge240b best32. ; informat c240bDscr $1. ; informat createdOn anydtdtm40. ; informat modifiedOn anydtdtm40. ; informat startStop $1. ; informat phevCity best32. ; informat phevHwy best32. ; informat phevComb best32. ; format barrels08 best12. ; format barrelsA08 best12. ; format charge120 best12. ; format charge240 best12. ; format city08 best12. ; format city08U best12. ; format cityA08 best12. ; format cityA08U best12. ; format cityCD best12. ; format cityE best12. ; format cityUF best12. ; format co2 best12. ; format co2A best12. ; format co2TailpipeAGpm best12. ; format co2TailpipeGpm best12. ; format comb08 best12. ; format comb08U best12. ; format combA08 best12. ; format combA08U best12. ; format combE best12. ; format combinedCD best12. ; format combinedUF best12. ; format cylinders best12. ; format displ best12. ; format drive $26. ; format engId best12. ; format eng_dscr $50. ; format feScore best12. ; format fuelCost08 best12. ; format fuelCostA08 best12. ; format fuelType $7. ; format fuelType1 $16. ; format ghgScore best12. ; format ghgScoreA best12. ; format highway08 best12. ; format highway08U best12. ; format highwayA08 best12. ; format highwayA08U best12. ; format highwayCD best12. ; format highwayE best12. ; format highwayUF best12. ; format hlv best12. ; format hpv best12. ; format id best12. ; format lv2 best12. ; format lv4 best12. ; format make $50. ; format model $50. ; format mpgData $1. ; format phevBlended $5. ; format pv2 best12. ; format pv4 best12. ; format range best12. ; format rangeCity best12. ; format rangeCityA best12. ; format rangeHwy best12. ; format rangeHwyA best12. ; format trany $25. ; format UCity best12. ; format UCityA best12. ; format UHighway best12. ; format UHighwayA best12. ; format VClass $50. ; format year best12. ; format youSaveSpend best12. ; format guzzler $1. ; format trans_dscr $11. ; format tCharger $1. ; format sCharger $1. ; format atvType $20. ; format fuelType2 $1. ; format rangeA $1. ; format evMotor $1. ; format mfrCode $1. ; format c240Dscr $1. ; format charge240b best12. ; format c240bDscr $1. ; format createdOn datetime. ; format modifiedOn datetime. ; format startStop $1. ; format phevCity best12. ; format phevHwy best12. ; format phevComb best12. ; input barrels08 barrelsA08 charge120 charge240 city08 city08U cityA08 cityA08U cityCD cityE cityUF co2 co2A co2TailpipeAGpm co2TailpipeGpm comb08 comb08U combA08 combA08U combE combinedCD combinedUF cylinders displ drive $ engId eng_dscr $ feScore fuelCost08 fuelCostA08 fuelType $ fuelType1 $ ghgScore ghgScoreA highway08 highway08U highwayA08 highwayA08U highwayCD highwayE highwayUF hlv hpv id lv2 lv4 make $ model $ mpgData $ phevBlended $ pv2 pv4 range rangeCity rangeCityA rangeHwy rangeHwyA trany $ UCity UCityA UHighway UHighwayA VClass $ year youSaveSpend guzzler $ trans_dscr $ tCharger $ sCharger $ atvType $ fuelType2 $ rangeA $ evMotor $ mfrCode $ c240Dscr $ charge240b c240bDscr $ createdOn modifiedOn startStop $ phevCity phevHwy phevComb ; run; /* Variable definitions from http://www.fueleconomy.gov/feg/ws/index.shtml#vehicle -------------------- year - model year make - manufacturer (division) model - model name (carline) city08U - unrounded city MPG for fuelType1 highway08U - unrounded highway MPG for fuelType1 comb08U - unrounded combined MPG for fuelType1 cylinders - engine cylinders displ - engine displacement in liters drive - drive axle type trany - transmission eng_dscr - engine descriptor co2 - tailpipe CO2 in grams/mile for fuelType1 */ data mpg_data (keep = year make model vclass city08u highway08u comb08u cylinders displ drive trany eng_dscr co2 ); set raw_data; /* if make='Hummer' then model='Hummer '||trim(left(model)); */ /* If the unrounded numbers aren't available, use the rounded ones */ if city08u=. or city08u=0 then city08u=city08; if highway08U=. or highway08U=0 then highway08u=highway08; if comb08U=. or comb08U=0 then comb08u=comb08; eng_dscr=trim(left(trim(left(eng_dscr))||' '||trim(left(atvtype)))); run; /* Re-name a few variables, so I don't have to re-write my old code */ proc datasets; modify mpg_data; rename city08u = MPG_City; rename highway08u = MPG_Hwy; rename comb08u = MPG_Cmb; rename displ = Engine_liters; rename drive = Drive_sys; rename trany = Transmission; rename eng_dscr = Extra_info; rename vclass = Class; run; /* Order the variables in a logical easy-to-read manner, and make the labels & formats the way I want them */ data mpg_data; label Year='Year'; label Make='Make'; label Model='Model'; label Cylinders='Cylinders'; label Engine_liters='Engine Liters'; label Drive_sys='Drive System'; label Transmission='Transmission'; label Class='Class'; label MPG_City='MPG City'; label MPG_Hwy='MPG Highway'; label MPG_Cmb='MPG Combined'; label Extra_info='Extra Info'; label co2='CO2 gram/mile'; format MPG_City MPG_Hwy MPG_Cmb engine_liters comma8.1; set mpg_data; if co2=-1 then co2=.; if (MPG_City^=.) and (MPG_Hwy^=.) and (MPG_Cmb^=.) and (Cylinders^=.) and (Engine_liters^=.) /* this eliminates the electric vehicles */ then output; run; /* Sort it so the dataset is in a logical easy-to-read arrangement */ proc sort data=mpg_data out=mpg_data; by descending Year Make Model Drive_sys Transmission Engine_liters; run; /* Copy it to the permanent library */ libname mylib '.'; data mylib.mpg_data; set mpg_data; run;