/*
Boston Marathon finish times by bib number, one graph per year.

Reads race_lib.boston_marathon, assigns runners to consecutive "packs"
of about 200 bib numbers within each year, computes each pack's average
official time, flags runners whose official time differs from their pack
average by more than 20%, and writes one PROC SGPLOT scatter plot per
year to an HTML page (with PNG images) in the current directory.
*/

%let name=boston_marathon_times;

/*
Set your current-working-directory (to read/write files), if you need to ...
%let rc=%sysfunc(dlgcdir('c:\someplace\public_html'));
*/
filename odsout '.';

/*
Downloaded data from here:
https://github.com/llimllib/bostonmarathon
Creating a graph like the one here:
http://www.runnersworld.com/boston-marathon/dozens-suspected-of-cheating-to-enter-boston-marathon
*/

libname race_lib 'D:\Public\Boston_Marathon\';
/*
libname race_lib '\\l7a695.na.sas.com\Public\Boston_Marathon\';
*/

/*
Keep only rows with a numeric (non-missing) bib number, and put them in
bib order within each year. The pack assignment below depends on this
order, so sort explicitly rather than rely on how the data set happens
to be stored.
*/
proc sort data=race_lib.boston_marathon (where=(bib_num^=.)) out=runners;
   by year bib_num;
run;

/*
Assign each runner to a sequential pack of about 200 runners, based on
bib order (lower bib number = faster qualifying time, per the x-axis
label below).
The counter restarts for every year so that a pack never straddles two
years and every year's packs are cut the same way.
*/
data runners;
   set runners;
   by year;
   if first.year then seq_in_year=0;
   seq_in_year+1;
   /* by rounding, the first group is a little smaller ... which is good */
   obs_200=round(seq_in_year/200);
   drop seq_in_year;
run;

/*
Attach each pack's average official time to every runner in the pack, so
that outliers can be detected against it. (PROC SQL remerges the summary
statistic with the detail rows.)
The result is written to a new table instead of overwriting the table
being read, which avoids the "recursively references the target table"
warning.
*/
proc sql noprint;
   create table plot_data as
   select distinct *, avg(official) as time_avg
   from runners
   group by year, obs_200
   order by year descending, bib_num;
quit;

data plot_data;
   set plot_data;
   /* Outlier = more than 20% slower or faster than the pack average.
      The time_avg>0 guard avoids dividing by a zero or missing average. */
   if time_avg>0 then do;
      if abs(official/time_avg-1)>.2 then outlier=official;
   end;
   /* Drill-down link: a web search for the runner.
      catx() joins the pieces with single blanks and converts the year
      without an implicit-conversion log note; urlencode() escapes blanks
      and special characters in names so the query string stays intact.
      The length allows for the expansion caused by %xx escapes. */
   length my_drill $500;
   my_drill='https://www.google.com/search?q='||
            urlencode(catx(' ','boston marathon',year,name));
run;

ODS LISTING CLOSE;
ODS HTML path=odsout body="&name..htm"
   (title="Boston Marathon Times") style=htmlblue;
ods graphics /
   noscale /* if you don't use this option, the text will be resized */
   imagemap tipmax=50000
   imagefmt=png imagename="&name._#byval(year)"
   width=800px height=600px noborder;

/* Suppress the default by-line; the year is shown in title1 via #byval */
options nobyline;
title1 h=16pt c=gray33 "Boston Marathon #byval(year)";
title2 h=10pt ls=0.5 c=red "Red outliers " c=gray77 "were 20% slower or faster than their 'pack'";
footnote c=gray77 h=9pt "(only using data with numeric bib numbers - includes runners & perhaps some wheelchair racers)";

ods html anchor="#byval(year)";

proc sgplot data=plot_data noautolegend noborder;
   /* plot_data is ordered by descending year, hence notsorted */
   by year notsorted;
   /* NOTE(review): the 0-9 axis range combined with mmss. suggests that
      'official' is stored in minutes, so mm:ss reads as hh:mm - confirm */
   format official time_avg mmss.;
   /* every runner: faint blue circles, no tooltips */
   scatter y=official x=bib_num / tip=none
      markerattrs=(symbol=circle color=cx0000ff size=5pt) transparency=.85;
   /* outliers only: red x markers with tooltip and drill-down link */
   scatter y=outlier x=bib_num / tip=(name country bib_num official)
      url=my_drill
      markerattrs=(symbol=x color=cxff0000 size=4pt);
   /* pack average, drawn against the (identically scaled) right axis */
   series y=time_avg x=bib_num / tip=none y2axis lineattrs=(color=red);
   yaxis display=(noline noticks)
      labelattrs=(color=gray33 size=9pt) labelpos=top label='Race Time'
      values=('00:00:00't to '00:09:00't by '00:01:00't)
      valueattrs=(color=gray33 size=9pt)
      grid gridattrs=(color=graydd) offsetmin=0 offsetmax=0;
   y2axis display=(noline noticks)
      labelattrs=(color=gray33 size=9pt) labelpos=top label='Race Time'
      values=('00:00:00't to '00:09:00't by '00:01:00't)
      valueattrs=(color=gray33 size=9pt)
      grid gridattrs=(color=graydd) offsetmin=0 offsetmax=0;
   xaxis display=(noline noticks)
      labelattrs=(color=gray33 size=9pt)
      label='Bib Number (lower bib number = faster qualifying time)'
      values=(0 to 35000 by 7000)
      valueattrs=(color=gray33 size=9pt)
      grid gridattrs=(color=graydd) offsetmin=0 offsetmax=0;
run;

quit;
ODS HTML CLOSE;
ODS LISTING;