/*
 * Plots total COVID-19 tests conducted against total confirmed cases
 * (log-log), first as a static scatter plot for one snapshot date and
 * then as an animated GIF of per-country trajectories.
 *
 * Data downloaded from:
 *   https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
 *   https://covid.ourworldindata.org/data/owid-covid-data.csv
 *
 * Download the datasets in csv format from the links above and specify
 * the path to each csv on the filename statements below.
 */
filename owid "owid-covid-data.csv";
filename ecdc "ecdc.csv";

proc import datafile=owid out=work.owid dbms=csv replace;
    getnames=yes;
    guessingrows=all;
run;

/* Only the country code and continent are taken from the ECDC file;
   the code is renamed so it can be merged with the OWID iso_code. */
proc import datafile=ecdc
        out=work.ecdc (rename=(countryterritoryCode=iso_code)
                       keep=countryterritoryCode continentExp)
        dbms=csv replace;
    getnames=yes;
    guessingrows=all;
run;

/* Countries shown in the animation (iso codes, quoted, comma separated) */
/*%let country = 'FRA','ITA','USA','GBR','KOR','RUS', 'NZL';*/
%let country='USA','GBR','NZL';

/* Date (DATE9. text) used for the static scatter plot and its footnote */
%let snapshot_date=03MAY2020;

proc sort data=work.owid;
    by iso_code;
run;

/* The ECDC extract repeats iso_code on many rows. Reduce it to one row
   per iso_code so the merge below is a clean many-to-one lookup instead
   of a many-to-many merge.
   NOTE(review): assumes continentExp is constant within an iso_code. */
proc sort data=work.ecdc nodupkey;
    by iso_code;
run;

/* Attach the continent to every OWID row; ECDC-only codes are dropped */
data work.owid;
    merge work.owid (in=in_owid) work.ecdc;
    by iso_code;
    if in_owid;
run;

/* grab the Days since the 1st total confirmed case and 1st test */
data work.owid;
    set work.owid;
    where total_cases ge 1 and total_tests ge 1;
run;

/* Store the data extremes in macro variables. An explicit wide format
   and TRIMMED keep the values as plain digits: the default conversion
   can switch to scientific notation for large values, which %eval
   below cannot evaluate. */
proc sql noprint;
    select max(total_tests) format=best32. into :maxtests trimmed from work.owid;
    select max(Total_cases) format=best32. into :maxcases trimmed from work.owid;
    select max(date)        format=best32. into :maxdate  trimmed from work.owid;
    select min(date)        format=best32. into :mindate  trimmed from work.owid;
quit;

/* add some offsets to x-axis values to avoid curvelabel truncation */
%let adjcases=%eval(&maxcases+1000000);

/* verify the macro variables */
data _null_;
    format mindate maxdate date9.;
    maxtests=&maxtests;
    maxcases=&maxcases;
    maxdate=&maxdate;
    mindate=&mindate;
    totaldays=&maxdate-&mindate;
    put maxtests= maxcases= maxdate= mindate= totaldays=;
run;

/* prepare data for reflines */
/* Each slope k yields a reference line tests = k * cases */
%let slopes=1 2 5 10 20 50 100 200;
/* NOTE(review): initialSubjects is not referenced anywhere in this program */
%let initialSubjects=5;

/* Generates, inside a DATA step, one assignment per entry of &slopes:
   y<k> = k * Total_cases */
%macro create_refline_data;
    %local i next_slope;
    %let i=1;
    %do %while (%scan(&slopes, &i) ne);
        %let next_slope = %scan(&slopes, &i);
        y&next_slope=(&next_slope * (Total_cases));
        /*if y&next_slope > &maxtests then y&next_slope=.;*/
        %let i = %eval(&i + 1);
    %end;
%mend;

/* macro program to plot reference lines using multiple series */
/* Generates, inside PROC SGPLOT, one SERIES statement per entry of
   &slopes. The i-th word of labels is the curve label of the i-th slope. */
%macro drawrefline;
    %local i next_slope next_label labels;
    %let labels = Tests=Cases 2x 5x 10x 20x 50x 100x 200x;
    %let i=1;
    %do %while (%scan(&slopes, &i) ne);
        %let next_slope = %scan(&slopes, &i);
        %let next_label = %scan(&labels, &i);
        series x=cases y=y&next_slope /
            lineattrs=(color=lightgray)
            curvelabel="&next_label"
            curvelabelpos=max
            curvelabelloc=outside
            transparency=0.3
            curvelabelattrs=(color=gray);
        %let i = %eval(&i + 1);
    %end;
%mend;

/* Grid of case counts (1 to &adjcases in steps of 100000), repeated for
   every date between &mindate and &maxdate */
data _temp;
    format tempdate date9.;
    retain Total_cases;
    do tempdate=&mindate to &maxdate;
        do Total_cases=1 to &adjcases by 100000;
            output;
        end;
    end;
run;

/* Add the y<k> reference-line columns; the grid variable is renamed to
   cases on output so it does not clash with the OWID Total_cases */
data _temp (rename=(Total_cases=cases));
    set _temp;
    %create_refline_data;
run;

/* join refline data */
proc sql;
    create table covid as
        select *
        from work.owid as x
        left join work._temp as y
            on x.date=y.tempdate;
quit;

proc sort data=covid;
    by date location;
run;

/* The join repeats every OWID row once per refline grid row of the same
   date. Keep total_tests on the first copy only and set it to missing
   on the others (presumably so each country/date is plotted once). */
data covid;
    set covid;
    by date location;
    if first.location=0 then do;
        total_tests=.;
    end;
run;

/* prepare attribute map dataset for the scatter plot */
data work.attrmap;
    id="myid";value="Oceania";markercolor="lightseagreen";markercontrastcolor="lightseagreen";output;
    id="myid";value="America";markercolor="orange";markercontrastcolor="orange";output;
    id="myid";value="Europe";markercolor="lightred";markercontrastcolor="lightred";output;
    id="myid";value="Africa";markercolor="dodgerblue";markercontrastcolor="dodgerblue";output;
    id="myid";value="Asia";markercolor="charcoal";markercontrastcolor="charcoal";output;
run;

proc sort data=covid;
    by date;
run;

/* ---- static scatter plot for &snapshot_date ---- */
ods listing image_dpi=300;
ods graphics / labelmax=10000 ;

title j=l "Total COVID-19 Tests Conducted against Confirmed Cases";
footnote1 j=l "Data Source: https://github.com/owid/covid-19-data/tree/master/public/data [&snapshot_date]";
footnote2 j=l "Created using PROC SGPLOT";

proc sgplot data=work.covid noborder nowall dattrmap=attrmap aspect=1;
    %drawrefline;
    scatter x=Total_cases y=Total_tests /
        group=continentExp
        grouporder=ascending
        attrid=myid
        datalabel=location
        markerattrs=(symbol=circlefilled)
        name="scatter";
    yaxis type=log label="Total Tests (LOG scale)" min=500 valuesformat=best12. offsetmax=0.01 ;
    xaxis type=log label="Total Cases (LOG scale)" min=16 valuesformat=best12. ;
    keylegend "scatter";
    where date eq "&snapshot_date"d ;
run;

ods listing image_dpi=96;
title;
footnote;

/* prep data for animation */
/* Replicate every row once per candidate animation date ... */
data loop_dates;
    set covid;
    do i=&mindate to &maxdate;
        output;
    end;
    format i date9.;
    rename i=Animation_Date;
run;

/* ... and keep, for each animation date, only the rows dated on or
   before it, so each frame shows the history up to that date */
data covid_Animated;
    set loop_dates;
    where date le Animation_Date;
run;

proc sort data=covid_Animated;
    by animation_date;
run;

ods html;
options papersize=('6 in', '6 in') nodate nonumber
        animduration=0.25 animloop=yes noanimoverlay
        printerpath=gif animation=start;
ods printer file='covid.gif';

/* create the visual using SGPlot */
ods graphics / width=6in height=6in imagefmt=gif antialiasmax=1000000 labelmax=600;
ods html select none;

title "Total COVID-19 Tests Conducted against Confirmed Cases";
footnote1 "Data Source: https://covid.ourworldindata.org/data/owid-covid-data.csv";
footnote2 "Created using PROC SGPLOT";

/* One graph per Animation_Date BY group; each becomes a GIF frame */
proc sgplot data=work.covid_Animated noborder nowall subpixel aspect=1 noautolegend;
    %drawrefline;
    /* arrowheadshape was given twice (barbed, then filled); only the
       later value, filled, is kept */
    series x=total_cases y=total_tests /
        group=iso_code
        grouporder=ascending
        curvelabel
        lineattrs=(pattern=solid thickness=1px)
        arrowheadpos=end
        arrowheadshape=filled
        name="series";
    yaxis type=log label="Total Tests (LOG scale)" min=1000 offsetmin=0.01 offsetmax=0.01 ;
    xaxis type=log label="Total Cases (LOG scale)" min=32 offsetmin=0.01 offsetmax=0.01 ;
    by Animation_Date;
    where iso_code in (&country) and (animation_date ge '07MAR2020'd);
run;

title;
footnote;
ods html select all;

/* stop collecting frames, then close the GIF and all ODS destinations */
options printerpath=gif animation=stop;
run;
ods printer close;
ods _all_ close;