/* ORIGINAL DATA FROM http://www.rhesusnegative.net/themission/bloodtypefrequencies/ */

/* Download the Excel spreadsheet blood_type_data.xlsx from
   http://www.robslink.com/SAS/democd74/
   The disabled code below shows how the spreadsheet was imported and cleaned.
   It is kept for reference only and is never executed. */
/*
PROC IMPORT OUT=blood_data DATAFILE="blood_type_data.xlsx" DBMS=EXCEL REPLACE;
   RANGE="Sheet1$";
   GETNAMES=YES;
   MIXED=NO;
   SCANTEXT=YES;
RUN;

data blood_data;
   length idname $100;
   format O_positive A_positive B_positive AB_positive percentn7.1;
   format O_negative A_negative B_negative AB_negative percentn7.1;
   set blood_data (rename=(
      O_=O_positive A_=A_positive B_=B_positive AB_=AB_positive
      O_0=O_negative A_0=A_negative B_0=B_negative AB_0=AB_negative ));
   idname=country;
   length O_positive_html A_positive_html B_positive_html AB_positive_html $300;
   O_positive_html='title='||quote(trim(left(idname))||': '||put(O_positive,percentn7.2));
   A_positive_html='title='||quote(trim(left(idname))||': '||put(A_positive,percentn7.2));
   B_positive_html='title='||quote(trim(left(idname))||': '||put(B_positive,percentn7.2));
   AB_positive_html='title='||quote(trim(left(idname))||': '||put(AB_positive,percentn7.2));
   length O_negative_html A_negative_html B_negative_html AB_negative_html $300;
   O_negative_html='title='||quote(trim(left(idname))||': '||put(O_negative,percentn7.2));
   A_negative_html='title='||quote(trim(left(idname))||': '||put(A_negative,percentn7.2));
   B_negative_html='title='||quote(trim(left(idname))||': '||put(B_negative,percentn7.2));
   AB_negative_html='title='||quote(trim(left(idname))||': '||put(AB_negative,percentn7.2));
run;

data blood_data;
   set blood_data;
   if idname='Russia' then idname='Russian Federation';
   if idname='Korea' then idname='Korea, Republic of';
   if idname='Syria' then idname='Syrian Arab Republic';
   if idname='Taiwan' then idname='China/Taiwan, Province of China';
run;
*/

/* Library holding the permanent BLOOD_DATA data set (site-specific path). */
libname dataloc "C:\Users\frwick\Documents\My SAS Files\Blog";

/* Wide view: one row per country.
   obsNum   - row number, used as the subject (observation) ID
   TotalPos - sum of the four Rh-positive proportions; computed by variable
              name so it does not depend on row or column position.
              SUM() ignores missing arguments. */
data Wide / view=Wide;
   set dataloc.blood_data(drop=population);
   obsNum   = _N_;
   TotalPos = sum(O_positive, A_positive, B_positive, AB_positive);
run;

/* Transpose from wide to long data format; Type is a categorical var.
   Country and TotalPos are listed on the BY statement (obsNum is unique and
   ascending, so every input row is its own BY group). That carries both
   values onto every output row, so no separate fill-down pass is needed.
   The VAR range is the same one used for the principal component analysis. */
proc transpose data=Wide name=Type out=Long(rename=(Col1=Pct) drop=_LABEL_);
   by obsNum Country TotalPos;
   var O_positive--AB_negative;
run;

/* Replace variable names with blood-type labels. Rh-negative proportions are
   negated so they extend to the left of zero in the butterfly chart.
   The match is done on the upper-cased name so it does not depend on the
   case in which the variable names were stored. */
data Long;
   set Long;
   select (upcase(Type));
      when ("O_POSITIVE")  Type = "O+";
      when ("A_POSITIVE")  Type = "A+";
      when ("B_POSITIVE")  Type = "B+";
      when ("AB_POSITIVE") Type = "AB+";
      when ("O_NEGATIVE")  do; Type = "O-";  Pct = -Pct; end;
      when ("A_NEGATIVE")  do; Type = "A-";  Pct = -Pct; end;
      when ("B_NEGATIVE")  do; Type = "B-";  Pct = -Pct; end;
      when ("AB_NEGATIVE") do; Type = "AB-"; Pct = -Pct; end;
      otherwise;           /* leave any other value untouched */
   end;
run;

/* Order countries by total Rh-positive proportion (ties broken by name).
   The sorted copy is written to Blood; Long keeps its original order. */
proc sort data=Long out=Blood;
   by TotalPos Country;
run;

/* create stacked bar chart, but butterfly it by positive/negative Rh factors */
ods graphics / width=800px height=1000px;
title "Distribution of Blood Types by Country";
proc sgplot data=Blood;
   where TotalPos<1;                      /* data quality issues! */
   format pct PERCENT8.1;
   hbar Country / response=Pct group=Type groupdisplay=stack;
   xaxis display=(nolabel);
   /* discreteorder=data keeps the bars in the sorted order of Blood */
   yaxis display=(nolabel) discreteorder=data valueattrs=(size=8pt) fitpolicy=none;
run;

/********************************/
/* PRINCIPAL COMPONENT ANALYSIS */
/********************************/
/*
proc princomp data=Wide N=2 plots=all;
   var A_Positive A_Negative B_Positive B_Negative
       AB_Positive AB_Negative O_Positive O_Negative;
   ID Country;
run;
*/

/* Keep only the first two principal component scores for each country. */
proc princomp data=Wide N=2 noprint out=ScorePlot(keep=Country Prin1 Prin2);
   var O_Positive--AB_Negative;
   ID Country;
run;

/* United Nations list of countries and geographic regions:
   http://unstats.un.org/unsd/methods/m49/m49regin.htm
   Assign each country to a region. Countries that match none of the lists
   keep a blank Region; a note is written to the log for each of them. */
data ScorePlot;
   set ScorePlot;
   length Region $20;
   if Country in (
      "Burundi ", "Comoros ", "Djibouti ", "Eritrea ", "Ethiopia ", "Kenya ",
      "Madagascar ", "Malawi ", "Mauritius ", "Mayotte ", "Mozambique ",
      "Reunion ", "Rwanda ", "Seychelles ", "Somalia ", "South Sudan ",
      "Uganda ", "United Republic of Tanzania ", "Zambia ", "Zimbabwe ",
      "Angola ", "Cameroon ", "Central African Republic ", "Chad ", "Congo ",
      "Democratic Republic of the Congo ", "Equatorial Guinea ", "Gabon ",
      "Sao Tome and Principe ", "Algeria ", "Egypt ", "Libya ", "Morocco ",
      "Sudan ", "Tunisia ", "Western Sahara ", "Botswana ", "Lesotho ",
      "Namibia ", "South Africa ", "Swaziland ", "Benin ", "Burkina Faso ",
      "Cabo Verde ", "Cote d'Ivoire ", "Gambia ", "Ghana ", "Guinea ",
      "Guinea-Bissau ", "Liberia ", "Mali ", "Mauritania ", "Niger ",
      "Nigeria ", "Saint Helena ", "Senegal ", "Sierra Leone ", "Togo "
      ) then Region = "Africa";
   else if Country in (
      /* Caribbean */
      "Anguilla ", "Antigua and Barbuda ", "Aruba ", "Bahamas ", "Barbados ",
      "Bonaire, Saint Eustatius and Saba ", "British Virgin Islands ",
      "Cayman Islands ", "Cuba ", "Curaçao", "Dominica ",
      "Dominican Republic ", "Grenada ", "Guadeloupe ", "Haiti ", "Jamaica ",
      "Martinique ", "Montserrat ", "Puerto Rico ", "Saint-Barthélemy",
      "Saint Kitts and Nevis ", "Saint Lucia ", "Saint Martin (French part) ",
      "Saint Vincent and the Grenadines ", "Sint Maarten (Dutch part) ",
      "Trinidad and Tobago ", "Turks and Caicos Islands ",
      "United States Virgin Islands ",
      /* Central America */
      "Belize ", "Costa Rica ", "El Salvador ", "Guatemala ", "Honduras ",
      "Mexico ", "Nicaragua ", "Panama "
      ) then Region = "C. Amer & Caribbean";
   else if Country in (
      "Argentina ", "Bolivia", "Brazil ", "Chile ", "Colombia ", "Ecuador ",
      "Falkland Islands", "French Guiana ", "Guyana ", "Paraguay ", "Peru ",
      "Suriname ", "Uruguay ", "Venezuela"
      ) then Region = "S. America";
   else if Country in (
      "Bermuda ", "Canada ", "Greenland ", "Saint Pierre and Miquelon ",
      "United States"
      ) then Region = "N. America";
   else if Country in (
      "Kazakhstan ", "Kyrgyzstan ", "Tajikistan ", "Turkmenistan ",
      "Uzbekistan ", "China ", "Hong Kong",
      "China, Macao Special Administrative Region ",
      "Democratic People's Republic of Korea ", "Taiwan", "Japan ",
      "Mongolia ", "Korea", "Afghanistan ", "Bangladesh ", "Bhutan ",
      "India ", "Iran", "Maldives ", "Nepal ", "Pakistan ", "Sri Lanka ",
      "Brunei Darussalam ", "Cambodia ", "Indonesia ",
      "Lao People's Democratic Republic ", "Malaysia ", "Myanmar ",
      "Philippines ", "Singapore ", "Thailand ", "Timor-Leste ", "Viet Nam "
      ) then Region = "Asia";
   else if Country in (
      "Armenia ", "Azerbaijan ", "Bahrain ", "Cyprus ", "Georgia ", "Iraq ",
      "Israel ", "Jordan ", "Kuwait ", "Lebanon ", "Oman ", "Qatar ",
      "Saudi Arabia ", "State of Palestine ", "Syria", "Turkey ",
      "United Arab Emirates ", "Yemen "
      ) then Region = "W. Asia";
   else if Country in (
      "Belarus ", "Bulgaria ", "Czech Republic ", "Hungary ", "Poland ",
      "Republic of Moldova ", "Romania ", "Russia", "Slovakia ", "Ukraine ",
      "Aland Islands ", "Channel Islands ", "Denmark ", "Estonia ",
      "Faeroe Islands ", "Finland ", "Guernsey ", "Iceland ", "Ireland ",
      "Isle of Man ", "Jersey ", "Latvia ", "Lithuania ", "Norway ", "Sark ",
      "Svalbard and Jan Mayen Islands ", "Sweden ", "United Kingdom",
      "Albania ", "Andorra ", "Bosnia and Herzegovina ", "Croatia ",
      "Gibraltar ", "Greece ", "Holy See ", "Italy ", "Malta ",
      "Montenegro ", "Portugal", "San Marino", "Serbia", "Slovenia ",
      "Spain ", "The former Yugoslav Republic of Macedonia ", "Austria ",
      "Belgium ", "France ", "Germany ", "Liechtenstein ", "Luxembourg ",
      "Monaco ", "Netherlands ", "Switzerland "
      ) then Region = "Europe";
   else if Country in (
      "Australia ", "New Zealand ", "Norfolk Island ", "Melanesia ", "Fiji ",
      "New Caledonia ", "Papua New Guinea ", "Solomon Islands ", "Vanuatu ",
      "Guam ", "Kiribati ", "Marshall Islands ", "Nauru ",
      "Northern Mariana Islands ", "Palau ", "American Samoa ",
      "Cook Islands ", "Niue ", "Pitcairn ", "Samoa ", "Tokelau ", "Tonga ",
      "Tuvalu ", "Wallis and Futuna Islands "
      ) then Region = "Oceania";
   else putlog "NOTE: No region assigned for " Country=;
run;

/* Score plot of the first two principal components, colored by region.
   The percentages in the axis labels are hard-coded text, not computed
   by this program. */
ods graphics / width=800px height=800px;
title "Principal Component Score Plot";
proc sgplot data=ScorePlot aspect=1;
   scatter x=Prin1 y=Prin2 / Group=Region datalabel=Country
           markerattrs=(symbol=CircleFilled);
   refline 0 / axis=x;
   refline 0 / axis=y;
   xaxis label="First PC (52%)";
   yaxis label="Second PC (29%)";
run;