/* Rick Wicklin, 11 January, 2011.
SAS program to investigate the "initial-matching problem":
In a room with N people, what is the chance that two people
share the same initials?
*/
proc iml;
call randseed(123);   /* fixed seed so that the simulation is reproducible */

/* module from the book _Statistical Programming with SAS/IML Software_, by Rick Wicklin */
load module=SampleWithReplace;

/* Estimate the probability that at least two people in a room share
   the same initials.
     p        - vector of probabilities, one element per distinct set of initials
     NumRooms - number of rooms to simulate
     N        - number of people in each room
   Returns the proportion of simulated rooms that contain at least one match. */
start EstimateMatchProb(p, NumRooms, N);
   /* one row per simulated room; entries are drawn from the indices 1:nrow(p) */
   initials = SampleWithReplace(1:nrow(p), NumRooms||N, p);
   match = j(NumRooms, 1, 0);          /* allocate results vector */
   do room = 1 to NumRooms;
      u = unique(initials[room,]);     /* distinct initials present in this room */
      match[room] = N - ncol(u);       /* number of people whose initials duplicate another's */
   end;
   prob = (match>0)[:];                /* mean of 0/1 indicator = proportion of rooms with a match */
   return( prob );
finish;

/* simulate matching initials by using the empirical proportions
   stored in Sasuser.InitialFreq (zero-frequency initials are excluded) */
use Sasuser.InitialFreq where(Percent>0);
read all var {Percent};
close Sasuser.InitialFreq;
p = Percent/Percent[+];   /* normalize so that the probabilities sum to 1 */

NumRooms = 1e5;           /* number of simulated rooms */

/* estimated prob of >= 1 matching initials in a room of 20 people */
N = 20;
ProbEst = EstimateMatchProb(p, NumRooms, N);
print ProbEst;

/* repeat the estimate for every room size from 2 to maxN */
maxN = 40;
SimR = j(maxN, 1, 0);     /* SimR[1] is never assigned in the loop, so it stays 0 */
do N = 2 to maxN;         /* for rooms with N people... */
   SimR[N] = EstimateMatchProb(p, NumRooms, N);
end;

/* write the room sizes and the estimates to the data set InitialMatch */
NumPeople = T(1:maxN);    /* number of people in room */
create InitialMatch var {NumPeople SimR};
append;
close InitialMatch;
quit;
/* Rewrite InitialMatch in place: rename SimR to EstProb as the data are
   read, and attach descriptive labels to both variables. */
data InitialMatch;
   set InitialMatch(rename=(SimR=EstProb));
   label NumPeople = "Number of People in Room";
   label EstProb   = "Estimated Probability of Match";
run;
/* Line plot of the estimated match probability against the room size. */
proc sgplot data=InitialMatch;
   title  "Probability of Matching Initials";
   title2 "SAS Employees, Cary, NC";
   series x=NumPeople y=EstProb;
   /* grid lines and fixed tick marks on both axes */
   xaxis grid values=(0 to 40 by 5);
   yaxis grid values=(0 to 1 by 0.1);
run;