/* Rick Wicklin, 11 January, 2011.
   SAS program to investigate the "initial-matching problem":
   In a room with N people, what is the chance that two people
   share the same initials?

   Outline:
     1. PROC IML: read the empirical frequencies of initials, then use
        Monte Carlo simulation to estimate the probability that at least
        two people in a room of N share initials (N=20, then N=2..maxN).
     2. DATA step: rename/label the simulation results.
     3. PROC SGPLOT: plot estimated probability versus room size.
*/
proc iml;
call randseed(123);              /* fixed seed => reproducible estimates */

/* module from the book
   _Statistical Programming with SAS/IML Software_, by Rick Wicklin */
load module=SampleWithReplace;

/* EstMatchProb: Monte Carlo estimate of the probability that a room of
   N people contains at least two people with the same initials.
     p        - column vector of probabilities, one per distinct pair of initials
     NumRooms - number of simulated rooms
     N        - number of people in each room
   Returns the proportion of simulated rooms that contain >= 1 match. */
start EstMatchProb(p, NumRooms, N);
   /* one row per simulated room; each element indexes a row of p */
   initials = SampleWithReplace(1:nrow(p), NumRooms||N, p);
   match = j(NumRooms, 1);                /* allocate results vector */
   do room = 1 to NumRooms;
      u = unique(initials[room,]);        /* distinct initials in this room */
      match[room] = N - ncol(u);          /* number of people who duplicate someone else's initials */
   end;
   return( (match>0)[:] );                /* proportion of rooms with >= 1 match */
finish;

/* read the empirical proportions of initials; zero-frequency
   categories are excluded because they can never be sampled */
use Sasuser.InitialFreq where(Percent>0);
read all var {Percent};
close Sasuser.InitialFreq;
p = Percent/Percent[+];          /* normalize to probabilities that sum to 1 */

NumRooms = 1e5;                  /* number of simulated rooms */

/* estimated prob of >= 1 matching initials in a room of 20 people */
N = 20;
ProbEst = EstMatchProb(p, NumRooms, N);
print ProbEst;

/* repeat the estimate for every room size from 2 to maxN */
maxN = 40;
SimR = j(maxN, 1, 0);            /* SimR[1] stays 0: one person cannot match */
do N = 2 to maxN;                /* for rooms with N people... */
   SimR[N] = EstMatchProb(p, NumRooms, N);
end;

NumPeople = T(1:maxN);           /* number of people in room */
create InitialMatch var {NumPeople SimR};
append;
close InitialMatch;
quit;

/* give the results descriptive names and labels for plotting */
data InitialMatch;
   set InitialMatch(rename=(SimR=EstProb));
   label EstProb   = "Estimated Probability of Match"
         NumPeople = "Number of People in Room";
run;

proc sgplot data=InitialMatch;
   title "Probability of Matching Initials";
   title2 "SAS Employees, Cary, NC";
   series x=NumPeople y=EstProb;
   yaxis grid values=(0 to 1 by 0.1);
   xaxis grid values=(0 to 40 by 5);   /* upper limit must agree with maxN above */
run;
title;                           /* clear titles so they do not leak into later output */