/* This SAS program accompanies the article "Find your birthday in the digits of pi" by Rick Wicklin published on "The DO Loop" blog 13MAR2017 http://blogs.sas.com/content/iml/2017/03/13/find-your-birthday-in-pi.html ? Find your birthday (or any other date) within the first 10 million decimal digits of pi. */ /* read data over the internet from a URL */ filename rawurl url "http://www.cs.princeton.edu/introcs/data/pi-10million.txt" /* proxy='http://yourproxy.company.com:80' */ ; data PiDigits; infile rawurl lrecl=10000000; input Digit 1. @@; Position = _n_; run; /* Print the Pi Day "birthday" 03/14/88 in MMDDYY for 031488 */ proc print noobs data=PiDigits(firstobs=433422 obs=433427); var Position Digit; run; /* Pi Day birthday in alternative form 31488 */ proc print noobs data=PiDigits(firstobs=19466 obs=19470); var Position Digit; run; /* How to find any pattern in the digits of pi. For an explanation of the program, see http://blogs.sas.com/content/iml/2017/03/10/find-pattern-in-sequence-of-digits.html */ proc iml; /* Input: target : row vector of the target pattern digits : col vector of the digits in which to search Returns two-element vector: {Number of times pattern appears, first location} */ start FindPattern(target, digits); p = ncol(target); /* length of target sequence */ D = lag(digits, (p-1):0); /* columns shift the digits */ D = D[p:nrow(digits),]; /* delete first p rows */ X = (D=target); /* binary matrix */ /* sum across columns. Which rows contain all 1s? */ b = (X[,+] = p); /* b[i]=1 if i_th row matches target */ NumRepl = sum(b); /* how many times does target appear? */ if NumRepl=0 then FirstLoc = 0; else FirstLoc = loc(b)[1]; result = NumRepl // FirstLoc; labl = "Pattern = " + rowcat(char(target,1)); /* convert to string */ print result[L=labl F=COMMA9. rowname={"Num Repl", "First Loc"}]; finish; /* read in 10 million digits of pi */ use PiDigits; read all var {"Digit"}; close; target = {0 3 1 4 8 8}; call FindPattern(target, Digit); target = {3 1 4 8 8}; call FindPattern(target, Digit); target = {1 2 0 3}; call FindPattern(target, Digit); QUIT; /* Rick's birthday: 10/24/66 */ proc print noobs data=PiDigits(firstobs=2254877 obs=2254882); var Position Digit; run; /***************************************************/ /* Check ALL dates from 01/01/00 to 12/31/99 to see where/how often in digits of pi */ /* NOTE: This code takes about 3.5 minutes to run */ proc iml; use PiDigits; read all var {"Digit" "Position"}; close; len = 6; lag = lag(Digit, len:1); lag = lag[(len+1):nrow(lag),]; /* Preliminary step: Delete rows that could not possibly match the pattern MMDDYY. This reduces candidate list from 10M to about 1.2M */ tens = 10##(ncol(lag)-1:0); /* powers of 10 */ val = (lag # tens)[,+]; /* create digit: {1 2 3} ==> 123 */ validIdx = loc(val <= 123199); /* these rows can match MMDDYY pattern */ val = val[validIdx,]; /* throw away impossible patterns */ position = position[validIdx]; free lag; /* done with this matrix; use 'val' */ /* only search for matches within the feasible patterns */ Month=.; Day=.; Year=.; Loc=.; NumRepl=.; create BDay var {"Month" "Day" "Year" "Loc" "NumRepl"}; /* J F M A M J J A S O N D */ days = {31 29 31 30 31 30 31 31 30 31 30 31}; do year = 0 to 99; Y = putn(year, "Z2."); do month = 1 to 12; M = putn(month, "Z2."); do day = 1 to days[month]; D = putn(day, "Z2"); target = num( cat(M,D,Y) ); * create digit: {1 2 3} ==> 123; b = (val = target); * look for rows that match target; NumRepl = sum(b); * how many rows matched?; if NumRepl=0 then Loc=0; * pattern not found; else do; Loc = loc(b)[1]; * postion in reduced array; Loc = position[Loc]; * position in digits of pi; end; append; end; end; end; close; QUIT; /* If desired, make a permanent compy of the data */ libname temp "C:\Temp"; data temp.PiBirthdays; set BDay; run; /* run some analyses */ proc means data=temp.PiBirthdays min mean max; var Loc NumRepl; run; /* last occurance of first appearance of a date */ proc print noobs data=PiDigits(firstobs=9982545 obs=9982550); var Position Digit; run; /* You can find 12/1/54, but not 12/01/54 */ proc print noobs data=PiDigits(firstobs=92570 obs=92574); var Position Digit; run; /* dates that appear one or zero times */ data Unique; set temp.PiBirthdays; where Loc = 0 OR NumRepl = 1; run; proc sort data=Unique; by year month day; run; proc print data=Unique; format month day year Z2. Loc COMMA9.; run; proc print data=Unique; where NumRepl=0; format month day year Z2. Loc COMMA9.; run; proc print data=Unique; where NumRepl^=0; format month day year Z2. Loc COMMA9.; run; /* most common */ proc print noobs data=temp.PiBirthdays; where NumRepl=25; run; /* distribution of how many times dates appear */ proc freq data=temp.PiBirthdays; tables NumRepl / out=FreqOut; run; data fake; NumRepl=24; Count=0; Percent=0; run; data ReplCount / view=ReplCount; set FreqOut fake; run; title "Number of Times That Each Date Appears"; title2 "Six-Digit Dates as MMDDYY in 10M Decimal Digits of pi"; proc sgplot data=ReplCount; vbarparm category=NumRepl response=Count / nozerobars; yaxis grid; run; /* extreme observations in Loc variable */ proc univariate data=temp.PiBirthdays; ods select ExtremeObs; var Loc; run; /* earlest occurrence of a date */ proc print noobs data=PiDigits(firstobs=71 obs=76); var Position Digit; run; /* latest occurrence of a date */ proc print noobs data=PiDigits(firstobs=9982545 obs=9982550); var Position Digit; run;