/* SAS program to accompany the article 'Spatial data analysis: Is "La Quinta" Spanish for "Next to Denny's"?' by Rick Wicklin, The DO Loop blog, published 06JAN2017 http://blogs.sas.com/content/iml/2017/01/06/la-quinta-next-to-dennys.html */ /* Analysis follows the article "La Quinta is Spanish for "Next to Denny's"" C. Rundel and M. Cetinkaya-Rundel (2016), CHANCE, 29(2), p. 53-57 who based their analysis from J. Reiser (2014) "Mitch Hedberg and GIS", New Jersey Geographer blog, http://njgeo.org/2014/01/30/mitch-hedberg-and-gis/ */ ods graphics / reset antialiasmax=8100; /* Step 1: Download data files */ /* See http://blogs.sas.com/content/sasdummy/2016/07/13/build-your-pokemon-library-using-sas/ */ /* Location of the CSV files on C HANCE web site */ %let ChanceRoot=http://chance.amstat.org/files/2016/04; filename temp_csv "C:/Temp/Dennys.csv"; proc http url="&ChanceRoot./dennys.csv" method="GET" out=temp_csv; run; proc import file=temp_csv dbms=csv replace out=Dennys(where=(State not in ('AK' 'HI'))); guessingrows=max; run; filename temp_csv "C:/Temp/LaQuinta.csv"; proc http url="&ChanceRoot./laquinta.csv" method="GET" out=temp_csv; run; proc import file=temp_csv dbms=csv replace out=LaQuinta(where=(State not in ('AK' 'HI'))); guessingrows=max; run; /* Step 2 (Optional): Extract outlines of lower-48 states to overlay on the La Quinta and Denny's locations */ data USA; set mapsgfk.US_States; /* REQUIRES SAS/GRAPH software */ where StateCode NOT IN ("AK", "HI", "PR") AND Density<=5; by State Segment; if first.Segment then PolyID+1; /* create ID variable for polygons */ keep State Segment StateCode Long Lat PolyID; run; data US; set maps.states; where State NOT IN (2, 15, 72) AND Density < 4; /* AK, HI, PR */ by State Segment; if first.Segment then PolyID+1; /* create ID variable for polygons */ Long = -180 / constant("pi") * X; /* convert radians to degrees West */ Lat = 180 / constant("pi") * Y; /* convert radians to degrees */ keep State Segment Long Lat PolyID; run; /* concatenate LaQuinta, Denny's, and US State data */ data Positions; set LaQuinta(rename=(Longitude=LQLon Latitude=LQLat) drop=State) Dennys(rename=(Longitude=DLon Latitude=DLat) drop=State) US; keep LQLon LQLat DLon DLat Long Lat PolyID; run; /* Step 3: Visualize location of La Quinta Inns and Denny's Restaurants */ title "Locations of La Quinta Inns and Denny's Restaurants"; title2 "Contiguous US"; proc sgplot data=Positions; polygon x=Long y=Lat ID=PolyID / lineattrs=(color=Gray); scatter x=DLon y=DLat / markerattrs=GraphData2(symbol=plus size=7) legendlabel="Denny's Restaurant" name="Dennys"; scatter x=LQLon y=LQLat / markerattrs=GraphData1(symbol=circlefilled) transparency=0.45 legendlabel="La Quinta Inn" name="LQ"; xaxis label='Longitude' grid valueshint values=(-120 to -70 by 5); yaxis label='Latitude' grid; keylegend "LQ" "Dennys"/ opaque across=1 location=inside position=bottomleft; run; /*******************************************************************************/ /* Step 4: Compute distance to nearest Denny's for each La Quinta Inn */ /* http://blogs.sas.com/content/iml/2016/09/28/distance-between-two-group.html */ proc iml; reset linesize=120; /* http://blogs.sas.com/content/iml/2013/03/27/compute-distance.html */ /* compute distance between points in S and points in R. S is a n x d matrix, where each row is a point in d dimensions. R is a m x d matrix. The function returns the n x m matrix of distances, D, such that D[i,j] is the distance between S[i,] and R[j,]. Pass in "GEODIST" for 3rd argument to get distance between (Lon,Lat) coords. */ start PairwiseDist(S, R, method="EUCLIDEAN"); if ncol(S)^=ncol(R) then return (.); /* different dimensions */ n = nrow(S); m = nrow(R); idx = T(repeat(1:n, m)); /* index matrix for S */ jdx = shape(repeat(1:m, n), n); /* index matrix for R */ if upcase(method)="GEODIST" then do; GD= geodist(S[idx,2], S[idx,1], R[jdx,2], R[jdx,1]); /* distance in km from (lon,lat) */ return shape(GD, n); end; diff = S[idx,] - R[jdx,]; return( shape( sqrt(diff[,##]), n ) ); /* sqrt(sum of squares) */ finish; /* Read La Quinta and Denny's coordinates */ use LaQuinta; read all var {Longitude Latitude} into LaQuinta; /* 851 hotels */ read all var {City State} into LaQuintaAddr; close; use Dennys; read all var {Longitude Latitude} into Dennys; /* 1634 restaurants */ read all var {City State} into DennysAddr; close; /* Distances between La Quinta and nearest Denny's */ /* 1 km = 0.6214 miles */ D = PairwiseDist(LaQuinta, Dennys, "GEODIST"); /* n x m */ Dist = D[ ,><]; /* smallest distance in each row */ /* descriptive statistics of distances */ call qntl(q, Dist, {0 0.25 0.5 0.75 1}); print (q`)[L="Distance (km) from La Quinta to Nearest Denny's" colname={"Min" "Q1" "Median" "Q3" "Max"} format=7.3]; /* Histogram of distances */ title "Distribution of Distance"; title2 "From Each La Quinta Inn to the Nearest Denny's Restaurant"; call histogram(Dist) rebin={5 10} label="Distance (km)"; /* How many La Quinta Inns are within 1 km (about a 10 minute walk) from a Denny's? */ ndx = loc(Dist <= 1.0); print (ncol(ndx))[L="LQ w/ Denny's w/in 1 km"] (ncol(ndx)/nrow(Dist))[L="Prop w/in 1 km"]; /* How many La Quinta Inns are within 20 km? */ ndx = loc(Dist <= 20); print (ncol(ndx))[L="LQ w/ Denny's w/in 20 km"] (ncol(ndx)/nrow(Dist))[L="Prop w/in 20 km"]; /* How many La Quinta Inns are more than 80 km (about 50 miles or 1 hr driving) from a Denny's? */ ndx = loc(Dist > 80); print (ncol(ndx))[L="LQ w/ Denny's more than 80 km away"] (ncol(ndx)/nrow(Dist))[L="Prop more than 80 km away"]; /* Sort LQs by distance to nearest Denny's. Display LQs for which distance is shortest and longest */ call sortndx(sortNdx, Dist); sortDist = Dist[sortNdx]; sortAddr = LaQuintaAddr[sortNdx, ]; sortCoords = LaQuinta[sortNdx, ]; /* The La Quinta Inns that are closest to a Denny's */ k = 10; /* k-nearest and farthest Denny's */ dDenny = round( sortDist[1:k,], 0.001); print dDenny[L="" c="Dist (km)" F=6.3] (sortAddr[1:k,])[c={"City" "State"}]; /* The La Quinta Inns that are farthest from a Denny's */ n = nrow(Dist); dDenny = round( sortDist[n-k:n,], 1); print dDenny[L="" c="Dist (km)"] (sortAddr[n-k:n,])[c={"City" "State"}]; /* create nearest neighbor plot http://blogs.sas.com/content/iml/2016/09/14/nearest-neighbors-sas.html */ idx = D[ ,>:<]; /* column of smallest distance in each row */ ClosestDennys = Dennys[idx,]; LQ = LaQuinta || Dist || j(nrow(LaQuinta), 4, .); V = j(nrow(LaQuinta), 3, .) || LaQuinta || ClosestDennys; /* OPTIONAL: Since most LQs are ery close to a Denny's, only show vectors for LQ where distance to Denny's is a large value */ V = V[loc(Dist>80), ]; Z = LQ // V; create NN from Z[c={"LQLon" "LQLat" "Dist" "x0" "y0" "NDenLon" "NDenLat" }]; append from Z; close; QUIT; /* Step 5: Create nearest-neighbor plot */ data All; set NN US; run; title "How to Get to the Nearest Denny's from a La Quinta Inn"; title2 "Only Denny's Farther Than 80 km Are Shown"; proc sgplot data=All ; polygon x=Long y=Lat ID=PolyID / lineattrs=(color=Gray); scatter x=NDenLon y=NDenLat / markerattrs=GraphData2(symbol=plus size=7) legendlabel="Denny's Restaurant" name="Dennys"; scatter x=LQLon y=LQLat / markerattrs=(symbol=CircleFilled) transparency=0.45 legendlabel="La Quinta Inn" name="LQ" legendlabel="La Quinta Inn"; scatter x=x0 y=y0 / markerattrs=GraphData2(symbol=CircleFilled) name="LQFar" legendlabel="Far from a Denny's"; vector x=NDenLon y=NDenLat / xorigin=x0 yorigin=y0; xaxis label='Longitude' grid valueshint values=(-120 to -70 by 5); yaxis label='Latitude' grid; keylegend "LQ" "LQFar" "Dennys"/ opaque across=1 location=inside position=bottomleft; run;