-
Notifications
You must be signed in to change notification settings - Fork 7
/
gen_points.py
130 lines (108 loc) · 3.97 KB
/
gen_points.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from imposm.parser import OSMParser
from random import random, randint, shuffle
from zipfile import ZipFile
from math import radians, cos
from scipy.spatial import KDTree
import csv
import os
def project(coord) :
"This projection completely wrecks heading but preserves distances. (lon, lat) -> (x, y)"
m_per_degree = 111111.0
lon, lat = coord
y = lat * m_per_degree
x = lon * cos(radians(lat)) * m_per_degree
return (x, y)
def get_random_points( dirname, nn, radius=2000 ):
filenames = os.listdir( dirname )
pbfs = [x for x in filenames if x[-7:]=="osm.pbf"]
if len(pbfs)==0:
raise Exception( "no pbf file in directory" )
if len(pbfs)>1:
raise Exception(" more than one pbf file in directory" )
pbffilename = os.path.join( dirname, pbfs[0] )
zip_filenames = [x for x in filenames if x[-4:]==".zip"]
if len(zip_filenames)==0:
raise Exception( "No GTFS feeds found in directory." )
print "Collecting stop locations from GTFS files..."
stop_coords = []
for fn in zip_filenames:
print " ", fn
zf = ZipFile( os.path.join( dirname, fn ) )
try:
fp = zf.open("stops.txt")
except KeyError, e:
print "This ZIP file does not contain a stops.txt, skipping."
continue
rd = csv.reader(fp)
header = rd.next()
try:
lat_idx = header.index("stop_lat")
lon_idx = header.index("stop_lon")
except ValueError, e:
print "Stops.txt does not contain lat or lon columns, skipping."
continue
for row in rd:
stop_coords.append( (float(row[lon_idx]), float(row[lat_idx])) )
print "Done."
all_nds = set()
intersection_nds = set()
def road(ways):
for id, tags, nds in ways:
if 'highway' not in tags:
continue
# if we've ever seen any of these nds before, they're intersections
for nd in nds:
if nd in all_nds:
intersection_nds.add( nd )
all_nds.add( nd )
print "Collecting intersection nodes from PBF file..."
print " ", pbffilename
# concurrency is automatically set to number of cores
p = OSMParser(ways_callback=road)
p.parse(pbffilename)
all_nds.clear() # cannot del() because it's referenced in a nested scope
print "Done."
nd_coords = {}
def get_coords(coords):
for id, lon, lat in coords:
if (id in intersection_nds):
nd_coords[id] = (lon, lat)
print "Getting lat/lon coordinates of intersection nodes..."
# Avoid some sort of deadlock by setting concurrency to 1 (stacktrace shows sem.acquire())
p = OSMParser(concurrency=1, coords_callback=get_coords)
p.parse(pbffilename)
print "Done."
print "Adding projected GTFS stop locations to a KD tree."
# kdtree requires a long-lived immutable input, so make a copy
projected_stop_coords = map(project, stop_coords)
kdtree = KDTree(projected_stop_coords)
print "Done."
print "Choosing nodes near transit at random..."
ret = []
intersection_nds = list(intersection_nds)
shuffle(intersection_nds)
for nd in intersection_nds :
# Check that it's within radius of a stop
coord = nd_coords[nd]
distance, nearest_coord = kdtree.query(project(coord))
if distance > radius:
continue
ret.append( coord )
if len(ret) >= nn:
break
print "Done."
return ret
if __name__=='__main__':
import sys
# fn = "/Users/brandon/Documents/nysdot/graph/tristate.pbf"
if len(sys.argv)<3:
print "usage: cmd dirname num_points"
exit()
dirname = sys.argv[1]
ct = int(sys.argv[2])
nodes = get_random_points(dirname, ct)
fpout = open("endpoints_random.csv","w")
fpout.write("name,lat,lon\n")
for i,(lon,lat) in enumerate(nodes):
fpout.write("%s,%s,%s\n"%(i,lat,lon))
fpout.close()