From ca0763bfda20cc7cbd8ee17e3fc32079dc6ef0a9 Mon Sep 17 00:00:00 2001 From: Esteban Zimanyi Date: Mon, 1 Jul 2024 17:38:04 +0200 Subject: [PATCH] Improve GTFS chapter --- docs/GTFS.xml | 103 +++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 59 deletions(-) diff --git a/docs/GTFS.xml b/docs/GTFS.xml index 22da9f8..c4ca23f 100644 --- a/docs/GTFS.xml +++ b/docs/GTFS.xml @@ -25,10 +25,6 @@ calendar_dates.txt define exceptions to the default service patterns defined in calendar.txt. There are two types of exceptions: 1 means that the service has been added for the specified date, and 2 means that the service has been removed for the specified date. - - route_types.txt contains transportation types used on routes, such as bus, metro, tramway, etc. - - routes.txt contains transit routes. A route is a group of trips that are displayed to riders as a single service. @@ -85,7 +81,6 @@ CREATE TABLE calendar ( end_date date NOT NULL, CONSTRAINT calendar_pkey PRIMARY KEY (service_id) ); -CREATE INDEX calendar_service_id ON calendar (service_id); CREATE TABLE exception_types ( exception_type int PRIMARY KEY, @@ -97,19 +92,14 @@ CREATE TABLE calendar_dates ( date date NOT NULL, exception_type int REFERENCES exception_types(exception_type) ); -CREATE INDEX calendar_dates_dateidx ON calendar_dates (date); - -CREATE TABLE route_types ( - route_type int PRIMARY KEY, - description text -); +CREATE INDEX calendar_dates_date_idx ON calendar_dates (date); CREATE TABLE routes ( route_id text, route_short_name text DEFAULT '', route_long_name text DEFAULT '', route_desc text DEFAULT '', - route_type int REFERENCES route_types(route_type), + route_type int, route_url text, route_color text, route_text_color text, @@ -120,17 +110,17 @@ CREATE TABLE shapes ( shape_id text NOT NULL, shape_pt_lat double precision NOT NULL, shape_pt_lon double precision NOT NULL, - shape_pt_sequence int NOT NULL + shape_pt_sequence int NOT NULL, + shape_dist_traveled float NOT NULL ); -CREATE INDEX shapes_shape_key ON shapes (shape_id); +CREATE INDEX shapes_shape_id_idx ON shapes (shape_id); -- Create a table to store the shape geometries CREATE TABLE shape_geoms ( shape_id text NOT NULL, - shape_geom geometry('LINESTRING', 4326), + shape_geom geometry('LINESTRING', 3857), CONSTRAINT shape_geom_pkey PRIMARY KEY (shape_id) ); -CREATE INDEX shape_geoms_key ON shapes (shape_id); CREATE TABLE location_types ( location_type int PRIMARY KEY, @@ -146,9 +136,9 @@ CREATE TABLE stops ( stop_lon double precision, zone_id text, stop_url text, - location_type integer REFERENCES location_types(location_type), + location_type integer REFERENCES location_types(location_type), parent_station integer, - stop_geom geometry('POINT', 4326), + stop_geom geometry('POINT', 3857), platform_code text DEFAULT NULL, CONSTRAINT stops_pkey PRIMARY KEY (stop_id) ); @@ -160,7 +150,7 @@ CREATE TABLE pickup_dropoff_types ( CREATE TABLE stop_times ( trip_id text NOT NULL, - -- Check that casting to time interval works. + -- Check that casting to time interval works arrival_time interval CHECK (arrival_time::interval = arrival_time::interval), departure_time interval CHECK (departure_time::interval = departure_time::interval), stop_id text, @@ -183,7 +173,6 @@ CREATE TABLE trips ( shape_id text, CONSTRAINT trips_pkey PRIMARY KEY (trip_id) ); -CREATE INDEX trips_trip_id ON trips (trip_id); INSERT INTO exception_types (exception_type, description) VALUES (1, 'service has been added'), @@ -218,8 +207,6 @@ COPY trips(route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape FROM '/home/gtfs_tutorial/trips.txt' DELIMITER ',' CSV HEADER; COPY agency(agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone) FROM '/home/gtfs_tutorial/agency.txt' DELIMITER ',' CSV HEADER; -COPY route_types(route_type,description) -FROM '/home/gtfs_tutorial/route_types.txt' DELIMITER ',' CSV HEADER; COPY routes(route_id,route_short_name,route_long_name,route_desc,route_type,route_url, route_color,route_text_color) FROM '/home/gtfs_tutorial/routes.txt' DELIMITER ',' CSV HEADER; @@ -233,12 +220,12 @@ CSV HEADER; INSERT INTO shape_geoms SELECT shape_id, ST_MakeLine(array_agg( - ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat),4326) ORDER BY shape_pt_sequence)) + ST_Transform(ST_Point(shape_pt_lon, shape_pt_lat, 4326), 3857) ORDER BY shape_pt_sequence)) FROM shapes GROUP BY shape_id; UPDATE stops -SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326); +SET stop_geom = ST_Transform(ST_Point(stop_lon, stop_lat, 4326), 3857); The visualization of the routes and stops in QGIS is given in . In the figure, red lines correspond to the trajectories of vehicles, while orange points correspond to the location of stops. @@ -253,21 +240,21 @@ SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326); Transforming GTFS Data for MobilityDB - We start by creating a table that contains couples of service_id and date defining the dates at which a service is provided. + We start by creating a table that contains couples of service_id and date defining the dates at which a service is provided. DROP TABLE IF EXISTS service_dates; CREATE TABLE service_dates AS ( SELECT service_id, date_trunc('day', d)::date AS date FROM calendar c, generate_series(start_date, end_date, '1 day'::interval) AS d WHERE ( - (monday = 1 AND extract(isodow FROM d) = 1) OR - (tuesday = 1 AND extract(isodow FROM d) = 2) OR - (wednesday = 1 AND extract(isodow FROM d) = 3) OR - (thursday = 1 AND extract(isodow FROM d) = 4) OR - (friday = 1 AND extract(isodow FROM d) = 5) OR - (saturday = 1 AND extract(isodow FROM d) = 6) OR - (sunday = 1 AND extract(isodow FROM d) = 7) -) + (monday = 1 AND extract(isodow FROM d) = 1) OR + (tuesday = 1 AND extract(isodow FROM d) = 2) OR + (wednesday = 1 AND extract(isodow FROM d) = 3) OR + (thursday = 1 AND extract(isodow FROM d) = 4) OR + (friday = 1 AND extract(isodow FROM d) = 5) OR + (saturday = 1 AND extract(isodow FROM d) = 6) OR + (sunday = 1 AND extract(isodow FROM d) = 7) +) EXCEPT SELECT service_id, date FROM calendar_dates WHERE exception_type = 2 @@ -304,8 +291,8 @@ FROM trips t JOIN stop_times s ON t.trip_id = s.trip_id; UPDATE trip_stops t SET perc = CASE -WHEN stop_sequence = 1 then 0.0 -WHEN stop_sequence = no_stops then 1.0 +WHEN stop_sequence = 1 THEN 0.0 +WHEN stop_sequence = no_stops THEN 1.0 ELSE ST_LineLocatePoint(g.shape_geom, s.stop_geom) END FROM shape_geoms g, stops s @@ -359,7 +346,7 @@ SET seg_length = ST_Length(seg_geom), no_points = ST_NumPoints(seg_geom); - The geometry of a segment is a linestring containing multiple points. From the previous table we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table trip_points that contains all the points composing the geometry of a segment. + The geometry of a segment is a linestring containing multiple points. From table trip_stops we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table trip_points that contains all the points composing the geometry of a segment. DROP TABLE IF EXISTS trip_points; CREATE TABLE trip_points ( @@ -377,29 +364,26 @@ INSERT INTO trip_points (trip_id, route_id, service_id, stop1_sequence, point_sequence, point_geom, point_arrival_time) WITH temp1 AS ( SELECT trip_id, route_id, service_id, stop1_sequence, stop2_sequence, - no_stops, stop1_arrival_time, stop2_arrival_time, seg_length, - (dp).path[1] AS point_sequence, no_points, (dp).geom as point_geom -FROM trip_segs, ST_DumpPoints(seg_geom) AS dp -), + no_stops, stop1_arrival_time, stop2_arrival_time, seg_length, + (dp).path[1] AS point_sequence, no_points, (dp).geom as point_geom + FROM trip_segs, ST_DumpPoints(seg_geom) AS dp ), temp2 AS ( -SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time, - stop2_arrival_time, seg_length, point_sequence, no_points, point_geom -FROM temp1 -WHERE point_sequence <> no_points OR stop2_sequence = no_stops -), + SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time, + stop2_arrival_time, seg_length, point_sequence, no_points, point_geom + FROM temp1 + WHERE point_sequence != no_points OR stop2_sequence = no_stops ), temp3 AS ( -SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time, - stop2_arrival_time, point_sequence, no_points, point_geom, - ST_Length(ST_MakeLine(array_agg(point_geom) OVER w)) / seg_length AS perc -FROM temp2 WINDOW w AS (PARTITION BY trip_id, service_id, stop1_sequence - ORDER BY point_sequence) -) + SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time, + stop2_arrival_time, point_sequence, no_points, point_geom, + ST_Length(ST_MakeLine(array_agg(point_geom) OVER w)) / seg_length AS perc + FROM temp2 WINDOW w AS (PARTITION BY trip_id, service_id, stop1_sequence + ORDER BY point_sequence) ) SELECT trip_id, route_id, service_id, stop1_sequence, point_sequence, point_geom, -CASE -WHEN point_sequence = 1 then stop1_arrival_time -WHEN point_sequence = no_points then stop2_arrival_time -ELSE stop1_arrival_time + ((stop2_arrival_time - stop1_arrival_time) * perc) -END AS point_arrival_time + CASE + WHEN point_sequence = 1 THEN stop1_arrival_time + WHEN point_sequence = no_points THEN stop2_arrival_time + ELSE stop1_arrival_time + ((stop2_arrival_time - stop1_arrival_time) * perc) + END AS point_arrival_time FROM temp3; In the temporary table temp1 we use the function ST_DumpPoints to obtain the points composing the geometry of a segment. Nevertheless, this table contains duplicate points, that is, the last point of a segment is equal to the first point of the next one. In the temporary table temp2 we filter out the last point of a segment unless it is the last segment of the trip. In the temporary table temp3 we compute in the attribute perc the relative position of a point within a trip segment with window functions. For this we use the function ST_MakeLine to construct the subsegment from the first point of the segment to the current one, determine the length of the subsegment with function ST_Length and divide this length by the overall segment length. Finally, in the outer query we use the computed percentage to determine the arrival time to that point. @@ -441,14 +425,15 @@ CREATE TABLE trips_mdb ( ); INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip) -SELECT trip_id, service_id, route_id, date, tgeompoint_seq(array_agg(tgeompoint_inst(point_geom, t) ORDER BY T)) +SELECT trip_id, service_id, route_id, date, tgeompointSeq(array_agg( + tgeompoint(point_geom, t) ORDER BY T)) FROM trips_input GROUP BY trip_id, service_id, route_id, date; INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip) SELECT trip_id, route_id, t.service_id, d.date, - shift(trip, make_interval(days => d.date - t.date)) -FROM trips_mdb t JOIN service_dates d ON t.service_id = d.service_id AND t.date <> d.date; + shiftTime(trip, make_interval(days => d.date - t.date)) +FROM trips_mdb t JOIN service_dates d ON t.service_id = d.service_id AND t.date != d.date; In the first INSERT statement we group the rows in the trips_input table by trip_id and date while keeping the route_id atribute, use the array_agg function to construct an array containing the temporal points composing the trip ordered by time, and compute the trip from this array using the function tgeompointseq. As explained above, table trips_input only contains the first date of a trip. In the second INSERT statement we add the trips for all the other dates with the function shift.