From 561e8601dbc78778a89d9d3c9f44146324181c00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20G=C3=BCndling?= Date: Thu, 19 Oct 2023 21:31:02 +0200 Subject: [PATCH 1/4] wip --- include/nigiri/loader/gtfs/local_to_utc.h | 12 +-- include/nigiri/loader/gtfs/noon_offsets.h | 6 +- include/nigiri/loader/gtfs/services.h | 8 +- include/nigiri/loader/gtfs/trip.h | 2 +- src/loader/gtfs/load_timetable.cc | 10 +- src/loader/gtfs/noon_offsets.cc | 14 ++- src/loader/gtfs/services.cc | 108 +++++----------------- src/loader/gtfs/trip.cc | 7 +- 8 files changed, 47 insertions(+), 120 deletions(-) diff --git a/include/nigiri/loader/gtfs/local_to_utc.h b/include/nigiri/loader/gtfs/local_to_utc.h index 38ccf76d..7f9d5c8e 100644 --- a/include/nigiri/loader/gtfs/local_to_utc.h +++ b/include/nigiri/loader/gtfs/local_to_utc.h @@ -125,9 +125,9 @@ void expand_local_to_utc(trip_data const& trip_data, noon_offset_hours_t const& noon_offsets, timetable const& tt, frequency_expanded_trip&& fet, - interval const& gtfs_interval, interval const& selection, Consumer&& consumer) { + auto const tt_interval = tt.internal_interval_days(); auto trip_it = begin(fet.trips_); auto offsets_it = begin(fet.offsets_); while (trip_it != end(fet.trips_)) { @@ -163,7 +163,7 @@ void expand_local_to_utc(trip_data const& trip_data, auto utc_time_traffic_days = hash_map{}; auto prev_key = conversion_key{date::days{2}, duration_t{-1}}; auto prev_it = utc_time_traffic_days.end(); - for (auto day = gtfs_interval.from_; day != gtfs_interval.to_; + for (auto day = tt_interval.from_; day != tt_interval.to_; day += date::days{1}) { auto const service_days = interval{day + first_day_offset, day + last_day_offset + date::days{1}}; @@ -172,7 +172,7 @@ void expand_local_to_utc(trip_data const& trip_data, } auto const gtfs_local_day_idx = - static_cast((day - gtfs_interval.from_).count()); + static_cast((day - tt_interval.from_).count()); if (!fet.traffic_days_->test(gtfs_local_day_idx)) { continue; } @@ -186,8 +186,7 @@ void expand_local_to_utc(trip_data const& trip_data, auto const first_dep_day_offset = date::days{static_cast( std::floor(static_cast(first_dep_utc.count()) / 1440))}; auto const utc_traffic_day = - (day - tt.internal_interval_days().from_ + first_dep_day_offset) - .count(); + (day - tt_interval.from_ + first_dep_day_offset).count(); if (utc_traffic_day < 0 || utc_traffic_day >= kMaxDays) { continue; @@ -237,13 +236,12 @@ void expand_trip(trip_data& trip_data, timetable const& tt, std::basic_string const& trips, bitfield const* traffic_days, - interval const& gtfs_interval, interval const& selection, Consumer&& consumer) { expand_frequencies( trip_data, trips, traffic_days, [&](frequency_expanded_trip&& fet) { expand_local_to_utc(trip_data, noon_offsets, tt, std::move(fet), - gtfs_interval, selection, + selection, [&](utc_trip&& ut) { consumer(std::move(ut)); }); }); } diff --git a/include/nigiri/loader/gtfs/noon_offsets.h b/include/nigiri/loader/gtfs/noon_offsets.h index 7bb94229..5f3a0090 100644 --- a/include/nigiri/loader/gtfs/noon_offsets.h +++ b/include/nigiri/loader/gtfs/noon_offsets.h @@ -20,9 +20,7 @@ using noon_offset_hours_t = duration_t get_noon_offset(date::local_days const days, date::time_zone const* tz); -noon_offset_hours_t precompute_noon_offsets( - timetable const& tt, - interval gtfs_interval, - agency_map_t const& agencies); +noon_offset_hours_t precompute_noon_offsets(timetable const& tt, + agency_map_t const& agencies); } // namespace nigiri::loader::gtfs \ No newline at end of file diff --git a/include/nigiri/loader/gtfs/services.h b/include/nigiri/loader/gtfs/services.h index 6dcfdcbc..166a25a5 100644 --- a/include/nigiri/loader/gtfs/services.h +++ b/include/nigiri/loader/gtfs/services.h @@ -9,12 +9,10 @@ namespace nigiri::loader::gtfs { -struct traffic_days { - interval interval_; - hash_map> traffic_days_; -}; +using traffic_days_t = hash_map>; -traffic_days merge_traffic_days( +traffic_days_t merge_traffic_days( + interval const& tt_interval, hash_map const&, hash_map> const&); diff --git a/include/nigiri/loader/gtfs/trip.h b/include/nigiri/loader/gtfs/trip.h index f21d9a4c..245104b5 100644 --- a/include/nigiri/loader/gtfs/trip.h +++ b/include/nigiri/loader/gtfs/trip.h @@ -116,7 +116,7 @@ struct trip_data { trip_data read_trips(timetable&, route_map_t const&, - traffic_days const&, + traffic_days_t const&, std::string_view file_content); void read_frequencies(trip_data&, std::string_view); diff --git a/src/loader/gtfs/load_timetable.cc b/src/loader/gtfs/load_timetable.cc index f5194737..3ae63e39 100644 --- a/src/loader/gtfs/load_timetable.cc +++ b/src/loader/gtfs/load_timetable.cc @@ -98,7 +98,8 @@ void load_timetable(loader_config const& config, load(kRoutesFile).data(), config.default_tz_); auto const calendar = read_calendar(load(kCalenderFile).data()); auto const dates = read_calendar_date(load(kCalendarDatesFile).data()); - auto const service = merge_traffic_days(calendar, dates); + auto const service = + merge_traffic_days(tt.internal_interval_days(), calendar, dates); auto trip_data = read_trips(tt, routes, service, load(kTripsFile).data()); read_frequencies(trip_data, load(kFrequenciesFile).data()); read_stop_times(tt, trip_data, stops, load(kStopTimesFile).data()); @@ -127,8 +128,7 @@ void load_timetable(loader_config const& config, route_key_equals> route_services; - auto const noon_offsets = - precompute_noon_offsets(tt, service.interval_, agencies); + auto const noon_offsets = precompute_noon_offsets(tt, agencies); stop_seq_t stop_seq_cache; auto const get_route_key = @@ -159,8 +159,8 @@ void load_timetable(loader_config const& config, auto const add_trip = [&](std::basic_string const& trips, bitfield const* traffic_days) { expand_trip( - trip_data, noon_offsets, tt, trips, traffic_days, service.interval_, - tt.date_range_, [&](utc_trip&& s) { + trip_data, noon_offsets, tt, trips, traffic_days, tt.date_range_, + [&](utc_trip&& s) { auto const* stop_seq = get_route_key(s.trips_); auto const clasz = trip_data.get(s.trips_.front()).get_clasz(tt); auto const it = route_services.find(std::pair{clasz, stop_seq}); diff --git a/src/loader/gtfs/noon_offsets.cc b/src/loader/gtfs/noon_offsets.cc index 57985c21..6bcd6a02 100644 --- a/src/loader/gtfs/noon_offsets.cc +++ b/src/loader/gtfs/noon_offsets.cc @@ -11,11 +11,9 @@ duration_t get_noon_offset(date::local_days const days, return duration_t{abs_zoned_time.get_info().offset.count() / 60}; } -noon_offset_hours_t precompute_noon_offsets( - timetable const& tt, - interval gtfs_interval, - agency_map_t const& agencies) { - auto const tt_range = tt.internal_interval_days(); +noon_offset_hours_t precompute_noon_offsets(timetable const& tt, + agency_map_t const& agencies) { + auto const tt_interval = tt.internal_interval_days(); auto ret = noon_offset_hours_t{}; for (auto const& [id, provider_idx] : agencies) { @@ -33,14 +31,14 @@ noon_offset_hours_t precompute_noon_offsets( tt.locations_.timezones_[tz_idx] .as>() .second); - for (auto day = gtfs_interval.from_; day != gtfs_interval.to_; + for (auto day = tt_interval.from_; day != tt_interval.to_; day += std::chrono::days{1}) { - if (!tt_range.contains(day)) { + if (!tt_interval.contains(day)) { continue; } auto const day_idx = - static_cast((day - gtfs_interval.from_).count()); + static_cast((day - tt_interval.from_).count()); assert(day_idx < kMaxDays); (*ret[tz_idx])[day_idx] = get_noon_offset(date::local_days{date::year_month_day{day}}, tz); diff --git a/src/loader/gtfs/services.cc b/src/loader/gtfs/services.cc index 96d01e22..3c4c6150 100644 --- a/src/loader/gtfs/services.cc +++ b/src/loader/gtfs/services.cc @@ -1,89 +1,29 @@ #include "nigiri/loader/gtfs/services.h" -#include "nigiri/logging.h" #include "utl/get_or_create.h" #include "utl/progress_tracker.h" +#include "nigiri/logging.h" + namespace nigiri::loader::gtfs { enum class bound { kFirst, kLast }; -date::sys_days bound_date( - hash_map const& base, - hash_map> const& exceptions, - bound const b) { - constexpr auto const kMin = date::sys_days{date::sys_days ::duration{ - std::numeric_limits::max()}}; - constexpr auto const kMax = date::sys_days{date::sys_days ::duration{ - std::numeric_limits::min()}}; - - auto const min_base_day = [&]() { - auto const it = - std::min_element(begin(base), end(base), [](auto&& lhs, auto&& rhs) { - return lhs.second.interval_.from_ < rhs.second.interval_.from_; - }); - return it == end(base) ? std::pair{"", kMin} - : std::pair{it->first, it->second.interval_.from_}; - }; - - auto const max_base_day = [&]() { - auto const it = - std::max_element(begin(base), end(base), [](auto&& lhs, auto&& rhs) { - return lhs.second.interval_.to_ < rhs.second.interval_.to_; - }); - return it == end(base) ? std::pair{"", kMax} - : std::pair{it->first, it->second.interval_.to_}; - }; - - switch (b) { - case bound::kFirst: { - auto [min_id, min] = min_base_day(); - for (auto const& [id, dates] : exceptions) { - for (auto const& date : dates) { - if (date.type_ == calendar_date::kAdd && date.day_ < min) { - min = date.day_; - min_id = id; - } - } - } - log(log_lvl::info, "loader.gtfs.services", - "first date {} from services {}", min, min_id); - return min; - } - case bound::kLast: { - auto [max_id, max] = max_base_day(); - for (auto const& [id, dates] : exceptions) { - for (auto const& date : dates) { - if (date.type_ == calendar_date::kAdd && - date.day_ + date::days{1} > max) { - max = date.day_ + date::days{1}; - max_id = id; - } - } - } - log(log_lvl::info, "loader.gtfs.services", - "last date {} from services {}", max, max_id); - return max; - } - } - - assert(false); - throw std::runtime_error{"unreachable"}; -} - -bitfield calendar_to_bitfield(std::string const& service_name, - interval const& gtfs_interval, +bitfield calendar_to_bitfield(interval const& tt_interval, + std::string const& service_name, calendar const& c) { assert((c.interval_.from_ - gtfs_interval.from_).count() >= 0); - bitfield traffic_days; - auto bit = static_cast( - (c.interval_.from_ - gtfs_interval.from_).count()); - for (auto d = c.interval_.from_; d != c.interval_.to_; - d = d + date::days{1}, ++bit) { + auto const from = std::max(c.interval_.from_, tt_interval.from_); + auto const to = std::min(c.interval_.to_, tt_interval.to_); + auto bit = (from - tt_interval.from_).count(); + auto traffic_days = bitfield{}; + for (auto d = from; d != to; d = d + date::days{1}, ++bit) { if (bit >= kMaxDays) { log(log_lvl::error, "loader.gtfs.services", - "date {} for service {} out of range", d, service_name); + "date {} for service {} out of range [tt_interval={}, calendar={}, " + "iterating={}]", + d, service_name, tt_interval, c.interval_, interval{from, to}); break; } auto const weekday_index = @@ -93,35 +33,31 @@ bitfield calendar_to_bitfield(std::string const& service_name, return traffic_days; } -void add_exception(std::string const& service_name, - date::sys_days const& start, +void add_exception(interval const& tt_interval, calendar_date const& exception, bitfield& b) { - auto const day_idx = (exception.day_ - start).count(); + auto const day_idx = (exception.day_ - tt_interval.from_).count(); if (day_idx < 0 || day_idx >= kMaxDays) { - log(log_lvl::error, "loader.gtfs.services", - "date {} for service {} out of range", exception.day_, service_name); return; } b.set(static_cast(day_idx), exception.type_ == calendar_date::kAdd); } -traffic_days merge_traffic_days( +traffic_days_t merge_traffic_days( + interval const& tt_interval, hash_map const& base, hash_map> const& exceptions) { auto const timer = nigiri::scoped_timer{"loader.gtfs.services"}; - traffic_days s; - s.interval_ = {bound_date(base, exceptions, bound::kFirst), - bound_date(base, exceptions, bound::kLast)}; - auto const progress_tracker = utl::get_active_progress_tracker(); progress_tracker->status("Build Base Services") .out_bounds(36.F, 38.F) .in_high(base.size()); + + auto s = traffic_days_t{}; for (auto const& [service_name, calendar] : base) { - s.traffic_days_[service_name] = std::make_unique( - calendar_to_bitfield(service_name, s.interval_, calendar)); + s[service_name] = std::make_unique( + calendar_to_bitfield(tt_interval, service_name, calendar)); } progress_tracker->status("Add Service Exceptions") @@ -129,8 +65,8 @@ traffic_days merge_traffic_days( .in_high(base.size()); for (auto const& [service_name, service_exceptions] : exceptions) { for (auto const& day : service_exceptions) { - add_exception(service_name, s.interval_.from_, day, - *utl::get_or_create(s.traffic_days_, service_name, []() { + add_exception(tt_interval, day, + *utl::get_or_create(s, service_name, []() { return std::make_unique(); })); } diff --git a/src/loader/gtfs/trip.cc b/src/loader/gtfs/trip.cc index 94811e33..85fc088c 100644 --- a/src/loader/gtfs/trip.cc +++ b/src/loader/gtfs/trip.cc @@ -251,7 +251,7 @@ trip_direction_idx_t trip_data::get_or_create_direction( trip_data read_trips(timetable& tt, route_map_t const& routes, - traffic_days const& services, + traffic_days_t const& services, std::string_view file_content) { struct csv_trip { utl::csv_col route_id_; @@ -274,9 +274,8 @@ trip_data read_trips(timetable& tt, utl::make_buf_reader(file_content, progress_tracker->update_fn())} // | utl::csv() // | utl::for_each([&](csv_trip const& t) { - auto const traffic_days_it = - services.traffic_days_.find(t.service_id_->view()); - if (traffic_days_it == end(services.traffic_days_)) { + auto const traffic_days_it = services.find(t.service_id_->view()); + if (traffic_days_it == end(services)) { log(log_lvl::error, "loader.gtfs.trip", R"(trip "{}": service_id "{}" not found)", t.trip_id_->view(), t.service_id_->view()); From bc85365d092508e02f353e7522456691db455e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20G=C3=BCndling?= Date: Thu, 19 Oct 2023 21:41:54 +0200 Subject: [PATCH 2/4] fix tests --- test/loader/gtfs/services_test.cc | 12 ++++++------ test/loader/gtfs/stop_time_test.cc | 7 ++++++- test/loader/gtfs/trip_test.cc | 11 +++++++++-- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/test/loader/gtfs/services_test.cc b/test/loader/gtfs/services_test.cc index 7d3cc3ac..1ff9e7f3 100644 --- a/test/loader/gtfs/services_test.cc +++ b/test/loader/gtfs/services_test.cc @@ -4,6 +4,7 @@ #include "nigiri/loader/gtfs/calendar_date.h" #include "nigiri/loader/gtfs/files.h" #include "nigiri/loader/gtfs/services.h" +#include "nigiri/common/interval.h" #include "nigiri/types.h" #include "./test_data.h" @@ -34,10 +35,12 @@ using namespace date; */ TEST(gtfs, service_dates) { + auto const i = interval{date::sys_days{July / 1 / 2006}, + date::sys_days{August / 1 / 2006}}; auto dates = read_calendar_date(example_files().get_file(kCalendarDatesFile).data()); auto calendar = read_calendar(example_files().get_file(kCalenderFile).data()); - auto traffic_days = merge_traffic_days(calendar, dates); + auto traffic_days = merge_traffic_days(i, calendar, dates); auto we_bit_str = std::string{"1111000110000011000001100000110"}; auto wd_bit_str = std::string{"0000111001111100111110011111001"}; @@ -46,9 +49,6 @@ TEST(gtfs, service_dates) { auto const we_traffic_days = bitfield{we_bit_str}; auto const wd_traffic_days = bitfield{wd_bit_str}; - EXPECT_EQ(July / 1 / 2006, traffic_days.interval_.from_); - EXPECT_EQ(August / 1 / 2006, traffic_days.interval_.to_); - - EXPECT_EQ(we_traffic_days, *traffic_days.traffic_days_["WE"]); - EXPECT_EQ(wd_traffic_days, *traffic_days.traffic_days_["WD"]); + EXPECT_EQ(we_traffic_days, *traffic_days["WE"]); + EXPECT_EQ(wd_traffic_days, *traffic_days["WD"]); } diff --git a/test/loader/gtfs/stop_time_test.cc b/test/loader/gtfs/stop_time_test.cc index c4396566..9997c5ef 100644 --- a/test/loader/gtfs/stop_time_test.cc +++ b/test/loader/gtfs/stop_time_test.cc @@ -7,12 +7,16 @@ #include "./test_data.h" +using namespace date; + namespace nigiri::loader::gtfs { TEST(gtfs, read_stop_times_example_data) { auto const files = example_files(); timetable tt; + tt.date_range_ = interval{date::sys_days{July / 1 / 2006}, + date::sys_days{August / 1 / 2006}}; tz_map timezones; auto agencies = @@ -22,7 +26,8 @@ TEST(gtfs, read_stop_times_example_data) { auto const dates = read_calendar_date(files.get_file(kCalendarDatesFile).data()); auto const calendar = read_calendar(files.get_file(kCalenderFile).data()); - auto const services = merge_traffic_days(calendar, dates); + auto const services = + merge_traffic_days(tt.internal_interval_days(), calendar, dates); auto trip_data = read_trips(tt, routes, services, files.get_file(kTripsFile).data()); auto const stops = read_stops(source_idx_t{0}, tt, timezones, diff --git a/test/loader/gtfs/trip_test.cc b/test/loader/gtfs/trip_test.cc index d22b33a3..b1f55136 100644 --- a/test/loader/gtfs/trip_test.cc +++ b/test/loader/gtfs/trip_test.cc @@ -9,6 +9,7 @@ #include "./test_data.h" +using namespace date; using namespace nigiri::loader; namespace nigiri::loader::gtfs { @@ -17,6 +18,8 @@ TEST(gtfs, read_trips_example_data) { auto const files = example_files(); timetable tt; + tt.date_range_ = interval{date::sys_days{July / 1 / 2006}, + date::sys_days{August / 1 / 2006}}; tz_map timezones; auto agencies = @@ -26,7 +29,8 @@ TEST(gtfs, read_trips_example_data) { auto const dates = read_calendar_date(files.get_file(kCalendarDatesFile).data()); auto const calendar = read_calendar(files.get_file(kCalenderFile).data()); - auto const services = merge_traffic_days(calendar, dates); + auto const services = + merge_traffic_days(tt.internal_interval_days(), calendar, dates); auto const trip_data = read_trips(tt, routes, services, files.get_file(kTripsFile).data()); @@ -43,6 +47,8 @@ TEST(gtfs, read_trips_berlin_data) { auto const files = berlin_files(); timetable tt; + tt.date_range_ = interval{date::sys_days{July / 1 / 2006}, + date::sys_days{August / 1 / 2006}}; tz_map timezones; auto agencies = @@ -52,7 +58,8 @@ TEST(gtfs, read_trips_berlin_data) { auto const dates = read_calendar_date(files.get_file(kCalendarDatesFile).data()); auto const calendar = read_calendar(files.get_file(kCalenderFile).data()); - auto const services = merge_traffic_days(calendar, dates); + auto const services = + merge_traffic_days(tt.internal_interval_days(), calendar, dates); auto const trip_data = read_trips(tt, routes, services, files.get_file(kTripsFile).data()); From e4f54f3b3deb8e23634c243f9e0f664e388a4589 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20G=C3=BCndling?= Date: Thu, 19 Oct 2023 21:47:01 +0200 Subject: [PATCH 3/4] fix clangtidy --- src/loader/gtfs/services.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/loader/gtfs/services.cc b/src/loader/gtfs/services.cc index 3c4c6150..2134ba3d 100644 --- a/src/loader/gtfs/services.cc +++ b/src/loader/gtfs/services.cc @@ -28,7 +28,8 @@ bitfield calendar_to_bitfield(interval const& tt_interval, } auto const weekday_index = date::year_month_weekday{d}.weekday().c_encoding(); - traffic_days.set(bit, c.week_days_.test(weekday_index)); + traffic_days.set(static_cast(bit), + c.week_days_.test(weekday_index)); } return traffic_days; } From 1e1c9b9e33ae64f750603a3466fec097aefe8a8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20G=C3=BCndling?= Date: Fri, 20 Oct 2023 14:16:08 +0200 Subject: [PATCH 4/4] wip --- src/loader/gtfs/services.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/loader/gtfs/services.cc b/src/loader/gtfs/services.cc index 2134ba3d..fb2d8c97 100644 --- a/src/loader/gtfs/services.cc +++ b/src/loader/gtfs/services.cc @@ -12,8 +12,6 @@ enum class bound { kFirst, kLast }; bitfield calendar_to_bitfield(interval const& tt_interval, std::string const& service_name, calendar const& c) { - assert((c.interval_.from_ - gtfs_interval.from_).count() >= 0); - auto const from = std::max(c.interval_.from_, tt_interval.from_); auto const to = std::min(c.interval_.to_, tt_interval.to_); auto bit = (from - tt_interval.from_).count();