From d6f361463f4b4b0128005fc810a152b4e7f70754 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 26 Oct 2024 20:54:40 +0000 Subject: [PATCH] feat: Port S2Cell operations to s2geography (#35) These previously lived only in the R package ( https://github.com/r-spatial/s2/blob/main/src/s2-cell.cpp ), but there's no reason they can't live here, too! --- CMakeLists.txt | 5 + src/s2geography/op/cell.cc | 199 ++++++++++++++++++++++++++++++++ src/s2geography/op/cell.h | 162 ++++++++++++++++++++++++++ src/s2geography/op/cell_test.cc | 136 ++++++++++++++++++++++ src/s2geography/op/op.h | 94 +++++++++++++++ src/s2geography/op/point.cc | 29 +++++ src/s2geography/op/point.h | 42 +++++++ 7 files changed, 667 insertions(+) create mode 100644 src/s2geography/op/cell.cc create mode 100644 src/s2geography/op/cell.h create mode 100644 src/s2geography/op/cell_test.cc create mode 100644 src/s2geography/op/op.h create mode 100644 src/s2geography/op/point.cc create mode 100644 src/s2geography/op/point.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bbc95bb..212564e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -206,6 +206,8 @@ add_library(s2geography src/s2geography/geoarrow.cc src/s2geography/wkt-reader.cc src/s2geography/wkt-writer.cc + src/s2geography/op/cell.cc + src/s2geography/op/point.cc # geoarrow sources src/vendored/geoarrow/geoarrow.c src/vendored/ryu/d2s.c @@ -256,6 +258,7 @@ if(S2GEOGRAPHY_BUILD_TESTS) add_executable(geoarrow_test src/s2geography/geoarrow_test.cc) add_executable(geography_test src/s2geography/geography_test.cc) add_executable(wkt_writer_test src/s2geography/wkt-writer_test.cc) + add_executable(op_cell_test src/s2geography/op/cell_test.cc) if (S2GEOGRAPHY_CODE_COVERAGE) target_compile_options(coverage_config INTERFACE -O0 -g --coverage) @@ -267,12 +270,14 @@ if(S2GEOGRAPHY_BUILD_TESTS) target_link_libraries(geoarrow_test s2geography nanoarrow GTest::gtest_main) target_link_libraries(geography_test s2geography nanoarrow GTest::gtest_main) target_link_libraries(wkt_writer_test s2geography GTest::gtest_main) + target_link_libraries(op_cell_test s2geography GTest::gtest_main) include(GoogleTest) gtest_discover_tests(distance_test) gtest_discover_tests(geoarrow_test) gtest_discover_tests(geography_test) gtest_discover_tests(wkt_writer_test) + gtest_discover_tests(op_cell_test) endif() if(S2GEOGRAPHY_BUILD_EXAMPLES) diff --git a/src/s2geography/op/cell.cc b/src/s2geography/op/cell.cc new file mode 100644 index 0000000..49c1d29 --- /dev/null +++ b/src/s2geography/op/cell.cc @@ -0,0 +1,199 @@ + +#include "s2geography/op/cell.h" + +#include +#include +#include + +#include "s2/s2cell.h" +#include "s2/s2cell_id.h" +#include "s2/s2latlng.h" + +namespace s2geography::op::cell { + +uint64_t FromToken::ExecuteScalar(const std::string_view cell_token) { + // This constructor may not work for all s2 versions + return S2CellId::FromToken({cell_token.data(), cell_token.size()}).id(); +} + +uint64_t FromDebugString::ExecuteScalar(const std::string_view debug_string) { + // This constructor may not work for all s2 versions + return S2CellId::FromDebugString({debug_string.data(), debug_string.size()}) + .id(); +} + +uint64_t FromPoint::ExecuteScalar(Point point) { + S2Point pt(point[0], point[1], point[2]); + return S2CellId(pt).id(); +} + +Point ToPoint::ExecuteScalar(const uint64_t cell_id) { + S2CellId cell(cell_id); + if (!cell.is_valid()) { + return point::kInvalidPoint; + } else { + S2Point point = S2CellId(cell_id).ToPoint(); + return {point.x(), point.y(), point.z()}; + } +} + +std::string_view ToToken::ExecuteScalar(const uint64_t cell_id) { + last_result_ = S2CellId(cell_id).ToToken(); + return last_result_; +} + +std::string_view ToDebugString::ExecuteScalar(const uint64_t cell_id) { + last_result_ = S2CellId(cell_id).ToString(); + return last_result_; +} + +bool IsValid::ExecuteScalar(const uint64_t cell_id) { + return S2CellId(cell_id).is_valid(); +} + +Point CellCenter::ExecuteScalar(const uint64_t cell_id) { + S2CellId cell(cell_id); + if (!cell.is_valid()) { + return point::kInvalidPoint; + } + + S2Point pt = S2Cell(cell).GetCenter(); + return {pt.x(), pt.y(), pt.z()}; +} + +Point CellVertex::ExecuteScalar(const uint64_t cell_id, + const int8_t vertex_id) { + S2CellId cell(cell_id); + + if (vertex_id < 0 || !cell.is_valid()) { + return point::kInvalidPoint; + } + + S2Point pt = S2Cell(cell).GetVertex(vertex_id); + return {pt.x(), pt.y(), pt.z()}; +} + +int8_t Level::ExecuteScalar(const uint64_t cell_id) { + S2CellId cell(cell_id); + if (!cell.is_valid()) { + return -1; + } + + return cell.level(); +} + +double Area::ExecuteScalar(const uint64_t cell_id) { + S2CellId cell(cell_id); + if (!cell.is_valid()) { + return NAN; + } + + return S2Cell(cell).ExactArea(); +} + +double AreaApprox::ExecuteScalar(const uint64_t cell_id) { + S2CellId cell(cell_id); + if (!cell.is_valid()) { + return NAN; + } + + return S2Cell(cell).ApproxArea(); +} + +uint64_t Parent::ExecuteScalar(const uint64_t cell_id, const int8_t level) { + // allow negative numbers to relate to current level + S2CellId cell(cell_id); + if (!cell.is_valid()) { + return kCellIdSentinel; + } + + int8_t level_final; + int8_t cell_level = cell.level(); + if (level < 0) { + level_final = cell.level() + level; + } else { + level_final = level; + } + + if (level_final > cell.level() || level_final < 0) { + return kCellIdSentinel; + } else { + return cell.parent(level_final).id(); + } +} + +uint64_t Child::ExecuteScalar(const uint64_t cell_id, const int8_t k) { + if (k < 0 || k > 3) { + return kCellIdSentinel; + } + + return S2CellId(cell_id).child(k).id(); +} + +uint64_t EdgeNeighbor::ExecuteScalar(const uint64_t cell_id, const int8_t k) { + S2CellId cell(cell_id); + if (k < 0 || k > 3) { + return kCellIdSentinel; + } + + S2CellId neighbours[4]; + cell.GetEdgeNeighbors(neighbours); + return neighbours[k].id(); +} + +bool Contains::ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) { + S2CellId cell(cell_id); + S2CellId cell_test(cell_id_test); + if (!cell.is_valid() || !cell_test.is_valid()) { + return false; + } + + return cell.contains(cell_test); +} + +bool MayIntersect::ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) { + S2CellId cell(cell_id); + S2CellId cell_test(cell_id_test); + if (!cell.is_valid() || !cell_test.is_valid()) { + return false; + } + + return S2Cell(cell).MayIntersect(S2Cell(cell_test)); +} + +double Distance::ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) { + S2CellId cell(cell_id); + S2CellId cell_test(cell_id_test); + if (!cell.is_valid() || !cell_test.is_valid()) { + return NAN; + } + + return S2Cell(cell).GetDistance(S2Cell(cell_test)).radians(); +} + +double MaxDistance::ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) { + S2CellId cell(cell_id); + S2CellId cell_test(cell_id_test); + if (!cell.is_valid() || !cell_test.is_valid()) { + return NAN; + } + + return S2Cell(cell).GetMaxDistance(S2Cell(cell_test)).radians(); +} + +int8_t CommonAncestorLevel::ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) { + S2CellId cell(cell_id); + S2CellId cell_test(cell_id_test); + if (!cell.is_valid() || !cell_test.is_valid()) { + return -1; + } + + return cell.GetCommonAncestorLevel(cell_test); +} + +} // namespace s2geography::op::cell diff --git a/src/s2geography/op/cell.h b/src/s2geography/op/cell.h new file mode 100644 index 0000000..5d6bcef --- /dev/null +++ b/src/s2geography/op/cell.h @@ -0,0 +1,162 @@ +#pragma once + +#include +#include +#include +#include + +#include "s2geography/op/op.h" +#include "s2geography/op/point.h" + +namespace s2geography { + +namespace op { + +namespace cell { + +using point::LngLat; +using point::Point; + +/// \brief Cell identifier returned for invalid input +static constexpr uint64_t kCellIdNone = 0; + +/// \brief Cell identifier that is greater than all other cells +static constexpr uint64_t kCellIdSentinel = ~uint64_t{0}; + +/// \brief Create a cell identifier from a token +class FromToken : public UnaryOp { + public: + uint64_t ExecuteScalar(const std::string_view cell_token) override; +}; + +/// \brief Create a cell identifier from a debug string +class FromDebugString : public UnaryOp { + public: + uint64_t ExecuteScalar(const std::string_view debug_string) override; +}; + +/// \brief Create a cell identifier from an xyz unit vector +class FromPoint : public UnaryOp { + public: + uint64_t ExecuteScalar(Point point) override; +}; + +/// \brief Calculate the cell centre as an xyz vector +class ToPoint : public UnaryOp { + public: + Point ExecuteScalar(const uint64_t cell_id) override; +}; + +/// \brief Get the token string of a cell identifier +class ToToken : public UnaryOp { + public: + std::string_view ExecuteScalar(const uint64_t cell_id) override; + + private: + std::string last_result_; +}; + +/// \brief Get the debug string of a cell identifier +class ToDebugString : public UnaryOp { + public: + std::string_view ExecuteScalar(const uint64_t cell_id) override; + + private: + std::string last_result_; +}; + +/// \brief Returns true if the ID is a valid cell identifier or false otherwise +class IsValid : public UnaryOp { + public: + bool ExecuteScalar(const uint64_t cell_id) override; +}; + +/// \brief Retrieve the corners of a cell +class CellCenter : public UnaryOp { + public: + Point ExecuteScalar(const uint64_t cell_id) override; +}; + +/// \brief Retrieve the corners of a cell +class CellVertex : public BinaryOp { + public: + Point ExecuteScalar(const uint64_t cell_id, const int8_t vertex_id) override; +}; + +/// \brief Calculate the level represented by the cell +class Level : public UnaryOp { + public: + int8_t ExecuteScalar(const uint64_t cell_id) override; +}; + +/// \brief Calculate the area of a given cell +class Area : public UnaryOp { + public: + double ExecuteScalar(const uint64_t cell_id) override; +}; + +/// \brief Calculate the approximate area of a given cell +class AreaApprox : public UnaryOp { + public: + double ExecuteScalar(const uint64_t cell_id) override; +}; + +/// \brief Calculate the parent cell at a given level +class Parent : public BinaryOp { + public: + uint64_t ExecuteScalar(const uint64_t cell_id, const int8_t level) override; +}; + +/// \brief Calculate the child cell at the next level +class Child : public BinaryOp { + public: + uint64_t ExecuteScalar(const uint64_t cell_id, const int8_t k) override; +}; + +/// \brief Get the edge neighbours of a given cell +class EdgeNeighbor : public BinaryOp { + public: + uint64_t ExecuteScalar(const uint64_t cell_id, const int8_t k) override; +}; + +/// \brief Returns true if cell_id contains cell_id_test or false otherwise +class Contains : public BinaryOp { + public: + bool ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) override; +}; + +/// \brief Returns true if cell_id might intersect cell_id_test or false +/// otherwise +class MayIntersect : public BinaryOp { + public: + bool ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) override; +}; + +/// \brief Returns the minimum spherical distance (in radians) between two cells +class Distance : public BinaryOp { + public: + double ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) override; +}; + +/// \brief Returns the maximum spherical distance (in radians) between two cells +class MaxDistance : public BinaryOp { + public: + double ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) override; +}; + +/// \brief Returns the level at which the two cells have a common ancestor +class CommonAncestorLevel : public BinaryOp { + public: + int8_t ExecuteScalar(const uint64_t cell_id, + const uint64_t cell_id_test) override; +}; + +} // namespace cell + +} // namespace op + +} // namespace s2geography diff --git a/src/s2geography/op/cell_test.cc b/src/s2geography/op/cell_test.cc new file mode 100644 index 0000000..2c373b2 --- /dev/null +++ b/src/s2geography/op/cell_test.cc @@ -0,0 +1,136 @@ + +#include "s2geography/op/cell.h" + +#include + +#include +#include + +namespace s2geography::op::cell { + +static constexpr LngLat kTestPoint{-64, 45}; + +static uint64_t TestCellId() { + Point pt = Execute(kTestPoint); + return Execute(pt); +} + +TEST(Cell, Token) { + uint64_t cell_id = TestCellId(); + EXPECT_EQ(Execute(ExecuteString(cell_id)), cell_id); + EXPECT_EQ(Execute("not a valid token"), kCellIdNone); +} + +TEST(Cell, DebugString) { + uint64_t cell_id = TestCellId(); + EXPECT_EQ(Execute(ExecuteString(cell_id)), + cell_id); + EXPECT_EQ(Execute("not a valid debug"), kCellIdNone); +} + +TEST(Cell, Point) { + uint64_t cell_id = TestCellId(); + EXPECT_EQ(Execute(Execute(cell_id)), cell_id); + + Point invalid_point = Execute(kCellIdSentinel); + EXPECT_EQ(std::memcmp(&invalid_point, &point::kInvalidPoint, sizeof(Point)), + 0); +} + +TEST(Cell, IsValid) { + EXPECT_TRUE(Execute(TestCellId())); + EXPECT_FALSE(Execute(kCellIdSentinel)); + EXPECT_FALSE(Execute(kCellIdNone)); +} + +TEST(Cell, CellCenter) { + Point center = Execute(TestCellId()); + LngLat center_pt = Execute(center); + EXPECT_LT(std::abs(-64 - center_pt[0]), 0.0000001); + EXPECT_LT(std::abs(45 - center_pt[1]), 0.0000001); + + Point invalid_point = Execute(kCellIdSentinel); + EXPECT_EQ(std::memcmp(&invalid_point, &point::kInvalidPoint, sizeof(Point)), + 0); +} + +TEST(Cell, CellVertex) {} + +TEST(Cell, Level) { + EXPECT_EQ(Execute(TestCellId()), 30); + EXPECT_EQ(Execute(kCellIdNone), -1); + EXPECT_EQ(Execute(kCellIdSentinel), -1); +} + +TEST(Cell, Area) { + uint64_t face = Execute(TestCellId(), 0); + EXPECT_DOUBLE_EQ(Execute(face), 4 * M_PI / 6); + EXPECT_TRUE(std::isnan(Execute(kCellIdNone))); + EXPECT_TRUE(std::isnan(Execute(kCellIdSentinel))); +} + +TEST(Cell, AreaApprox) { + uint64_t face = Execute(TestCellId(), 0); + EXPECT_DOUBLE_EQ(Execute(face), 4 * M_PI / 6); + EXPECT_TRUE(std::isnan(Execute(kCellIdNone))); + EXPECT_TRUE(std::isnan(Execute(kCellIdSentinel))); +} + +TEST(Cell, Parent) { + EXPECT_EQ(Execute(Execute(TestCellId(), 0)), 0); + EXPECT_EQ(Execute(Execute(TestCellId(), -1)), 29); + EXPECT_EQ(Execute(TestCellId(), 31), kCellIdSentinel); + EXPECT_EQ(Execute(kCellIdSentinel, 0), kCellIdSentinel); +} + +TEST(Cell, EdgeNeighbor) {} + +TEST(Cell, Contains) { + EXPECT_TRUE( + Execute(Execute(TestCellId(), -1), TestCellId())); + EXPECT_FALSE( + Execute(TestCellId(), Execute(TestCellId(), -1))); + EXPECT_FALSE(Execute(kCellIdSentinel, TestCellId())); + EXPECT_FALSE(Execute(TestCellId(), kCellIdSentinel)); +} + +TEST(Cell, MayIntersect) { + EXPECT_TRUE(Execute(TestCellId(), TestCellId())); + EXPECT_TRUE( + Execute(TestCellId(), Execute(TestCellId(), -1))); + EXPECT_FALSE(Execute(TestCellId(), + Execute(TestCellId(), 0))); +} + +TEST(Cell, Distance) { + uint64_t null_island = Execute(Execute({0, 0})); + uint64_t anti_null_island = + Execute(Execute({180, 0})); + EXPECT_DOUBLE_EQ(Execute(null_island, anti_null_island), M_PI); + + EXPECT_TRUE(std::isnan(Execute(TestCellId(), kCellIdSentinel))); + EXPECT_TRUE(std::isnan(Execute(kCellIdSentinel, TestCellId()))); +} + +TEST(Cell, MaxDistance) { + uint64_t null_island = Execute(Execute({0, 0})); + uint64_t anti_null_island = + Execute(Execute({180, 0})); + EXPECT_DOUBLE_EQ(Execute(null_island, anti_null_island), M_PI); + + uint64_t big_cell = Execute(TestCellId(), 5); + EXPECT_GT(Execute(big_cell, null_island), + Execute(big_cell, null_island)); + + EXPECT_TRUE(std::isnan(Execute(TestCellId(), kCellIdSentinel))); + EXPECT_TRUE(std::isnan(Execute(kCellIdSentinel, TestCellId()))); +} + +TEST(Cell, CommonAncestorLevel) { + EXPECT_EQ(Execute(Execute(TestCellId(), 5), + TestCellId()), + 5); + EXPECT_EQ(Execute(kCellIdSentinel, TestCellId()), -1); +} + +} // namespace s2geography::op::cell diff --git a/src/s2geography/op/op.h b/src/s2geography/op/op.h new file mode 100644 index 0000000..2b25204 --- /dev/null +++ b/src/s2geography/op/op.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include + +namespace s2geography { + +namespace op { + +/// \defgroup operator S2 Operators +/// +/// S2 Operators are simplifications on top of the S2 library to +/// reduce the amount of glue code required to bind operations in +/// other runtimes. These abstractions intentionally do not include +/// the s2 headers publicly. +/// +/// The general workflow for using an Op class is to (1) create it +/// with the appropriate options class, (2) call Init() to give +/// the Op implementation an opportunity to error for invalid Options, +/// and (3) loop and call ExecuteScalar() where appropriate. Future +/// extensions to this framework may add an opportunity for operators +/// to implement a fast path to loop over arrays (or ArrowArrays) +/// of input. +/// +/// @{ + +struct EmptyOptions {}; + +template +class UnaryOp { + public: + using ReturnT = RetT; + using ArgType0 = ArgT0; + using OptionsT = OptT; + + UnaryOp(const OptT& options = OptT()) : options_(options) {} + virtual void Init() {} + virtual ReturnT ExecuteScalar(const ArgType0 arg0) { return ReturnT(); } + + protected: + OptT options_; +}; + +template +class BinaryOp { + public: + using ReturnT = RetT; + using ArgType0 = ArgT0; + using ArgType1 = ArgT1; + using OptionsT = OptT; + + BinaryOp(const OptionsT& options = OptionsT()) : options_(options) {} + virtual void Init() {} + virtual ReturnT ExecuteScalar(const ArgType0 arg0, const ArgType1 arg1) { + return ReturnT{}; + } + + protected: + OptionsT options_; +}; + +template +typename Op::ReturnT Execute(typename Op::ArgType0 arg0) { + Op op; + op.Init(); + + return op.ExecuteScalar(arg0); +} + +template +typename Op::ReturnT Execute(typename Op::ArgType0 arg0, + typename Op::ArgType1 arg1) { + Op op; + op.Init(); + + return op.ExecuteScalar(arg0, arg1); +} + +// This overload allow executing functions that return std::string_view, +// since the regular Execute() will return a view to deleted memory. +template +std::string ExecuteString(typename Op::ArgType0 arg0) { + Op op; + op.Init(); + + return std::string(op.ExecuteScalar(arg0)); +} + +/// @} + +} // namespace op + +} // namespace s2geography diff --git a/src/s2geography/op/point.cc b/src/s2geography/op/point.cc new file mode 100644 index 0000000..05d2439 --- /dev/null +++ b/src/s2geography/op/point.cc @@ -0,0 +1,29 @@ + +#include "s2geography/op/point.h" + +#include "s2/s2latlng.h" +#include "s2/s2point.h" + +namespace s2geography { + +namespace op { + +namespace point { + +LngLat ToLngLat::ExecuteScalar(const Point point) { + S2Point pt(point[0], point[1], point[2]); + S2LatLng ll(pt); + return {ll.lng().degrees(), ll.lat().degrees()}; +} + +Point ToPoint::ExecuteScalar(const LngLat lnglat) { + S2LatLng ll = S2LatLng::FromDegrees(lnglat[1], lnglat[0]); + S2Point pt(ll.ToPoint()); + return {pt.x(), pt.y(), pt.z()}; +} + +} // namespace point + +} // namespace op + +} // namespace s2geography diff --git a/src/s2geography/op/point.h b/src/s2geography/op/point.h new file mode 100644 index 0000000..42433c0 --- /dev/null +++ b/src/s2geography/op/point.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +#include "s2geography/op/op.h" + +namespace s2geography { + +namespace op { + +namespace point { + +/// \brief Longitude/Latitude pair (degrees) +using LngLat = std::array; + +/// \brief XYZ unit vector tuple +using Point = std::array; + +/// \brief Sentinel LngLat returned for invalid cells +static constexpr LngLat kInvalidLngLat{NAN, NAN}; + +/// \brief Senintel Point returned for invalid cells +static constexpr Point kInvalidPoint{NAN, NAN, NAN}; + +/// \brief Convert a point to its longitude/latitude +class ToLngLat : public UnaryOp { + public: + LngLat ExecuteScalar(const Point point) override; +}; + +/// \brief Convert a longitude/latitude to a point +class ToPoint : public UnaryOp { + public: + Point ExecuteScalar(const LngLat lnglat) override; +}; + +} // namespace point + +} // namespace op + +} // namespace s2geography