From 577821e4fed24d1227c7e39c6e04153f61ede09f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 3 Oct 2024 21:20:44 +0200 Subject: [PATCH 01/14] ENH: geography constructor from geoarrow --- CMakeLists.txt | 1 + ci/environment-dev.yml | 1 + src/geoarrow.cpp | 117 +++++++++++++++++++++++++++++++++++++++++ src/pybind11.hpp | 14 +++++ src/spherely.cpp | 2 + tests/test_geoarrow.py | 38 +++++++++++++ 6 files changed, 173 insertions(+) create mode 100644 src/geoarrow.cpp create mode 100644 tests/test_geoarrow.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d59553..1a8f3a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,6 +74,7 @@ endif() add_library(spherely MODULE src/geography.cpp src/accessors-geog.cpp + src/geoarrow.cpp src/predicates.cpp src/spherely.cpp ) diff --git a/ci/environment-dev.yml b/ci/environment-dev.yml index f00ca15..a22d803 100644 --- a/ci/environment-dev.yml +++ b/ci/environment-dev.yml @@ -13,3 +13,4 @@ dependencies: - ninja - pytest - pip + - geoarrow-pyarrow diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp new file mode 100644 index 0000000..480d0cd --- /dev/null +++ b/src/geoarrow.cpp @@ -0,0 +1,117 @@ +#include + +#include "geography.hpp" +#include "pybind11.hpp" + +namespace py = pybind11; +namespace s2geog = s2geography; +using namespace spherely; + +// PyObjectGeography from_wkt(std::string a) { +// s2geog::WKTReader reader; +// std::unique_ptr s2geog = reader.read_feature(a); +// auto geog_ptr = std::make_unique(std::move(s2geog)); +// return PyObjectGeography::from_geog(std::move(geog_ptr)); +// } + +// void init_geoarrow(py::module& m) { +// m.def("from_wkt", +// py::vectorize(&from_wkt), +// py::arg("a"), +// R"pbdoc( +// Creates a geography object from a WKT string. + +// Parameters +// ---------- +// a : str +// WKT string + +// )pbdoc"); +// } + +#ifdef __cplusplus +extern "C" { +#endif + +// Extra guard for versions of Arrow without the canonical guard +#ifndef ARROW_FLAG_DICTIONARY_ORDERED + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE +#endif // ARROW_FLAG_DICTIONARY_ORDERED + +#ifdef __cplusplus +} +#endif + +py::array_t from_geoarrow(py::object input) { + py::tuple capsules = input.attr("__arrow_c_array__")(); + py::capsule schema_capsule = capsules[0]; + py::capsule array_capsule = capsules[1]; + + const ArrowSchema* schema = static_cast(schema_capsule); + const ArrowArray* array = static_cast(array_capsule); + + s2geog::geoarrow::Reader reader; + std::vector> s2geog_vec; + + reader.Init(schema, s2geog::geoarrow::ImportOptions()); + reader.ReadGeography(array, 0, array->length, &s2geog_vec); + + // Convert resulting vector to array of python objects + auto result = py::array_t(array->length); + py::buffer_info rbuf = result.request(); + py::object* rptr = static_cast(rbuf.ptr); + + py::ssize_t i = 0; + for (auto& s2geog_ptr : s2geog_vec) { + auto geog_ptr = std::make_unique(std::move(s2geog_ptr)); + // return PyObjectGeography::from_geog(std::move(geog_ptr)); + rptr[i] = py::cast(std::move(geog_ptr)); + i++; + } + return result; +} + +void init_geoarrow(py::module& m) { + m.def("from_geoarrow", &from_geoarrow); +} diff --git a/src/pybind11.hpp b/src/pybind11.hpp index fc172dd..845c16a 100644 --- a/src/pybind11.hpp +++ b/src/pybind11.hpp @@ -142,6 +142,20 @@ struct npy_format_descriptor { } }; +// // Register PyObjectGeography as a valid numpy dtype (numpy.object alias) +// // from: https://github.com/pybind/pybind11/pull/1152 +// template <> +// struct npy_format_descriptor { +// static constexpr auto name = _("object"); +// enum { value = npy_api::NPY_OBJECT_ }; +// static pybind11::dtype dtype() { +// if (auto ptr = npy_api::get().PyArray_DescrFromType_(value)) { +// return reinterpret_borrow(ptr); +// } +// pybind11_fail("Unsupported buffer format!"); +// } +// }; + // Override signature type hint for vectorized Geography arguments template struct handle_type_name> { diff --git a/src/spherely.cpp b/src/spherely.cpp index df1d957..3fcfeb4 100644 --- a/src/spherely.cpp +++ b/src/spherely.cpp @@ -8,6 +8,7 @@ namespace py = pybind11; void init_geography(py::module&); void init_predicates(py::module&); void init_accessors(py::module&); +void init_geoarrow(py::module&); PYBIND11_MODULE(spherely, m) { m.doc() = R"pbdoc( @@ -21,6 +22,7 @@ PYBIND11_MODULE(spherely, m) { init_geography(m); init_predicates(m); init_accessors(m); + init_geoarrow(m); #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py new file mode 100644 index 0000000..4a54654 --- /dev/null +++ b/tests/test_geoarrow.py @@ -0,0 +1,38 @@ +import numpy as np +import pyarrow as pa +import geoarrow.pyarrow as ga + +import pytest + +import spherely + + +def test_from_geoarrow_wkt(): + + arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + + result = spherely.from_geoarrow(arr) + expected = spherely.create([1, 2, 3], [1, 2, 3]) + # object equality does not yet work + # np.testing.assert_array_equal(result, expected) + assert spherely.equals(result, expected).all() + + +def test_from_geoarrow_wkb(): + + arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + arr_wkb = ga.as_wkb(arr) + + result = spherely.from_geoarrow(arr_wkb) + expected = spherely.create([1, 2, 3], [1, 2, 3]) + assert spherely.equals(result, expected).all() + + +def test_from_geoarrow_native(): + + arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + arr_point = ga.as_geoarrow(arr) + + result = spherely.from_geoarrow(arr_point) + expected = spherely.create([1, 2, 3], [1, 2, 3]) + assert spherely.equals(result, expected).all() From 703707afe1d0a1ee89b28918e51595fe82eca16d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 15:32:10 +0200 Subject: [PATCH 02/14] add support for planar and oriented keywords --- src/geoarrow.cpp | 73 +++++++++++++++++++++++++++--------------- src/pybind11.hpp | 14 -------- tests/test_geoarrow.py | 36 ++++++++++++++++++++- 3 files changed, 83 insertions(+), 40 deletions(-) diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp index 480d0cd..918f271 100644 --- a/src/geoarrow.cpp +++ b/src/geoarrow.cpp @@ -7,28 +7,6 @@ namespace py = pybind11; namespace s2geog = s2geography; using namespace spherely; -// PyObjectGeography from_wkt(std::string a) { -// s2geog::WKTReader reader; -// std::unique_ptr s2geog = reader.read_feature(a); -// auto geog_ptr = std::make_unique(std::move(s2geog)); -// return PyObjectGeography::from_geog(std::move(geog_ptr)); -// } - -// void init_geoarrow(py::module& m) { -// m.def("from_wkt", -// py::vectorize(&from_wkt), -// py::arg("a"), -// R"pbdoc( -// Creates a geography object from a WKT string. - -// Parameters -// ---------- -// a : str -// WKT string - -// )pbdoc"); -// } - #ifdef __cplusplus extern "C" { #endif @@ -83,7 +61,9 @@ struct ArrowArray { } #endif -py::array_t from_geoarrow(py::object input) { +py::array_t from_geoarrow(py::object input, + bool oriented = false, + bool planar = false) { py::tuple capsules = input.attr("__arrow_c_array__")(); py::capsule schema_capsule = capsules[0]; py::capsule array_capsule = capsules[1]; @@ -94,7 +74,14 @@ py::array_t from_geoarrow(py::object input) { s2geog::geoarrow::Reader reader; std::vector> s2geog_vec; - reader.Init(schema, s2geog::geoarrow::ImportOptions()); + s2geog::geoarrow::ImportOptions options; + options.set_oriented(oriented); + if (planar) { + // TODO replace with constant + auto tol = S1Angle::Radians(100.0 / (6371.01 * 1000)); + options.set_tessellate_tolerance(tol); + } + reader.Init(schema, options); reader.ReadGeography(array, 0, array->length, &s2geog_vec); // Convert resulting vector to array of python objects @@ -113,5 +100,41 @@ py::array_t from_geoarrow(py::object input) { } void init_geoarrow(py::module& m) { - m.def("from_geoarrow", &from_geoarrow); + m.def("from_geoarrow", + &from_geoarrow, + py::arg("input"), + py::arg("oriented") = false, + py::arg("planar") = false, + R"pbdoc( + Create an array of geographies from an Arrow array object with a GeoArrow + extension type. + + See https://geoarrow.org/ for details on the GeoArrow specification. + + This functions accepts any Arrow array object implementing + the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__`` + method). + + .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + Parameters + ---------- + input : pyarrow.Array, Arrow array + Any array object implementing the Arrow PyCapsule Protocol + (i.e. has a ``__arrow_c_array__`` method). The type of the array + should be one of the geoarrow geometry types. + oriented : bool, default False + Set to True if polygon ring directions are known to be correct + (i.e., exterior rings are defined counter clockwise and interior + rings are defined clockwise). + By default (False), it will return the polygon with the smaller + area. + planar : bool, default False + If set to True, the edges linestrings and polygons are assumed to + be planar. In that case, additional points will be added to the line + while creating the geography objects, to ensure every point is + within 100m of the original line. + By default (False), it is assumed that the edges are spherical + (i.e. represent the shortest path on the sphere between two points). + )pbdoc"); } diff --git a/src/pybind11.hpp b/src/pybind11.hpp index 845c16a..fc172dd 100644 --- a/src/pybind11.hpp +++ b/src/pybind11.hpp @@ -142,20 +142,6 @@ struct npy_format_descriptor { } }; -// // Register PyObjectGeography as a valid numpy dtype (numpy.object alias) -// // from: https://github.com/pybind/pybind11/pull/1152 -// template <> -// struct npy_format_descriptor { -// static constexpr auto name = _("object"); -// enum { value = npy_api::NPY_OBJECT_ }; -// static pybind11::dtype dtype() { -// if (auto ptr = npy_api::get().PyArray_DescrFromType_(value)) { -// return reinterpret_borrow(ptr); -// } -// pybind11_fail("Unsupported buffer format!"); -// } -// }; - // Override signature type hint for vectorized Geography arguments template struct handle_type_name> { diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index 4a54654..458ba42 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -1,4 +1,3 @@ -import numpy as np import pyarrow as pa import geoarrow.pyarrow as ga @@ -36,3 +35,38 @@ def test_from_geoarrow_native(): result = spherely.from_geoarrow(arr_point) expected = spherely.create([1, 2, 3], [1, 2, 3]) assert spherely.equals(result, expected).all() + + +polygon_with_bad_hole_wkt = ( + "POLYGON " + "((20 35, 10 30, 10 10, 30 5, 45 20, 20 35)," + "(30 20, 20 25, 20 15, 30 20))" +) + + +# @pytest.mark.skipif( +# Version(spherely.__s2geography_version__) < Version("0.2.0"), +# reason="Needs s2geography >= 0.2.0", +# ) +def test_from_geoarrow_oriented(): + # by default re-orients the inner ring + arr = ga.as_geoarrow([polygon_with_bad_hole_wkt]) + + result = spherely.from_geoarrow(arr) + assert ( + str(result[0]) + == "POLYGON ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (20 15, 20 25, 30 20, 20 15))" + ) + + # if we force to not orient, we get an error + with pytest.raises(RuntimeError, match="Inconsistent loop orientations detected"): + spherely.from_geoarrow(arr, oriented=True) + + +def test_from_wkt_planar(): + arr = ga.as_geoarrow(["LINESTRING (-64 45, 0 45)"]) + result = spherely.from_geoarrow(arr) + assert spherely.distance(result[0], spherely.Point(45, -30)) > 10000 + + result = spherely.from_geoarrow(arr, planar=True) + assert spherely.distance(result[0], spherely.Point(45, -30)) < 100 From 08fa01b7ec6f5254c3ab2da68980d36585521b34 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 15:34:27 +0200 Subject: [PATCH 03/14] make arguments position/keyword only --- src/geoarrow.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp index 918f271..b5860f1 100644 --- a/src/geoarrow.cpp +++ b/src/geoarrow.cpp @@ -103,6 +103,8 @@ void init_geoarrow(py::module& m) { m.def("from_geoarrow", &from_geoarrow, py::arg("input"), + py::pos_only(), + py::kw_only(), py::arg("oriented") = false, py::arg("planar") = false, R"pbdoc( From 6778ae1962bf8fab97e049cf093cd9a2d9353522 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 15:42:20 +0200 Subject: [PATCH 04/14] only build function for recent s2geography --- CMakeLists.txt | 19 +++++++++++++------ src/spherely.cpp | 4 ++++ tests/test_geoarrow.py | 8 ++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a8f3a3..3234581 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,16 +71,23 @@ endif() # Build -add_library(spherely MODULE +set(CPP_SOURCES src/geography.cpp src/accessors-geog.cpp - src/geoarrow.cpp src/predicates.cpp - src/spherely.cpp - ) + src/spherely.cpp) + +if(${s2geography_VERSION} VERSION_GREATER_EQUAL "0.2.0") + set(CPP_SOURCES ${CPP_SOURCES} src/geoarrow.cpp) +endif() + +add_library(spherely MODULE ${CPP_SOURCES}) -target_compile_definitions(spherely - PRIVATE VERSION_INFO=${PROJECT_VERSION}) +target_compile_definitions( + spherely + PRIVATE + VERSION_INFO=${PROJECT_VERSION} + S2GEOGRAPHY_VERSION=${s2geography_VERSION}) target_link_libraries(spherely PRIVATE pybind11::module pybind11::lto pybind11::windows_extras diff --git a/src/spherely.cpp b/src/spherely.cpp index 3fcfeb4..b0167c9 100644 --- a/src/spherely.cpp +++ b/src/spherely.cpp @@ -29,4 +29,8 @@ PYBIND11_MODULE(spherely, m) { #else m.attr("__version__") = "dev"; #endif + +#ifdef S2GEOGRAPHY_VERSION + m.attr("__s2geography_version__") = MACRO_STRINGIFY(S2GEOGRAPHY_VERSION); +#endif } diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index 458ba42..b04e177 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -1,3 +1,5 @@ +from packaging.version import Version + import pyarrow as pa import geoarrow.pyarrow as ga @@ -6,6 +8,12 @@ import spherely +pytestmark = pytest.mark.skipif( + Version(spherely.__s2geography_version__) < Version("0.2.0"), + reason="Needs s2geography >= 0.2.0", +) + + def test_from_geoarrow_wkt(): arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) From a01dc4b31cac8ee2a99cee71b653797fbda62d4e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 15:43:22 +0200 Subject: [PATCH 05/14] TEMP test with my branch of s2geography --- .github/workflows/run-tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 14a116f..3612736 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -57,8 +57,8 @@ jobs: - name: Fetch s2geography uses: actions/checkout@v3 with: - repository: paleolimbot/s2geography - ref: master + repository: jorisvandenbossche/s2geography + ref: geoarrow-update path: deps/s2geography fetch-depth: 0 if: | From d69b272ba7500a9670a55d5de78c92fa94166c45 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 15:50:27 +0200 Subject: [PATCH 06/14] init_geoarrow only when available --- CMakeLists.txt | 4 +++- src/spherely.cpp | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3234581..8cbc7fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,9 @@ target_compile_definitions( spherely PRIVATE VERSION_INFO=${PROJECT_VERSION} - S2GEOGRAPHY_VERSION=${s2geography_VERSION}) + S2GEOGRAPHY_VERSION=${s2geography_VERSION} + S2GEOGRAPHY_VERSION_MAJOR=${s2geography_VERSION_MAJOR} + S2GEOGRAPHY_VERSION_MINOR=${s2geography_VERSION_MINOR}) target_link_libraries(spherely PRIVATE pybind11::module pybind11::lto pybind11::windows_extras diff --git a/src/spherely.cpp b/src/spherely.cpp index b0167c9..a63ea04 100644 --- a/src/spherely.cpp +++ b/src/spherely.cpp @@ -8,7 +8,10 @@ namespace py = pybind11; void init_geography(py::module&); void init_predicates(py::module&); void init_accessors(py::module&); +#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \ + (S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2) void init_geoarrow(py::module&); +#endif PYBIND11_MODULE(spherely, m) { m.doc() = R"pbdoc( @@ -22,7 +25,10 @@ PYBIND11_MODULE(spherely, m) { init_geography(m); init_predicates(m); init_accessors(m); +#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \ + (S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2) init_geoarrow(m); +#endif #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); From 03a07bf7ebab164b8726a5b78d43f3874adfd021 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 15:53:56 +0200 Subject: [PATCH 07/14] fix env --- ci/environment.yml | 1 + tests/test_geoarrow.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/environment.yml b/ci/environment.yml index 6dca01c..28c8655 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -14,3 +14,4 @@ dependencies: - ninja - pytest - pip + - geoarrow-pyarrow diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index b04e177..cbefefd 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -1,6 +1,5 @@ from packaging.version import Version -import pyarrow as pa import geoarrow.pyarrow as ga import pytest From 871681730fdc4a821866d2fdfa2308ae8b4ecd1c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Oct 2024 16:26:29 +0200 Subject: [PATCH 08/14] add geometry_encoding option for WKT/WKB without extenstion type --- src/geoarrow.cpp | 23 ++++++++++++++++++++--- tests/test_geoarrow.py | 28 +++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp index b5860f1..75ce480 100644 --- a/src/geoarrow.cpp +++ b/src/geoarrow.cpp @@ -62,8 +62,9 @@ struct ArrowArray { #endif py::array_t from_geoarrow(py::object input, - bool oriented = false, - bool planar = false) { + bool oriented, + bool planar, + py::object geometry_encoding) { py::tuple capsules = input.attr("__arrow_c_array__")(); py::capsule schema_capsule = capsules[0]; py::capsule array_capsule = capsules[1]; @@ -81,7 +82,15 @@ py::array_t from_geoarrow(py::object input, auto tol = S1Angle::Radians(100.0 / (6371.01 * 1000)); options.set_tessellate_tolerance(tol); } - reader.Init(schema, options); + if (geometry_encoding.is(py::none())) { + reader.Init(schema, options); + } else if (geometry_encoding.equal(py::str("WKT"))) { + reader.Init(s2geog::geoarrow::Reader::InputType::kWKT, options); + } else if (geometry_encoding.equal(py::str("WKB"))) { + reader.Init(s2geog::geoarrow::Reader::InputType::kWKB, options); + } else { + throw std::invalid_argument("'geometry_encoding' should be one of None, 'WKT' or 'WKB'"); + } reader.ReadGeography(array, 0, array->length, &s2geog_vec); // Convert resulting vector to array of python objects @@ -107,6 +116,7 @@ void init_geoarrow(py::module& m) { py::kw_only(), py::arg("oriented") = false, py::arg("planar") = false, + py::arg("geometry_encoding") = py::none(), R"pbdoc( Create an array of geographies from an Arrow array object with a GeoArrow extension type. @@ -138,5 +148,12 @@ void init_geoarrow(py::module& m) { within 100m of the original line. By default (False), it is assumed that the edges are spherical (i.e. represent the shortest path on the sphere between two points). + geometry_encoding : str, default None + By default, the encoding is inferred from the GeoArrow extension + type of the input array. + However, for parsing WKT and WKB it is also possible to pass an + Arrow array without geoarrow type but with a plain string or + binary type, if specifying this keyword with "WKT" or "WKB", + respectively. )pbdoc"); } diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index cbefefd..bf3400f 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -1,5 +1,6 @@ from packaging.version import Version +import pyarrow as pa import geoarrow.pyarrow as ga import pytest @@ -23,6 +24,11 @@ def test_from_geoarrow_wkt(): # np.testing.assert_array_equal(result, expected) assert spherely.equals(result, expected).all() + # without extension type + arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + result = spherely.from_geoarrow(arr, geometry_encoding="WKT") + assert spherely.equals(result, expected).all() + def test_from_geoarrow_wkb(): @@ -33,6 +39,12 @@ def test_from_geoarrow_wkb(): expected = spherely.create([1, 2, 3], [1, 2, 3]) assert spherely.equals(result, expected).all() + # without extension type + arr_wkb = ga.as_wkb(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + arr = arr_wkb.cast(pa.binary()) + result = spherely.from_geoarrow(arr, geometry_encoding="WKB") + assert spherely.equals(result, expected).all() + def test_from_geoarrow_native(): @@ -70,10 +82,24 @@ def test_from_geoarrow_oriented(): spherely.from_geoarrow(arr, oriented=True) -def test_from_wkt_planar(): +def test_from_geoarrow_planar(): arr = ga.as_geoarrow(["LINESTRING (-64 45, 0 45)"]) result = spherely.from_geoarrow(arr) assert spherely.distance(result[0], spherely.Point(45, -30)) > 10000 result = spherely.from_geoarrow(arr, planar=True) assert spherely.distance(result[0], spherely.Point(45, -30)) < 100 + + +def test_from_geoarrow_no_extension_type(): + arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + + with pytest.raises(RuntimeError, match="Expected extension type"): + spherely.from_geoarrow(arr) + + +def test_from_geoarrow_no_extension_type(): + arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + + with pytest.raises(ValueError, match="'geometry_encoding' should be one"): + spherely.from_geoarrow(arr, geometry_encoding="point") From 5c4367d92d92993829ef360562c855cd27defe5a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 6 Oct 2024 10:20:29 +0200 Subject: [PATCH 09/14] correct test name --- tests/test_geoarrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index bf3400f..c7cbdce 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -98,7 +98,7 @@ def test_from_geoarrow_no_extension_type(): spherely.from_geoarrow(arr) -def test_from_geoarrow_no_extension_type(): +def test_from_geoarrow_invalid_encoding(): arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) with pytest.raises(ValueError, match="'geometry_encoding' should be one"): From dc993aadb1a2d0eb0d3e75410f922369cb354bec Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 7 Oct 2024 22:12:34 +0200 Subject: [PATCH 10/14] separate Arrow ABI to separate file + include before s2geography --- src/arrow_abi.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ src/geoarrow.cpp | 56 ++--------------------------------------------- 2 files changed, 59 insertions(+), 54 deletions(-) create mode 100644 src/arrow_abi.h diff --git a/src/arrow_abi.h b/src/arrow_abi.h new file mode 100644 index 0000000..b8b62b8 --- /dev/null +++ b/src/arrow_abi.h @@ -0,0 +1,57 @@ +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Extra guard for versions of Arrow without the canonical guard +#ifndef ARROW_FLAG_DICTIONARY_ORDERED + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE +#endif // ARROW_FLAG_DICTIONARY_ORDERED + +#ifdef __cplusplus +} +#endif diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp index 75ce480..4a427ac 100644 --- a/src/geoarrow.cpp +++ b/src/geoarrow.cpp @@ -1,5 +1,7 @@ +// include this before s2geography to avoid ArrowArray compile issues #include +#include "arrow_abi.h" #include "geography.hpp" #include "pybind11.hpp" @@ -7,60 +9,6 @@ namespace py = pybind11; namespace s2geog = s2geography; using namespace spherely; -#ifdef __cplusplus -extern "C" { -#endif - -// Extra guard for versions of Arrow without the canonical guard -#ifndef ARROW_FLAG_DICTIONARY_ORDERED - -#ifndef ARROW_C_DATA_INTERFACE -#define ARROW_C_DATA_INTERFACE - -#define ARROW_FLAG_DICTIONARY_ORDERED 1 -#define ARROW_FLAG_NULLABLE 2 -#define ARROW_FLAG_MAP_KEYS_SORTED 4 - -struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; -}; - -struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_DATA_INTERFACE -#endif // ARROW_FLAG_DICTIONARY_ORDERED - -#ifdef __cplusplus -} -#endif - py::array_t from_geoarrow(py::object input, bool oriented, bool planar, From 09be68ad6a3e9cfe6feacaf1be2525ff6b0ee900 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 7 Oct 2024 22:13:18 +0200 Subject: [PATCH 11/14] Revert "TEMP test with my branch of s2geography" This reverts commit a01dc4b31cac8ee2a99cee71b653797fbda62d4e. --- .github/workflows/run-tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 3612736..14a116f 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -57,8 +57,8 @@ jobs: - name: Fetch s2geography uses: actions/checkout@v3 with: - repository: jorisvandenbossche/s2geography - ref: geoarrow-update + repository: paleolimbot/s2geography + ref: master path: deps/s2geography fetch-depth: 0 if: | From 4784c183e0a6ff2821811185a1b849e5b67f9b2d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 25 Oct 2024 20:34:44 +0200 Subject: [PATCH 12/14] clean-up + add tessellate_tolerance --- docs/api.rst | 1 + src/geoarrow.cpp | 19 ++++++++++++------- src/spherely.pyi | 17 +++++++++++++++++ tests/test_geoarrow.py | 19 +++++++++---------- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index d96de68..5851fce 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -71,3 +71,4 @@ Input/Output from_wkt to_wkt + from_geoarrow diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp index 4a427ac..1fc8729 100644 --- a/src/geoarrow.cpp +++ b/src/geoarrow.cpp @@ -1,7 +1,7 @@ -// include this before s2geography to avoid ArrowArray compile issues #include #include "arrow_abi.h" +#include "constants.hpp" #include "geography.hpp" #include "pybind11.hpp" @@ -12,6 +12,7 @@ using namespace spherely; py::array_t from_geoarrow(py::object input, bool oriented, bool planar, + float tessellate_tolerance, py::object geometry_encoding) { py::tuple capsules = input.attr("__arrow_c_array__")(); py::capsule schema_capsule = capsules[0]; @@ -26,8 +27,7 @@ py::array_t from_geoarrow(py::object input, s2geog::geoarrow::ImportOptions options; options.set_oriented(oriented); if (planar) { - // TODO replace with constant - auto tol = S1Angle::Radians(100.0 / (6371.01 * 1000)); + auto tol = S1Angle::Radians(tessellate_tolerance / EARTH_RADIUS_METERS); options.set_tessellate_tolerance(tol); } if (geometry_encoding.is(py::none())) { @@ -64,6 +64,7 @@ void init_geoarrow(py::module& m) { py::kw_only(), py::arg("oriented") = false, py::arg("planar") = false, + py::arg("tessellate_tolerance") = 100.0, py::arg("geometry_encoding") = py::none(), R"pbdoc( Create an array of geographies from an Arrow array object with a GeoArrow @@ -90,12 +91,16 @@ void init_geoarrow(py::module& m) { By default (False), it will return the polygon with the smaller area. planar : bool, default False - If set to True, the edges linestrings and polygons are assumed to - be planar. In that case, additional points will be added to the line - while creating the geography objects, to ensure every point is - within 100m of the original line. + If set to True, the edges of linestrings and polygons are assumed + to be linear on the plane. In that case, additional points will + be added to the line while creating the geography objects, to + ensure every point is within 100m of the original line. By default (False), it is assumed that the edges are spherical (i.e. represent the shortest path on the sphere between two points). + tessellate_tolerance : float, default 100.0 + The maximum distance in meters that a point must be moved to + satisfy the planar edge constraint. This is only used if `planar` + is set to True. geometry_encoding : str, default None By default, the encoding is inferred from the GeoArrow extension type of the input array. diff --git a/src/spherely.pyi b/src/spherely.pyi index 883b1f3..eb818ce 100644 --- a/src/spherely.pyi +++ b/src/spherely.pyi @@ -5,7 +5,9 @@ from typing import ( Generic, Iterable, Literal, + Protocol, Sequence, + Tuple, TypeVar, overload, ) @@ -196,3 +198,18 @@ def from_wkt( planar: bool = False, tessellate_tolerance: float = 100.0, ) -> npt.NDArray[Any]: ... + +class ArrowArrayExportable(Protocol): + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... + +def from_geoarrow( + input: ArrowArrayExportable, + /, + *, + oriented: bool = False, + planar: bool = False, + tessellate_tolerance: float = 100.0, + geometry_encoding: str | None = None, +) -> npt.NDArray[Any]: ... diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index c7cbdce..d9a83b8 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -19,7 +19,7 @@ def test_from_geoarrow_wkt(): arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) result = spherely.from_geoarrow(arr) - expected = spherely.create([1, 2, 3], [1, 2, 3]) + expected = spherely.points([1, 2, 3], [1, 2, 3]) # object equality does not yet work # np.testing.assert_array_equal(result, expected) assert spherely.equals(result, expected).all() @@ -36,7 +36,7 @@ def test_from_geoarrow_wkb(): arr_wkb = ga.as_wkb(arr) result = spherely.from_geoarrow(arr_wkb) - expected = spherely.create([1, 2, 3], [1, 2, 3]) + expected = spherely.points([1, 2, 3], [1, 2, 3]) assert spherely.equals(result, expected).all() # without extension type @@ -52,7 +52,7 @@ def test_from_geoarrow_native(): arr_point = ga.as_geoarrow(arr) result = spherely.from_geoarrow(arr_point) - expected = spherely.create([1, 2, 3], [1, 2, 3]) + expected = spherely.points([1, 2, 3], [1, 2, 3]) assert spherely.equals(result, expected).all() @@ -63,10 +63,6 @@ def test_from_geoarrow_native(): ) -# @pytest.mark.skipif( -# Version(spherely.__s2geography_version__) < Version("0.2.0"), -# reason="Needs s2geography >= 0.2.0", -# ) def test_from_geoarrow_oriented(): # by default re-orients the inner ring arr = ga.as_geoarrow([polygon_with_bad_hole_wkt]) @@ -82,13 +78,16 @@ def test_from_geoarrow_oriented(): spherely.from_geoarrow(arr, oriented=True) -def test_from_geoarrow_planar(): +def test_from_wkt_planar(): arr = ga.as_geoarrow(["LINESTRING (-64 45, 0 45)"]) result = spherely.from_geoarrow(arr) - assert spherely.distance(result[0], spherely.Point(45, -30)) > 10000 + assert spherely.distance(result, spherely.point(-30.1, 45)) > 10000 result = spherely.from_geoarrow(arr, planar=True) - assert spherely.distance(result[0], spherely.Point(45, -30)) < 100 + assert spherely.distance(result, spherely.point(-30.1, 45)) < 100 + + result = spherely.from_geoarrow(arr, planar=True, tessellate_tolerance=10) + assert spherely.distance(result, spherely.point(-30.1, 45)) < 10 def test_from_geoarrow_no_extension_type(): From 1b43efdda54d0612c28f674512b056d7a334a870 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 25 Oct 2024 20:57:53 +0200 Subject: [PATCH 13/14] input validation --- src/geoarrow.cpp | 5 +++++ tests/test_geoarrow.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp index 1fc8729..aebb733 100644 --- a/src/geoarrow.cpp +++ b/src/geoarrow.cpp @@ -14,6 +14,11 @@ py::array_t from_geoarrow(py::object input, bool planar, float tessellate_tolerance, py::object geometry_encoding) { + if (!py::hasattr(input, "__arrow_c_array__")) { + throw std::invalid_argument( + "input should be an Arrow-compatible array object (i.e. has an '__arrow_c_array__' " + "method)"); + } py::tuple capsules = input.attr("__arrow_c_array__")(); py::capsule schema_capsule = capsules[0]; py::capsule array_capsule = capsules[1]; diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index d9a83b8..02780d5 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -1,5 +1,6 @@ from packaging.version import Version +import numpy as np import pyarrow as pa import geoarrow.pyarrow as ga @@ -102,3 +103,8 @@ def test_from_geoarrow_invalid_encoding(): with pytest.raises(ValueError, match="'geometry_encoding' should be one"): spherely.from_geoarrow(arr, geometry_encoding="point") + + +def test_from_geoarrow_no_arrow_object(): + with pytest.raises(ValueError, match="input should be an Arrow-compatible array"): + spherely.from_geoarrow(np.array(["POINT (1 1)"], dtype=object)) From b2ff23dcf7c9914e97ac1ccd8b3e149ea64b4bda Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 26 Oct 2024 14:22:57 +0200 Subject: [PATCH 14/14] importorksip for pyarrow --- tests/test_geoarrow.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py index 02780d5..e16a0de 100644 --- a/tests/test_geoarrow.py +++ b/tests/test_geoarrow.py @@ -1,8 +1,6 @@ from packaging.version import Version import numpy as np -import pyarrow as pa -import geoarrow.pyarrow as ga import pytest @@ -14,6 +12,9 @@ reason="Needs s2geography >= 0.2.0", ) +pa = pytest.importorskip("pyarrow") +ga = pytest.importorskip("geoarrow.pyarrow") + def test_from_geoarrow_wkt():