-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: geography constructor from geoarrow
- Loading branch information
1 parent
e41dbfd
commit 56e4b52
Showing
7 changed files
with
298 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ dependencies: | |
- ninja | ||
- pytest | ||
- pip | ||
- geoarrow-pyarrow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,4 @@ dependencies: | |
- ninja | ||
- pytest | ||
- pip | ||
- geoarrow-pyarrow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#pragma once | ||
|
||
#include <stdint.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// Extra guard for versions of Arrow without the canonical guard | ||
#ifndef ARROW_FLAG_DICTIONARY_ORDERED | ||
|
||
#ifndef ARROW_C_DATA_INTERFACE | ||
#define ARROW_C_DATA_INTERFACE | ||
|
||
#define ARROW_FLAG_DICTIONARY_ORDERED 1 | ||
#define ARROW_FLAG_NULLABLE 2 | ||
#define ARROW_FLAG_MAP_KEYS_SORTED 4 | ||
|
||
struct ArrowSchema { | ||
// Array type description | ||
const char* format; | ||
const char* name; | ||
const char* metadata; | ||
int64_t flags; | ||
int64_t n_children; | ||
struct ArrowSchema** children; | ||
struct ArrowSchema* dictionary; | ||
|
||
// Release callback | ||
void (*release)(struct ArrowSchema*); | ||
// Opaque producer-specific data | ||
void* private_data; | ||
}; | ||
|
||
struct ArrowArray { | ||
// Array data description | ||
int64_t length; | ||
int64_t null_count; | ||
int64_t offset; | ||
int64_t n_buffers; | ||
int64_t n_children; | ||
const void** buffers; | ||
struct ArrowArray** children; | ||
struct ArrowArray* dictionary; | ||
|
||
// Release callback | ||
void (*release)(struct ArrowArray*); | ||
// Opaque producer-specific data | ||
void* private_data; | ||
}; | ||
|
||
#endif // ARROW_C_DATA_INTERFACE | ||
#endif // ARROW_FLAG_DICTIONARY_ORDERED | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
// include this before s2geography to avoid ArrowArray compile issues | ||
#include <s2geography.h> | ||
|
||
#include "arrow_abi.h" | ||
#include "geography.hpp" | ||
#include "pybind11.hpp" | ||
|
||
namespace py = pybind11; | ||
namespace s2geog = s2geography; | ||
using namespace spherely; | ||
|
||
py::array_t<PyObjectGeography> from_geoarrow(py::object input, | ||
bool oriented, | ||
bool planar, | ||
py::object geometry_encoding) { | ||
py::tuple capsules = input.attr("__arrow_c_array__")(); | ||
py::capsule schema_capsule = capsules[0]; | ||
py::capsule array_capsule = capsules[1]; | ||
|
||
const ArrowSchema* schema = static_cast<const ArrowSchema*>(schema_capsule); | ||
const ArrowArray* array = static_cast<const ArrowArray*>(array_capsule); | ||
|
||
s2geog::geoarrow::Reader reader; | ||
std::vector<std::unique_ptr<s2geog::Geography>> s2geog_vec; | ||
|
||
s2geog::geoarrow::ImportOptions options; | ||
options.set_oriented(oriented); | ||
if (planar) { | ||
// TODO replace with constant | ||
auto tol = S1Angle::Radians(100.0 / (6371.01 * 1000)); | ||
options.set_tessellate_tolerance(tol); | ||
} | ||
if (geometry_encoding.is(py::none())) { | ||
reader.Init(schema, options); | ||
} else if (geometry_encoding.equal(py::str("WKT"))) { | ||
reader.Init(s2geog::geoarrow::Reader::InputType::kWKT, options); | ||
} else if (geometry_encoding.equal(py::str("WKB"))) { | ||
reader.Init(s2geog::geoarrow::Reader::InputType::kWKB, options); | ||
} else { | ||
throw std::invalid_argument("'geometry_encoding' should be one of None, 'WKT' or 'WKB'"); | ||
} | ||
reader.ReadGeography(array, 0, array->length, &s2geog_vec); | ||
|
||
// Convert resulting vector to array of python objects | ||
auto result = py::array_t<PyObjectGeography>(array->length); | ||
py::buffer_info rbuf = result.request(); | ||
py::object* rptr = static_cast<py::object*>(rbuf.ptr); | ||
|
||
py::ssize_t i = 0; | ||
for (auto& s2geog_ptr : s2geog_vec) { | ||
auto geog_ptr = std::make_unique<spherely::Geography>(std::move(s2geog_ptr)); | ||
// return PyObjectGeography::from_geog(std::move(geog_ptr)); | ||
rptr[i] = py::cast(std::move(geog_ptr)); | ||
i++; | ||
} | ||
return result; | ||
} | ||
|
||
void init_geoarrow(py::module& m) { | ||
m.def("from_geoarrow", | ||
&from_geoarrow, | ||
py::arg("input"), | ||
py::pos_only(), | ||
py::kw_only(), | ||
py::arg("oriented") = false, | ||
py::arg("planar") = false, | ||
py::arg("geometry_encoding") = py::none(), | ||
R"pbdoc( | ||
Create an array of geographies from an Arrow array object with a GeoArrow | ||
extension type. | ||
See https://geoarrow.org/ for details on the GeoArrow specification. | ||
This functions accepts any Arrow array object implementing | ||
the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__`` | ||
method). | ||
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html | ||
Parameters | ||
---------- | ||
input : pyarrow.Array, Arrow array | ||
Any array object implementing the Arrow PyCapsule Protocol | ||
(i.e. has a ``__arrow_c_array__`` method). The type of the array | ||
should be one of the geoarrow geometry types. | ||
oriented : bool, default False | ||
Set to True if polygon ring directions are known to be correct | ||
(i.e., exterior rings are defined counter clockwise and interior | ||
rings are defined clockwise). | ||
By default (False), it will return the polygon with the smaller | ||
area. | ||
planar : bool, default False | ||
If set to True, the edges linestrings and polygons are assumed to | ||
be planar. In that case, additional points will be added to the line | ||
while creating the geography objects, to ensure every point is | ||
within 100m of the original line. | ||
By default (False), it is assumed that the edges are spherical | ||
(i.e. represent the shortest path on the sphere between two points). | ||
geometry_encoding : str, default None | ||
By default, the encoding is inferred from the GeoArrow extension | ||
type of the input array. | ||
However, for parsing WKT and WKB it is also possible to pass an | ||
Arrow array without geoarrow type but with a plain string or | ||
binary type, if specifying this keyword with "WKT" or "WKB", | ||
respectively. | ||
)pbdoc"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
from packaging.version import Version | ||
|
||
import pyarrow as pa | ||
import geoarrow.pyarrow as ga | ||
|
||
import pytest | ||
|
||
import spherely | ||
|
||
|
||
pytestmark = pytest.mark.skipif( | ||
Version(spherely.__s2geography_version__) < Version("0.2.0"), | ||
reason="Needs s2geography >= 0.2.0", | ||
) | ||
|
||
|
||
def test_from_geoarrow_wkt(): | ||
|
||
arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
|
||
result = spherely.from_geoarrow(arr) | ||
expected = spherely.create([1, 2, 3], [1, 2, 3]) | ||
# object equality does not yet work | ||
# np.testing.assert_array_equal(result, expected) | ||
assert spherely.equals(result, expected).all() | ||
|
||
# without extension type | ||
arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
result = spherely.from_geoarrow(arr, geometry_encoding="WKT") | ||
assert spherely.equals(result, expected).all() | ||
|
||
|
||
def test_from_geoarrow_wkb(): | ||
|
||
arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
arr_wkb = ga.as_wkb(arr) | ||
|
||
result = spherely.from_geoarrow(arr_wkb) | ||
expected = spherely.create([1, 2, 3], [1, 2, 3]) | ||
assert spherely.equals(result, expected).all() | ||
|
||
# without extension type | ||
arr_wkb = ga.as_wkb(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
arr = arr_wkb.cast(pa.binary()) | ||
result = spherely.from_geoarrow(arr, geometry_encoding="WKB") | ||
assert spherely.equals(result, expected).all() | ||
|
||
|
||
def test_from_geoarrow_native(): | ||
|
||
arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
arr_point = ga.as_geoarrow(arr) | ||
|
||
result = spherely.from_geoarrow(arr_point) | ||
expected = spherely.create([1, 2, 3], [1, 2, 3]) | ||
assert spherely.equals(result, expected).all() | ||
|
||
|
||
polygon_with_bad_hole_wkt = ( | ||
"POLYGON " | ||
"((20 35, 10 30, 10 10, 30 5, 45 20, 20 35)," | ||
"(30 20, 20 25, 20 15, 30 20))" | ||
) | ||
|
||
|
||
# @pytest.mark.skipif( | ||
# Version(spherely.__s2geography_version__) < Version("0.2.0"), | ||
# reason="Needs s2geography >= 0.2.0", | ||
# ) | ||
def test_from_geoarrow_oriented(): | ||
# by default re-orients the inner ring | ||
arr = ga.as_geoarrow([polygon_with_bad_hole_wkt]) | ||
|
||
result = spherely.from_geoarrow(arr) | ||
assert ( | ||
str(result[0]) | ||
== "POLYGON ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (20 15, 20 25, 30 20, 20 15))" | ||
) | ||
|
||
# if we force to not orient, we get an error | ||
with pytest.raises(RuntimeError, match="Inconsistent loop orientations detected"): | ||
spherely.from_geoarrow(arr, oriented=True) | ||
|
||
|
||
def test_from_geoarrow_planar(): | ||
arr = ga.as_geoarrow(["LINESTRING (-64 45, 0 45)"]) | ||
result = spherely.from_geoarrow(arr) | ||
assert spherely.distance(result[0], spherely.Point(45, -30)) > 10000 | ||
|
||
result = spherely.from_geoarrow(arr, planar=True) | ||
assert spherely.distance(result[0], spherely.Point(45, -30)) < 100 | ||
|
||
|
||
def test_from_geoarrow_no_extension_type(): | ||
arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
|
||
with pytest.raises(RuntimeError, match="Expected extension type"): | ||
spherely.from_geoarrow(arr) | ||
|
||
|
||
def test_from_geoarrow_invalid_encoding(): | ||
arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) | ||
|
||
with pytest.raises(ValueError, match="'geometry_encoding' should be one"): | ||
spherely.from_geoarrow(arr, geometry_encoding="point") |