Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: geography constructor from geoarrow #49

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,19 @@ endif()

# Build

add_library(spherely MODULE
set(CPP_SOURCES
src/accessors-geog.cpp
src/creation.cpp
src/geography.cpp
src/io.cpp
src/predicates.cpp
src/spherely.cpp
)
src/spherely.cpp)

if(${s2geography_VERSION} VERSION_GREATER_EQUAL "0.2.0")
set(CPP_SOURCES ${CPP_SOURCES} src/geoarrow.cpp)
endif()

add_library(spherely MODULE ${CPP_SOURCES})

target_compile_definitions(
spherely
Expand Down
1 change: 1 addition & 0 deletions ci/environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ dependencies:
- ninja
- pytest
- pip
- geoarrow-pyarrow
1 change: 1 addition & 0 deletions ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ dependencies:
- ninja
- pytest
- pip
- geoarrow-pyarrow
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,4 @@ Input/Output

from_wkt
to_wkt
from_geoarrow
57 changes: 57 additions & 0 deletions src/arrow_abi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Extra guard for versions of Arrow without the canonical guard
#ifndef ARROW_FLAG_DICTIONARY_ORDERED

#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE

#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4

struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;

// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};

struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;

// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};

#endif // ARROW_C_DATA_INTERFACE
#endif // ARROW_FLAG_DICTIONARY_ORDERED

#ifdef __cplusplus
}
#endif
117 changes: 117 additions & 0 deletions src/geoarrow.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#include <s2geography.h>

#include "arrow_abi.h"
#include "constants.hpp"
#include "geography.hpp"
#include "pybind11.hpp"

namespace py = pybind11;
namespace s2geog = s2geography;
using namespace spherely;

py::array_t<PyObjectGeography> from_geoarrow(py::object input,
bool oriented,
bool planar,
float tessellate_tolerance,
py::object geometry_encoding) {
if (!py::hasattr(input, "__arrow_c_array__")) {
throw std::invalid_argument(
"input should be an Arrow-compatible array object (i.e. has an '__arrow_c_array__' "
"method)");
}
py::tuple capsules = input.attr("__arrow_c_array__")();
py::capsule schema_capsule = capsules[0];
py::capsule array_capsule = capsules[1];

const ArrowSchema* schema = static_cast<const ArrowSchema*>(schema_capsule);
const ArrowArray* array = static_cast<const ArrowArray*>(array_capsule);

s2geog::geoarrow::Reader reader;
std::vector<std::unique_ptr<s2geog::Geography>> s2geog_vec;

s2geog::geoarrow::ImportOptions options;
options.set_oriented(oriented);
if (planar) {
auto tol = S1Angle::Radians(tessellate_tolerance / EARTH_RADIUS_METERS);
options.set_tessellate_tolerance(tol);
}
if (geometry_encoding.is(py::none())) {
reader.Init(schema, options);
} else if (geometry_encoding.equal(py::str("WKT"))) {
reader.Init(s2geog::geoarrow::Reader::InputType::kWKT, options);
} else if (geometry_encoding.equal(py::str("WKB"))) {
reader.Init(s2geog::geoarrow::Reader::InputType::kWKB, options);
} else {
throw std::invalid_argument("'geometry_encoding' should be one of None, 'WKT' or 'WKB'");
}
reader.ReadGeography(array, 0, array->length, &s2geog_vec);

// Convert resulting vector to array of python objects
auto result = py::array_t<PyObjectGeography>(array->length);
py::buffer_info rbuf = result.request();
py::object* rptr = static_cast<py::object*>(rbuf.ptr);

py::ssize_t i = 0;
for (auto& s2geog_ptr : s2geog_vec) {
auto geog_ptr = std::make_unique<spherely::Geography>(std::move(s2geog_ptr));
// return PyObjectGeography::from_geog(std::move(geog_ptr));
rptr[i] = py::cast(std::move(geog_ptr));
i++;
}
return result;
}

void init_geoarrow(py::module& m) {
m.def("from_geoarrow",
&from_geoarrow,
py::arg("input"),
py::pos_only(),
py::kw_only(),
py::arg("oriented") = false,
py::arg("planar") = false,
py::arg("tessellate_tolerance") = 100.0,
py::arg("geometry_encoding") = py::none(),
R"pbdoc(
Create an array of geographies from an Arrow array object with a GeoArrow
extension type.

See https://geoarrow.org/ for details on the GeoArrow specification.

This functions accepts any Arrow array object implementing
the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
method).

.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

Parameters
----------
input : pyarrow.Array, Arrow array
Any array object implementing the Arrow PyCapsule Protocol
(i.e. has a ``__arrow_c_array__`` method). The type of the array
should be one of the geoarrow geometry types.
oriented : bool, default False
Set to True if polygon ring directions are known to be correct
(i.e., exterior rings are defined counter clockwise and interior
rings are defined clockwise).
By default (False), it will return the polygon with the smaller
area.
planar : bool, default False
If set to True, the edges of linestrings and polygons are assumed
to be linear on the plane. In that case, additional points will
be added to the line while creating the geography objects, to
ensure every point is within 100m of the original line.
By default (False), it is assumed that the edges are spherical
(i.e. represent the shortest path on the sphere between two points).
tessellate_tolerance : float, default 100.0
The maximum distance in meters that a point must be moved to
satisfy the planar edge constraint. This is only used if `planar`
is set to True.
geometry_encoding : str, default None
By default, the encoding is inferred from the GeoArrow extension
type of the input array.
However, for parsing WKT and WKB it is also possible to pass an
Arrow array without geoarrow type but with a plain string or
binary type, if specifying this keyword with "WKT" or "WKB",
respectively.
)pbdoc");
}
8 changes: 8 additions & 0 deletions src/spherely.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ void init_creation(py::module&);
void init_predicates(py::module&);
void init_accessors(py::module&);
void init_io(py::module&);
#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \
(S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2)
void init_geoarrow(py::module&);
#endif

PYBIND11_MODULE(spherely, m) {
m.doc() = R"pbdoc(
Expand All @@ -25,6 +29,10 @@ PYBIND11_MODULE(spherely, m) {
init_predicates(m);
init_accessors(m);
init_io(m);
#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \
(S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2)
init_geoarrow(m);
#endif

#ifdef VERSION_INFO
m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
Expand Down
17 changes: 17 additions & 0 deletions src/spherely.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ from typing import (
Generic,
Iterable,
Literal,
Protocol,
Sequence,
Tuple,
TypeVar,
overload,
)
Expand Down Expand Up @@ -196,3 +198,18 @@ def from_wkt(
planar: bool = False,
tessellate_tolerance: float = 100.0,
) -> npt.NDArray[Any]: ...

class ArrowArrayExportable(Protocol):
def __arrow_c_array__(
self, requested_schema: object | None = None
) -> Tuple[object, object]: ...

def from_geoarrow(
input: ArrowArrayExportable,
/,
*,
oriented: bool = False,
planar: bool = False,
tessellate_tolerance: float = 100.0,
geometry_encoding: str | None = None,
) -> npt.NDArray[Any]: ...
111 changes: 111 additions & 0 deletions tests/test_geoarrow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from packaging.version import Version

import numpy as np

import pytest

import spherely


pytestmark = pytest.mark.skipif(
Version(spherely.__s2geography_version__) < Version("0.2.0"),
reason="Needs s2geography >= 0.2.0",
)

pa = pytest.importorskip("pyarrow")
ga = pytest.importorskip("geoarrow.pyarrow")


def test_from_geoarrow_wkt():

arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])

result = spherely.from_geoarrow(arr)
expected = spherely.points([1, 2, 3], [1, 2, 3])
# object equality does not yet work
# np.testing.assert_array_equal(result, expected)
assert spherely.equals(result, expected).all()

# without extension type
arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])
result = spherely.from_geoarrow(arr, geometry_encoding="WKT")
assert spherely.equals(result, expected).all()


def test_from_geoarrow_wkb():

arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])
arr_wkb = ga.as_wkb(arr)

result = spherely.from_geoarrow(arr_wkb)
expected = spherely.points([1, 2, 3], [1, 2, 3])
assert spherely.equals(result, expected).all()

# without extension type
arr_wkb = ga.as_wkb(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])
arr = arr_wkb.cast(pa.binary())
result = spherely.from_geoarrow(arr, geometry_encoding="WKB")
assert spherely.equals(result, expected).all()


def test_from_geoarrow_native():

arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])
arr_point = ga.as_geoarrow(arr)

result = spherely.from_geoarrow(arr_point)
expected = spherely.points([1, 2, 3], [1, 2, 3])
assert spherely.equals(result, expected).all()


polygon_with_bad_hole_wkt = (
"POLYGON "
"((20 35, 10 30, 10 10, 30 5, 45 20, 20 35),"
"(30 20, 20 25, 20 15, 30 20))"
)


def test_from_geoarrow_oriented():
# by default re-orients the inner ring
arr = ga.as_geoarrow([polygon_with_bad_hole_wkt])

result = spherely.from_geoarrow(arr)
assert (
str(result[0])
== "POLYGON ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (20 15, 20 25, 30 20, 20 15))"
)

# if we force to not orient, we get an error
with pytest.raises(RuntimeError, match="Inconsistent loop orientations detected"):
spherely.from_geoarrow(arr, oriented=True)


def test_from_wkt_planar():
arr = ga.as_geoarrow(["LINESTRING (-64 45, 0 45)"])
result = spherely.from_geoarrow(arr)
assert spherely.distance(result, spherely.point(-30.1, 45)) > 10000

result = spherely.from_geoarrow(arr, planar=True)
assert spherely.distance(result, spherely.point(-30.1, 45)) < 100

result = spherely.from_geoarrow(arr, planar=True, tessellate_tolerance=10)
assert spherely.distance(result, spherely.point(-30.1, 45)) < 10


def test_from_geoarrow_no_extension_type():
arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])

with pytest.raises(RuntimeError, match="Expected extension type"):
spherely.from_geoarrow(arr)


def test_from_geoarrow_invalid_encoding():
arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])

with pytest.raises(ValueError, match="'geometry_encoding' should be one"):
spherely.from_geoarrow(arr, geometry_encoding="point")


def test_from_geoarrow_no_arrow_object():
with pytest.raises(ValueError, match="input should be an Arrow-compatible array"):
spherely.from_geoarrow(np.array(["POINT (1 1)"], dtype=object))
Loading