Skip to content

Commit

Permalink
refactor: Reuse GeoArrowArrayReader in ReaderImpl (#29)
Browse files Browse the repository at this point in the history
I am not entirely sure this is desired (it gives a bit of overhead), but
while trying to understand the reader/writer/builder/visitor hierarchy
in geoarrow-c, I noticed that here we were not using
`GeoArrowArrayReader`, but the separate readers for WKT/WKB
(`GeoArrowWKTReader`/`GeoArrowWKBReader`) and `GeoArrowArrayViewVisit`
for native coords, duplicating the iteration function for WKT/WKB.
While the `GeoArrowArrayReader` already abstracts away those three
options behind a single interface.

This gives a bit more overhead for the scalar `WKTReader` (I assume
because this constructs a `geoarrow::Reader` every time, and this now
not just initializes the `GeoArrowWKTReader`, but also the other structs
for WKB and native).
Given this is the scalar version (and you can always ensure to start
from arrow input for optimal performance), I think that overhead is
acceptable. But alternatively, we could also update
`GeoArrowArrayReader` to only initialize the WKT/WKB readers if needed
(when it gets data passed with that type), instead of upfront. Or
another option is to expose the `GeoArrowArrayViewVisitWKT/WKB`
functions in geoarrow.h, so that we can at least avoid reimplementing
those.

Also added some brief doc comments.
  • Loading branch information
jorisvandenbossche authored Oct 9, 2024
1 parent 829a0fe commit 9ebd7b1
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 82 deletions.
94 changes: 12 additions & 82 deletions src/s2geography/geoarrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ static inline int8_t GeoArrowBitmapReaderNextIsNull(
return (bitmap_reader->byte & (1 << bitmap_reader->bit_i)) == 0;
}

/// \brief Construct Geography objects while visiting a GeoArrow array.
///
/// This class implements the visitor callbacks that construct Geography
/// objects. This visitor gets called by the Reader, which iterates over the
/// features in a GeoArrow ArrowArray (that can be either in WKT or WKB
/// serialized format or in native format).
class Constructor {
public:
Constructor(const ImportOptions& options) : options_(options) {
Expand Down Expand Up @@ -577,17 +583,12 @@ class ReaderImpl {
public:
ReaderImpl() {
error_.message[0] = '\0';
wkt_reader_.private_data = nullptr;
wkb_reader_.private_data = nullptr;
reader_.private_data = nullptr;
}

~ReaderImpl() {
if (wkt_reader_.private_data != nullptr) {
GeoArrowWKTReaderReset(&wkt_reader_);
}

if (wkb_reader_.private_data != nullptr) {
GeoArrowWKBReaderReset(&wkb_reader_);
if (reader_.private_data != nullptr) {
GeoArrowArrayReaderReset(&reader_);
}
}

Expand All @@ -612,13 +613,7 @@ class ReaderImpl {
constructor_->InitVisitor(&visitor_);
visitor_.error = &error_;

if (array_view_.schema_view.type == GEOARROW_TYPE_WKT) {
GeoArrowWKTReaderInit(&wkt_reader_);
}

if (array_view_.schema_view.type == GEOARROW_TYPE_WKB) {
GeoArrowWKBReaderInit(&wkb_reader_);
}
GeoArrowArrayReaderInit(&reader_);
}

void ReadGeography(const ArrowArray* array, int64_t offset, int64_t length,
Expand All @@ -631,83 +626,18 @@ class ReaderImpl {
}

constructor_->SetOutput(out);

switch (array_view_.schema_view.type) {
case GEOARROW_TYPE_WKT:
code = VisitWKT(offset, length);
break;
case GEOARROW_TYPE_WKB:
code = VisitWKB(offset, length);
break;
default:
code = GeoArrowArrayViewVisit(&array_view_, offset, length, &visitor_);
break;
}

code = GeoArrowArrayReaderVisit(&reader_, &array_view_, offset, length, &visitor_);
ThrowNotOk(code);
}

private:
ImportOptions options_;
std::unique_ptr<FeatureConstructor> constructor_;
GeoArrowArrayView array_view_;
GeoArrowWKTReader wkt_reader_;
GeoArrowWKBReader wkb_reader_;
GeoArrowArrayReader reader_;
GeoArrowVisitor visitor_;
GeoArrowError error_;

int VisitWKT(int64_t offset, int64_t length) {
offset += array_view_.offset[0];
const uint8_t* validity = array_view_.validity_bitmap;
const int32_t* offsets = array_view_.offsets[0];
const char* data = reinterpret_cast<const char*>(array_view_.data);
GeoArrowStringView item;

GeoArrowBitmapReader bitmap;
GeoArrowBitmapReaderInit(&bitmap, validity, offset);

for (int64_t i = 0; i < length; i++) {
if (GeoArrowBitmapReaderNextIsNull(&bitmap)) {
GEOARROW_RETURN_NOT_OK(visitor_.feat_start(&visitor_));
GEOARROW_RETURN_NOT_OK(visitor_.null_feat(&visitor_));
GEOARROW_RETURN_NOT_OK(visitor_.feat_end(&visitor_));
} else {
item.size_bytes = offsets[offset + i + 1] - offsets[offset + i];
item.data = data + offsets[offset + i];
GEOARROW_RETURN_NOT_OK(
GeoArrowWKTReaderVisit(&wkt_reader_, item, &visitor_));
}
}

return GEOARROW_OK;
}

int VisitWKB(int64_t offset, int64_t length) {
offset += array_view_.offset[0];
const uint8_t* validity = array_view_.validity_bitmap;
const int32_t* offsets = array_view_.offsets[0];
const uint8_t* data = array_view_.data;
GeoArrowBufferView item;

GeoArrowBitmapReader bitmap;
GeoArrowBitmapReaderInit(&bitmap, validity, offset);

for (int64_t i = 0; i < length; i++) {
if (GeoArrowBitmapReaderNextIsNull(&bitmap)) {
GEOARROW_RETURN_NOT_OK(visitor_.feat_start(&visitor_));
GEOARROW_RETURN_NOT_OK(visitor_.null_feat(&visitor_));
GEOARROW_RETURN_NOT_OK(visitor_.feat_end(&visitor_));
} else {
item.size_bytes = offsets[offset + i + 1] - offsets[offset + i];
item.data = data + offsets[offset + i];
GEOARROW_RETURN_NOT_OK(
GeoArrowWKBReaderVisit(&wkb_reader_, item, &visitor_));
}
}

return GEOARROW_OK;
}

void ThrowNotOk(int code) {
if (code != GEOARROW_OK) {
throw Exception(error_.message);
Expand Down
4 changes: 4 additions & 0 deletions src/s2geography/geoarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ class ImportOptions {

class ReaderImpl;

/// \brief Array reader for any GeoArrow extension array
///
/// This class is used to convert an ArrowArray with geoarrow data (serialized
/// or native) into a vector of Geography objects.
class Reader {
public:
enum class InputType { kWKT, kWKB };
Expand Down

0 comments on commit 9ebd7b1

Please sign in to comment.