diff --git a/docs/index.rst b/docs/index.rst index 8d1b009..d87fabf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,7 @@ Multi library support Currently, we support the following dataframe libraries with identical syntax: + - pandas - dask.dataframe - polars diff --git a/src/akimbo/__init__.py b/src/akimbo/__init__.py index 8816d4c..c336aef 100644 --- a/src/akimbo/__init__.py +++ b/src/akimbo/__init__.py @@ -2,15 +2,19 @@ from awkward import behavior, mixin_class, mixin_class_method # re-export +import akimbo.datetimes as datetimes import akimbo.mixin as mixin +import akimbo.strings as strings from akimbo.io import read_json, read_parquet from akimbo.version import version as __version__ # noqa __all__ = ( + "datetimes", "mixin", "read_parquet", "read_json", "behavior", "mixin_class", "mixin_class_method", + "strings", ) diff --git a/src/akimbo/datetimes.py b/src/akimbo/datetimes.py index b372c76..58b7922 100644 --- a/src/akimbo/datetimes.py +++ b/src/akimbo/datetimes.py @@ -6,6 +6,8 @@ import awkward as ak import pyarrow.compute as pc +from akimbo.mixin import Accessor + def _run_unary(layout, op, kind=None, **kw): if layout.is_leaf and (kind is None or layout.dtype.kind == kind): @@ -34,7 +36,7 @@ def func(arrays, **kwargs): return ak.transform(func, arr, other) -def dec(func, mode="unary"): +def dec(func, mode="unary", kind=None): # TODO: require kind= on functions that need timestamps if mode == "unary": @@ -46,7 +48,7 @@ def f(self, *args, **kwargs): kwargs.update({k: arg for k, arg in zip(sig, args)}) return self.accessor.to_output( - run_unary(self.accessor.array, func, **kwargs) + run_unary(self.accessor.array, func, kind=kind, **kwargs) ) elif mode == "binary": @@ -58,7 +60,9 @@ def f(self, other, *args, **kwargs): kwargs.update({k: arg for k, arg in zip(sig, args)}) return self.accessor.to_output( - run_binary(self.accessor.array, other.ak.array, func, **kwargs) + run_binary( + self.accessor.array, other.ak.array, func, kind=kind, **kwargs + ) ) else: @@ -123,3 +127,6 @@ def _to_arrow(array): def _make_unit_compatible(array): # TODO, actually convert units if not compatible return array + + +Accessor.register_accessor("dt", DatetimeAccessor) diff --git a/src/akimbo/mixin.py b/src/akimbo/mixin.py index aacc4d5..2690c1c 100644 --- a/src/akimbo/mixin.py +++ b/src/akimbo/mixin.py @@ -134,6 +134,7 @@ class Accessor(ArithmeticMixin): aggregations = True # False means data is partitioned series_type = () dataframe_type = () + subaccessors = {} def __init__(self, obj, behavior=None): self._obj = obj @@ -205,19 +206,9 @@ def array(self) -> ak.Array: """Data as an awkward array""" return ak.with_name(ak.from_arrow(self.arrow), self._behavior) - @property - def str(self): - """Nested string operations""" - from akimbo.strings import StringAccessor - - return StringAccessor(self) - - @property - def dt(self): - """Nested datetime operations""" - from akimbo.datetimes import DatetimeAccessor - - return DatetimeAccessor(self) + @classmethod + def register_accessor(cls, name, klass): + cls.subaccessors[name] = klass def merge(self): """Make a single complex series out of the columns of a dataframe""" @@ -255,6 +246,8 @@ def __getattr__(self, item): if hasattr(arr, item) and callable(getattr(arr, item)): func = getattr(arr, item) args = () + elif item in self.subaccessors: + return self.subaccessors[item](self) elif hasattr(ak, item): func = getattr(ak, item) args = (arr,) diff --git a/src/akimbo/strings.py b/src/akimbo/strings.py index d98be7a..be65bfc 100644 --- a/src/akimbo/strings.py +++ b/src/akimbo/strings.py @@ -5,6 +5,8 @@ import awkward as ak +from akimbo.mixin import Accessor + def _encode(layout): if layout.is_record: @@ -99,3 +101,6 @@ def f(*args, **kwargs): def __dir__(self) -> list[str]: return sorted(methods) + + +Accessor.register_accessor("str", StringAccessor)