Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clib.Session.create_data: Improve docstrings and fix the comments for the dim parameter #3532

Merged
merged 9 commits into from
Oct 25, 2024
2 changes: 1 addition & 1 deletion pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]:


def sequence_to_ctypes_array(
sequence: Sequence | None, ctype, size: int
sequence: Sequence[int | float] | np.ndarray | None, ctype, size: int
) -> ctp.Array | None:
"""
Convert a sequence of numbers into a ctypes array variable.
Expand Down
195 changes: 129 additions & 66 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,19 @@

DIRECTIONS = ["GMT_IN", "GMT_OUT"]

MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"]
MODES = [
"GMT_CONTAINER_AND_DATA", # Create/Read/Write both container and the data array
"GMT_CONTAINER_ONLY", # Create/Read/Write the container but no data array
"GMT_DATA_ONLY", # Create/Read/Write the container's data array only
"GMT_IS_OUTPUT", # For creating a resource as a container for output
]
MODE_MODIFIERS = [
"GMT_GRID_IS_CARTESIAN",
"GMT_GRID_IS_GEO",
"GMT_WITH_STRINGS",
"GMT_GRID_IS_CARTESIAN", # Grid is not geographic but Cartesian
"GMT_GRID_IS_GEO", # Grid is geographic, not Cartesian
"GMT_WITH_STRINGS", # Allocate string array for GMT_DATASET/GMT_VECTOR/GMT_MATRIX
]

REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"]
REGISTRATIONS = ["GMT_GRID_NODE_REG", "GMT_GRID_PIXEL_REG"]

DTYPES = {
np.int8: "GMT_CHAR",
Expand Down Expand Up @@ -643,53 +648,109 @@ def call_module(self, module: str, args: str | list[str]):

def create_data(
self,
family,
geometry,
mode,
dim=None,
ranges=None,
inc=None,
registration="GMT_GRID_NODE_REG",
pad=None,
):
family: str,
geometry: str,
mode: str,
dim: Sequence[int] | None = None,
ranges: Sequence[float] | None = None,
inc: Sequence[float] | None = None,
registration: Literal[
"GMT_GRID_NODE_REG", "GMT_GRID_PIXEL_REG"
] = "GMT_GRID_NODE_REG",
pad: int | None = None,
) -> ctp.c_void_p:
"""
Create an empty GMT data container.
Create an empty GMT data container and allocate space to hold data.

Valid data families and geometries are in ``FAMILIES`` and ``GEOMETRIES``.

There are two ways to define the dimensions needed to actually allocate memory:

1. Via ``ranges``, ``inc`` and ``registration``.
2. Via ``dim`` and ``registration``.

``dim`` contains up to 4 values and they have different meanings for
different GMT data families:

For ``GMT_DATASET``:

- 0: number of tables
- 1: number of segments per table
- 2: number of rows per segment
- 3: number of columns per row

For ``GMT_VECTOR``:

- 0: number of columns
- 1: number of rows [optional, can be 0 if unknown]
- 2: data type (e.g., ``GMT_DOUBLE``) [Will be overwritten by ``put_vector``]

For ``GMT_GRID``/``GMT_IMAGE``/``GMT_CUBE``/``GMT_MATRIX``:

- 0: number of columns
- 1: number of rows
- 2: number of bands or layers [Ignored for ``GMT_GRID``]
- 3: data type (e.g., ``GMT_DOUBLE``) [For ``GMT_MATRIX`` only, but will be
overwritten by ``put_matrix``]

In other words, ``inc`` is assumed to be 1.0, and ``ranges`` is
[0, dim[0], 0, dim[1]] for pixel registration or
[0, dim[0]-1.0, 0, dim[1]-1.0] for gridline (node) registration.


When creating a grid/image/cube, you can do it in one or two steps:

1. Call this function with ``mode="GMT_CONTAINER_AND_DATA"``. This creates
a header and allocates a grid or an image.
2. Call this function twice:

1. First with ``mode="GMT_CONTAINER_ONLY"``, to create a header only and
compute the dimensions based on other parameters
2. Second with ``mode="GMT_DATA_ONLY"``, to allocate the grid/image/cube
array based on the dimensions already set. This time, you pass NULL for
``dim``/``ranges``/``inc``/``registration``/``pad`` and let ``data`` be
the void pointer returned in the first step.

**Note**: The two-step approach is not implemented yet, since this function
doesn't have the ``data`` parameter.

Parameters
----------
family : str
A valid GMT data family name (e.g., ``'GMT_IS_DATASET'``). See the
``FAMILIES`` attribute for valid names.
geometry : str
A valid GMT data geometry name (e.g., ``'GMT_IS_POINT'``). See the
``GEOMETRIES`` attribute for valid names.
mode : str
A valid GMT data mode (e.g., ``'GMT_IS_OUTPUT'``). See the
``MODES`` attribute for valid names.
dim : list of 4 integers
The dimensions of the dataset. See the documentation for the GMT C
API function ``GMT_Create_Data`` (``src/gmt_api.c``) for the full
range of options regarding 'dim'. If ``None``, will pass in the
NULL pointer.
ranges : list of 4 floats
The dataset extent. Also a bit of a complicated argument. See the C
function documentation. It's called ``range`` in the C function but
it would conflict with the Python built-in ``range`` function.
inc : list of 2 floats
The increments between points of the dataset. See the C function
documentation.
registration : str
The node registration (what the coordinates mean). Can be
``'GMT_GRID_PIXEL_REG'`` or ``'GMT_GRID_NODE_REG'``. Defaults to
``'GMT_GRID_NODE_REG'``.
pad : int
The grid padding. Defaults to ``GMT_PAD_DEFAULT``.
family
A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See ``FAMILIES``
for valid names.
geometry
A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See
``GEOMETRIES`` for valid names.
mode
A valid GMT data mode. See ``MODES`` for valid names. For
``GMT_IS_DATASET``/``GMT_IS_MATRIX``/``GMT_IS_VECTOR``, adding
``GMT_WITH_STRINGS`` to the ``mode`` will allocate the corresponding arrays
of string pointers.
dim
The dimensions of the dataset, as explained above. If ``None``, will pass in
the NULL pointer.
ranges
The data extent.
inc
The increments between points of the dataset.
registration
The node registration. Can be ``"GMT_GRID_PIXEL_REG"`` or
``"GMT_GRID_NODE_REG"``.
pad
The padding for ``GMT_IS_GRID``/``GMT_IS_IMAGE``/``GMT_IS_CUBE``. If
``None``, defaults to ``"GMT_PAD_DEFAULT"``.

For ``GMT_IS_MATRIX``, it can be:

- 0: default row/col orientation [Default]
- 1: row-major format (C)
- 2: column-major format (FORTRAN)

Returns
-------
data_ptr : int
A ctypes pointer (an integer) to the allocated ``GMT_Dataset``
object.
data_ptr
A ctypes pointer (an integer) to the allocated GMT data container.
"""
c_create_data = self.get_libgmt_func(
"GMT_Create_Data",
Expand All @@ -703,8 +764,8 @@ def create_data(
ctp.POINTER(ctp.c_double), # inc
ctp.c_uint, # registration
ctp.c_int, # pad
ctp.c_void_p,
], # data
ctp.c_void_p, # data
],
restype=ctp.c_void_p,
)

Expand All @@ -717,31 +778,30 @@ def create_data(
geometry_int = self._parse_constant(geometry, valid=GEOMETRIES)
registration_int = self._parse_constant(registration, valid=REGISTRATIONS)

# Convert dim, ranges, and inc to ctypes arrays if given (will be None
# if not given to represent NULL pointers)
dim = sequence_to_ctypes_array(dim, ctp.c_uint64, 4)
ranges = sequence_to_ctypes_array(ranges, ctp.c_double, 4)
inc = sequence_to_ctypes_array(inc, ctp.c_double, 2)
# Convert dim, ranges, and inc to ctypes arrays if given (will be None if not
# given to represent NULL pointers)
dim_ctp = sequence_to_ctypes_array(dim, ctp.c_uint64, 4)
ranges_ctp = sequence_to_ctypes_array(ranges, ctp.c_double, 4)
inc_ctp = sequence_to_ctypes_array(inc, ctp.c_double, 2)

# Use a NULL pointer (None) for existing data to indicate that the
# container should be created empty. Fill it in later using put_vector
# and put_matrix.
# Use a NULL pointer (None) for existing data to indicate that the container
# should be created empty. Fill it in later using put_vector and put_matrix.
data_ptr = c_create_data(
self.session_pointer,
family_int,
geometry_int,
mode_int,
dim,
ranges,
inc,
dim_ctp,
ranges_ctp,
inc_ctp,
registration_int,
self._parse_pad(family, pad),
None,
)

if data_ptr is None:
raise GMTCLibError("Failed to create an empty GMT data pointer.")

msg = "Failed to create an empty GMT data pointer."
raise GMTCLibError(msg)
return data_ptr

def _parse_pad(self, family, pad):
Expand Down Expand Up @@ -1248,7 +1308,7 @@ def open_virtualfile(
... family=family,
... geometry=geometry,
... mode="GMT_CONTAINER_ONLY",
... dim=[2, 5, 1, 0], # columns, lines, segments, type
... dim=[2, 5, lib["GMT_INT"], 0], # ncolumns, nrows, dtype, unused
... )
... lib.put_vector(dataset, column=0, vector=x)
... lib.put_vector(dataset, column=1, vector=y)
Expand Down Expand Up @@ -1413,7 +1473,10 @@ def virtualfile_from_vectors(
geometry = "GMT_IS_POINT"

dataset = self.create_data(
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0]
family,
geometry,
mode="GMT_CONTAINER_ONLY",
dim=[columns, rows, self["GMT_DOUBLE"], 0],
)

# Use put_vector for columns with numerical type data
Expand Down Expand Up @@ -1501,12 +1564,13 @@ def virtualfile_from_matrix(self, matrix: np.ndarray) -> Generator[str, None, No
# around until the virtual file is closed.
matrix = np.ascontiguousarray(matrix)
rows, columns = matrix.shape
layers = 1

family = "GMT_IS_DATASET|GMT_VIA_MATRIX"
geometry = "GMT_IS_POINT"

dataset = self.create_data(
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0]
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, layers, 0]
)

self.put_matrix(dataset, matrix)
Expand Down Expand Up @@ -1592,7 +1656,7 @@ def virtualfile_from_grid(self, grid: xr.DataArray) -> Generator[str, None, None
mode=f"GMT_CONTAINER_ONLY|{_gtype}",
ranges=region,
inc=inc,
registration=_reg,
registration=_reg, # type: ignore[arg-type]
)
self.put_matrix(gmt_grid, matrix)
with self.open_virtualfile(
Expand Down Expand Up @@ -1677,8 +1741,7 @@ def virtualfile_from_stringio(
mode="GMT_CONTAINER_ONLY|GMT_WITH_STRINGS",
dim=[n_tables, n_segments, n_rows, n_columns],
)
dataset = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET))
table = dataset.contents.table[0].contents
table = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET)).contents.table[0].contents
for i, segment in enumerate(segments):
seg = table.segment[i].contents
if segment["header"]:
Expand Down
8 changes: 4 additions & 4 deletions pygmt/tests/test_clib_create_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ def test_create_data_dataset():
family="GMT_IS_DATASET|GMT_VIA_VECTOR",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[10, 20, 1, 0], # columns, rows, layers, dtype
dim=[10, 20, 0, 0], # ncolumns, nrows, dtype, unused
)
# Dataset from matrices
data_matrix = lib.create_data(
family="GMT_IS_DATASET|GMT_VIA_MATRIX",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[10, 20, 1, 0],
dim=[10, 20, 1, 0], # ncolumns, nrows, nlayers, dtype
)
assert data_vector != data_matrix

Expand All @@ -40,7 +40,7 @@ def test_create_data_grid_dim():
family="GMT_IS_GRID|GMT_VIA_MATRIX",
geometry="GMT_IS_SURFACE",
mode="GMT_CONTAINER_ONLY",
dim=[10, 20, 1, 0],
dim=[10, 20, 1, 0], # ncolumns, nrows, nlayers, dtype
)


Expand Down Expand Up @@ -94,5 +94,5 @@ def test_create_data_fails():
family="GMT_IS_DATASET",
geometry="GMT_IS_SURFACE",
mode="GMT_CONTAINER_ONLY",
dim=[11, 10, 2, 0],
dim=[11, 10, 2, 0], # n_tables, n_segments, n_rows, n_columns
)
2 changes: 1 addition & 1 deletion pygmt/tests/test_clib_put_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_put_matrix(dtypes):
family="GMT_IS_DATASET|GMT_VIA_MATRIX",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype
dim=[shape[1], shape[0], 1, 0], # ncolumns, nrows, nlayers, dtype
)
data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
lib.put_matrix(dataset, matrix=data)
Expand Down
2 changes: 1 addition & 1 deletion pygmt/tests/test_clib_put_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_put_strings():
family="GMT_IS_DATASET|GMT_VIA_VECTOR",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[2, 5, 1, 0], # columns, rows, layers, dtype
dim=[2, 5, 0, 0], # ncolumns, nrows, dtype, unused
)
x = np.array([1, 2, 3, 4, 5], dtype=np.int32)
y = np.array([6, 7, 8, 9, 10], dtype=np.int32)
Expand Down
Loading