Skip to content

Commit

Permalink
Issue #259/#453#458 finetune crs normalization some more
Browse files Browse the repository at this point in the history
- rename to `normalize_crs` because it is not only about EPSG output, WKT2 output is also allowed
- reworked and simplified pyproj availability handling: when available: use it fully, when not: do best effort normalization
- Move tests into class for better grouping and overview
- add test coverage for "no pyproj available" code path
  • Loading branch information
soxofaan committed Aug 9, 2023
1 parent ba32cca commit b2a0e00
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 232 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added


- Processes that take a CRS as argument now try harder to convert your input into a proper EPSG code, to avoid unexpected results when an invalid argument gets sent to the backend.
- Processes that take a CRS as argument now try harder to normalize your input to
a CRS representation that aligns with the openEO API (using `pyproj` library when available)
([#259](https://github.com/Open-EO/openeo-python-client/issues/259))
- Initial `load_geojson` support with `Connection.load_geojson()` ([#424](https://github.com/Open-EO/openeo-python-client/issues/424))
- Initial `load_url` (for vector cubes) support with `Connection.load_url()` ([#424](https://github.com/Open-EO/openeo-python-client/issues/424))
- Support lambda based property filtering in `Connection.load_stac()` ([#425](https://github.com/Open-EO/openeo-python-client/issues/425))
Expand All @@ -27,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Fix: MultibackendJobManager should stop when finished, also when job finishes with error ([#452](https://github.com/Open-EO/openeo-python-client/issues/432))


## [0.21.1] - 2023-07-19

### Fixed
Expand Down
6 changes: 3 additions & 3 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from openeo.rest.service import Service
from openeo.rest.udp import RESTUserDefinedProcess
from openeo.rest.vectorcube import VectorCube
from openeo.util import get_temporal_extent, dict_no_none, rfc3339, guess_format, crs_to_epsg_code
from openeo.util import get_temporal_extent, dict_no_none, rfc3339, guess_format, normalize_crs

if typing.TYPE_CHECKING:
# Imports for type checking only (circular import issue at runtime).
Expand Down Expand Up @@ -332,7 +332,7 @@ def filter_bbox(
" Use keyword arguments or tuple/list argument instead.")
west, east, north, south = args[:4]
if len(args) > 4:
crs = crs_to_epsg_code(args[4])
crs = normalize_crs(args[4])
elif len(args) == 1 and (isinstance(args[0], (list, tuple)) and len(args[0]) == 4
or isinstance(args[0], (dict, shapely.geometry.base.BaseGeometry, Parameter))):
bbox = args[0]
Expand Down Expand Up @@ -834,7 +834,7 @@ def _get_geometry_argument(
# TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
# TODO #204 alternative for non-standard CRS in GeoJSON object?
epsg_code = crs_to_epsg_code(crs)
epsg_code = normalize_crs(crs)
if epsg_code is not None:
# proj did recognize the CRS
crs_name = f"EPSG:{epsg_code}"
Expand Down
2 changes: 1 addition & 1 deletion openeo/rest/vectorcube.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from openeo.rest._datacube import THIS, UDF, _ProcessGraphAbstraction, build_child_callback
from openeo.rest.job import BatchJob
from openeo.rest.mlmodel import MlModel
from openeo.util import dict_no_none, guess_format, crs_to_epsg_code, to_bbox_dict, InvalidBBoxException
from openeo.util import dict_no_none, guess_format, to_bbox_dict, InvalidBBoxException

if typing.TYPE_CHECKING:
# Imports for type checking only (circular import issue at runtime).
Expand Down
119 changes: 49 additions & 70 deletions openeo/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
import shapely.geometry.base
from deprecated import deprecated

try:
# pyproj is an optional dependency
import pyproj
except ImportError:
pyproj = None


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -535,7 +542,7 @@ class BBoxDict(dict):
def __init__(self, *, west: float, south: float, east: float, north: float, crs: Optional[Union[str, int]] = None):
super().__init__(west=west, south=south, east=east, north=north)
if crs is not None:
self.update(crs=crs_to_epsg_code(crs))
self.update(crs=normalize_crs(crs))

# TODO: provide west, south, east, north, crs as @properties? Read-only or read-write?

Expand Down Expand Up @@ -635,86 +642,58 @@ def get(self, fraction: float) -> str:
return f"{self.left}{bar:{self.fill}<{width}s}{self.right}"


def crs_to_epsg_code(crs: Union[str, int, dict, None]) -> Optional[int]:
"""Convert a CRS string or int to an integer EPGS code, where CRS usually comes from user input.
Three cases:
- If it is already an integer we just keep it.
- If it is None it stays None, and empty strings become None as well.
- If it is a string we try to parse it with the pyproj library.
- Strings of the form "EPSG:<int>" will be converted to teh value <int>
- For any other strings formats, it will work if pyproj supports is,
otherwise it won't.
The result is **always** an EPSG code, so the CRS should be one that is
defined in EPSG. For any other definitions pyproj will only give you the
closest EPSG match and that result is possibly inaccurate.
Note that we also need to support WKT string (WKT2),
see also: https://github.com/Open-EO/openeo-processes/issues/58
def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str]:
"""
Normalize given data structure (typically just an int or string)
that encodes a CRS (Coordinate Reference System) to an EPSG (int) code or WKT2 CRS string.
For very the oldest supported version of Python: v3.6 there is a problem
because the pyproj version that is compatible with Python 3.6 is too old
and does not properly support WKT2.
Behavior and data structure support depends on the availability of the ``pyproj`` library:
- If the ``pyproj`` library is available: use that to do parsing and conversion.
This means that anything that is supported by ``pyproj.CRS.from_user_input`` is allowed.
See the ``pyproj`` docs for more details.
- Otherwise, some best effort validation is done:
EPSG looking int/str values will be parsed as such, other strings will be assumed to be WKT2 already.
Other data structures will not be accepted.
For a list of CRS input formats that proj supports
see: https://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input
:param crs: data structure that encodes a CRS, typically just an int or string value.
If the ``pyproj`` library is available, everything supported by it is allowed
:param use_pyproj: whether ``pyproj`` should be leveraged at all
(mainly useful for testing the "no pyproj available" code path)
:param crs:
Input from user for the Coordinate Reference System to convert to an
EPSG code.
:return: EPSG code as int, or WKT2 string. Or None if input was empty .
:raises ValueError:
When the crs is a not a supported CRS string.
:raises TypeError:
When crs is none of the supported types: str, int, None
When the given CRS data can not be parsed/converted/normalized.
:return: An EPGS code if it could be found, otherwise None
"""

# Only convert to the default if it is an explicitly allowed type.
if crs in (None, "", {}):
return None

if not isinstance(crs, (int, str, dict)):
raise TypeError("The allowed type for the parameter 'crs' are: str, int, dict and None")

# If we want to stop processing as soon as we have an int value, then we
# should not accept values that are complete non-sense, as best as we can.
crs_intermediate = crs
if isinstance(crs, int):
crs_intermediate = crs
elif isinstance(crs, str):
# This conversion is needed to support strings that only contain an integer,
# e.g. "4326" though it is a string, is a otherwise a correct EPSG code.
if pyproj and use_pyproj:
try:
crs_intermediate = int(crs)
except ValueError as exc:
# So we need to process it with pyproj, below.
logger.debug("crs_to_epsg_code received crs input that was not an int: crs={crs}, exception caught: {exc}")

if isinstance(crs_intermediate, int):
if crs_intermediate <= 0:
raise ValueError(f"When crs is an integer value it has to be > 0.")
else:
return crs_intermediate

try:
import pyproj.crs
except ImportError as exc:
message = (
f"Cannot convert CRS string: {crs}. "
+ "Need pyproj to convert this CRS string but the pyproj library is not installed."
)
logger.error(message)
raise ValueError(message) from ImportError
# (if available:) let pyproj do the validation/parsing
crs_obj = pyproj.CRS.from_user_input(crs)
# Convert back to EPSG int or WKT2 string
crs = crs_obj.to_epsg() or crs_obj.to_wkt()
except pyproj.ProjError as e:
raise ValueError(f"Failed to normalize CRS data with pyproj: {crs}") from e
else:
try:
converted_crs = pyproj.crs.CRS.from_user_input(crs)
except pyproj.exceptions.CRSError as exc:
logger.error(f"Could not convert CRS string to EPSG code: crs={crs}, exception: {exc}", exc_info=True)
raise ValueError(crs) from exc
# Best effort simple validation/normalization
if isinstance(crs, int) and crs > 0:
# Assume int is already valid EPSG code
pass
elif isinstance(crs, str):
# Parse as EPSG int code if it looks like that,
# otherwise: leave it as-is, assuming it is a valid WKT2 CRS string
if re.match(r"^(epsg:)?\d+$", crs.strip(), flags=re.IGNORECASE):
crs = int(crs.split(":")[-1])
elif "GEOGCRS[" in crs:
# Very simple WKT2 CRS detection heuristic
logger.warning(f"Assuming this is a valid WK2 CRS string: {repr_truncate(crs)}")
else:
raise ValueError(f"Can not normalize CRS string {repr_truncate(crs)}")
else:
return converted_crs.to_epsg()
raise ValueError(f"Can not normalize CRS data {type(crs)}")

return crs
Loading

0 comments on commit b2a0e00

Please sign in to comment.