diff --git a/.environment.yml b/.environment.yml
index 4a55fc9..eed4fee 100644
--- a/.environment.yml
+++ b/.environment.yml
@@ -6,13 +6,27 @@ dependencies:
- python=3.11
- affine
- black
+ - contextily
- geopandas
+ - jupyter
+ - matplotlib
- nbstripout
+ - networkx
- numpy
+ - openpyxl
+ - pandera
- pip
+ - pyarrow
+ - pyogrio
- pytest
- pytest-cov
- rasterio
+ - scipy
+ - seaborn
- shapely
- pip:
+ - hilbertcurve
+ - irv_autopkg_client
- python-igraph
+ - snkit
+ - tqdm
diff --git a/README.md b/README.md
index ca9ef06..f966ffd 100644
--- a/README.md
+++ b/README.md
@@ -192,10 +192,10 @@ The `snail.core.intersections` module is built using `pybind11` with
>
> Copyright (c) 2020-23 Tom Russell and all [snail contributors](https://github.com/nismod/snail/graphs/contributors)
-This library is developed by researchers in the [Oxford Programme for Sustainable
+This library is developed by researchers in the [Oxford Programme for Sustainable
Infrastructure Systems](https://opsis.eci.ox.ac.uk/) at the University of Oxford,
funded by multiple research projects.
-This research received funding from the FCDO Climate Compatible Growth Programme.
-The views expressed here do not necessarily reflect the UK government's official
+This research received funding from the FCDO Climate Compatible Growth Programme.
+The views expressed here do not necessarily reflect the UK government's official
policies.
diff --git a/pyproject.toml b/pyproject.toml
index 9def7df..d6b760a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name="nismod-snail"
-version="0.3.2"
+version="0.4.0"
license={file = "LICENSE"}
description="The spatial networks impact assessment library"
readme="README.md"
@@ -24,22 +24,37 @@ keywords=[]
requires-python=">=3.8"
dependencies=[
"geopandas",
+ "matplotlib",
+ "openpyxl",
+ "pandera",
"pyarrow",
+ "pyogrio",
"python-igraph",
"rasterio",
"shapely",
+ "scipy"
]
[project.optional-dependencies]
dev=[
"affine",
"black",
+ "hilbertcurve",
"nbstripout",
"numpy",
"pytest-cov",
"pytest",
]
docs=["sphinx", "m2r2"]
+tutorials=[
+ "contextily",
+ "irv_autopkg_client",
+ "jupyter",
+ "networkx",
+ "seaborn",
+ "snkit",
+ "tqdm",
+]
[project.urls]
Homepage = "https://github.com/nismod/snail"
diff --git a/src/snail/__init__.py b/src/snail/__init__.py
index b06d6dd..f9fb3e3 100644
--- a/src/snail/__init__.py
+++ b/src/snail/__init__.py
@@ -1,6 +1,5 @@
"""snail - the spatial networks impact assessment library
"""
-import pkg_resources
# Import things to define what is accessible directly on snail, when a client
# writes::
@@ -10,8 +9,10 @@
# from snail.network import Network
try:
- __version__ = pkg_resources.get_distribution(__name__).version
-except pkg_resources.DistributionNotFound:
+ from importlib.metadata import PackageNotFoundError, version
+
+ __version__ = version("nismod-snail")
+except PackageNotFoundError:
__version__ = "unknown"
diff --git a/src/snail/cli.py b/src/snail/cli.py
index a4f45e3..8ce26af 100644
--- a/src/snail/cli.py
+++ b/src/snail/cli.py
@@ -8,8 +8,9 @@
import pandas
from snail.intersection import (
- Transform,
+ GridDefinition,
apply_indices,
+ get_raster_values_for_splits,
prepare_linestrings,
prepare_polygons,
prepare_points,
@@ -19,7 +20,7 @@
split_points,
)
from snail.io import (
- associate_raster_file,
+ read_raster_band_data,
associate_raster_files,
read_features,
read_raster_metadata,
@@ -171,7 +172,7 @@ def snail(args=None):
def split(args):
"""snail split command"""
if args.raster:
- transform, all_bands = read_raster_metadata(args.raster)
+ grid, all_bands = read_raster_metadata(args.raster)
else:
crs = None
width = args.width
@@ -181,26 +182,35 @@ def split(args):
sys.exit(
"Error: Expected either a raster file or transform, width and height of splitting grid"
)
- transform = Transform(crs, width, height, affine_transform)
- logging.info(f"Splitting grid {transform=}")
+ grid = GridDefinition(crs, width, height, affine_transform)
+ logging.info(f"Splitting {grid=}")
- features = geopandas.read_file(args.features)
+ features = read_features(Path(args.features))
features_crs = features.crs
geom_type = _sample_geom_type(features)
if "Point" in geom_type:
+ logging.info(f"Preparing points")
prepared = prepare_points(features)
- splits = split_points(prepared)
+ logging.info(f"Splitting points")
+ splits = split_features_for_rasters(prepared, [grid], split_points)
elif "LineString" in geom_type:
+ logging.info(f"Preparing linestrings")
prepared = prepare_linestrings(features)
- splits = split_linestrings(prepared, transform)
+ logging.info(f"Splitting linestrings")
+ splits = split_features_for_rasters(
+ prepared, [grid], split_linestrings
+ )
elif "Polygon" in geom_type:
+ logging.info(f"Preparing polygons")
prepared = prepare_polygons(features)
- splits = split_polygons(prepared, transform)
+ logging.info(f"Splitting polygons")
+ splits = split_features_for_rasters(prepared, [grid], split_polygons)
else:
raise ValueError("Could not process vector data of type %s", geom_type)
- splits = apply_indices(splits, transform)
+ logging.info(f"Applying indices")
+ splits = apply_indices(splits, grid)
if args.attribute and args.raster:
if args.band:
@@ -225,9 +235,10 @@ def split(args):
args.raster,
band_index,
)
- splits[key] = associate_raster_file(
- splits, args.raster, band_number=int(band_index)
+ band_data = read_raster_band_data(
+ args.raster, band_number=int(band_index)
)
+ splits[key] = get_raster_values_for_splits(splits, band_data)
splits.set_crs(features_crs, inplace=True)
splits.to_file(args.output)
diff --git a/src/snail/core/__init__.py b/src/snail/core/__init__.py
index cb2c9ac..34217ab 100644
--- a/src/snail/core/__init__.py
+++ b/src/snail/core/__init__.py
@@ -1,4 +1,4 @@
-from .intersections import (get_cell_indices, split_linestring, split_polygon)
+from .intersections import get_cell_indices, split_linestring, split_polygon
__all__ = [
"get_cell_indices",
diff --git a/src/snail/damages.py b/src/snail/damages.py
new file mode 100644
index 0000000..5abf167
--- /dev/null
+++ b/src/snail/damages.py
@@ -0,0 +1,255 @@
+"""Damage assessment"""
+from abc import ABC
+import numpy
+import pandas
+import pandera
+from scipy.interpolate import interp1d
+from pandera.typing import DataFrame, Series
+
+
+# TODO check `nismod/east-africa-transport` and `nismod/jamaica-infrastructure`
+# manipulations of damage curves
+
+
+class DamageCurve(ABC):
+ """A damage curve"""
+
+ def __init__(self):
+ pass
+
+ def damage_fraction(self, exposure: numpy.ndarray) -> numpy.ndarray:
+ """Evaluate damage fraction for exposure to a given hazard intensity"""
+ pass
+
+
+class PiecewiseLinearDamageCurveSchema(pandera.DataFrameModel):
+ intensity: Series[float]
+ damage: Series[float]
+
+
+class PiecewiseLinearDamageCurve(DamageCurve):
+ """A piecewise-linear damage curve"""
+
+ intensity: Series[float]
+ damage: Series[float]
+
+ def __init__(self, curve: DataFrame[PiecewiseLinearDamageCurveSchema]):
+ curve = curve.copy()
+ self.intensity, self.damage = self.clip_curve_data(
+ curve.intensity, curve.damage
+ )
+
+ bounds = (self.damage.min(), self.damage.max())
+ self._interpolate = interp1d(
+ self.intensity,
+ self.damage,
+ kind="linear",
+ fill_value=bounds,
+ bounds_error=False,
+ copy=False,
+ )
+
+ def __eq__(self, other):
+ damage_eq = (self.damage == other.damage).all()
+ intensity_eq = (self.intensity == other.intensity).all()
+ return damage_eq and intensity_eq
+
+ @classmethod
+ def from_csv(
+ cls,
+ fname,
+ intensity_col="intensity",
+ damage_col="damage_ratio",
+ comment="#",
+ **kwargs,
+ ):
+ """Read a damage curve from a CSV file.
+
+ By default, the CSV should have columns named "intensity" and "damage_ratio",
+ with any additional header lines commented out by "#".
+
+ Any additional keyword arguments are passed through to ``pandas.read_csv``
+
+ Parameters
+ ----------
+ fname: str, path object or file-like object
+ intensity_col: str, default "intensity"
+ Column name to read hazard intensity values
+ damage_col: str, default "damage_ratio"
+ Column name to read damage values
+ comment: str, default "#"
+ Indicates remainder of the line in the CSV should not be parsed.
+ If found at the beginning of a line, the line will be ignored
+ altogether.
+ kwargs:
+ see pandas.read_csv documentation
+
+ Returns
+ -------
+ PiecewiseLinearDamageCurve
+ """
+ curve_data = pandas.read_csv(fname, comment=comment, **kwargs).rename(
+ columns={
+ intensity_col: "intensity",
+ damage_col: "damage",
+ }
+ )
+ return PiecewiseLinearDamageCurve(curve_data)
+
+ @classmethod
+ def from_excel(
+ cls,
+ fname,
+ sheet_name=0,
+ intensity_col="intensity",
+ damage_col="damage_ratio",
+ comment="#",
+ **kwargs,
+ ):
+ """Read a damage curve from an Excel file.
+
+ By default, the file should have columns named "intensity" and "damage_ratio",
+ with any additional header lines commented out by "#".
+
+ Any additional keyword arguments are passed through to ``pandas.read_excel``
+
+ Parameters
+ ----------
+ fname: str, path object or file-like object
+ sheet_name: str, int
+ Strings are used for sheet names. Integers are used in zero-indexed sheet
+ positions (chart sheets do not count as a sheet position).
+ intensity_col: str, default "intensity"
+ Column name to read hazard intensity values
+ damage_col: str, default "damage_ratio"
+ Column name to read damage values
+ comment: str, default "#"
+ Indicates remainder of the line should not be parsed.
+ If found at the beginning of a line, the line will be ignored
+ altogether.
+ kwargs:
+ see pandas.read_excel documentation
+
+ Returns
+ -------
+ PiecewiseLinearDamageCurve
+ """
+ curve_data = pandas.read_excel(
+ fname, sheet_name=sheet_name, comment=comment, **kwargs
+ ).rename(
+ columns={
+ intensity_col: "intensity",
+ damage_col: "damage",
+ }
+ )
+ return PiecewiseLinearDamageCurve(curve_data)
+
+ def damage_fraction(self, exposure: numpy.ndarray) -> numpy.ndarray:
+ """Evaluate damage fraction for exposure to a given hazard intensity"""
+ return self._interpolate(exposure)
+
+ def translate_y(self, y: float):
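+ """Return a new curve with damage values shifted by `y`, re-clipped to [0, 1]"""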
+ damage = self.damage + y
+
+ return PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": self.intensity,
+ "damage": damage,
+ }
+ )
+ )
+
+ def scale_y(self, y: float):
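+ """Return a new curve with damage values scaled by `y`, re-clipped to [0, 1]"""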
+ damage = self.damage * y
+
+ return PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": self.intensity,
+ "damage": damage,
+ }
+ )
+ )
+
+ def translate_x(self, x: float):
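+ """Return a new curve with intensity values shifted by `x`"""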
+ intensity = self.intensity + x
+
+ return PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": intensity,
+ "damage": self.damage,
+ }
+ )
+ )
+
+ def scale_x(self, x: float):
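+ """Return a new curve with intensity values scaled by `x`"""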
+ intensity = self.intensity * x
+
+ return PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": intensity,
+ "damage": self.damage,
+ }
+ )
+ )
+
+ @staticmethod
+ def clip_curve_data(intensity, damage):
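+ """Clip damage fractions to the range [0, 1]
+
+ If the curve crosses damage = 1 (or 0) between two points, a new
+ interpolation point is inserted at the crossing intensity, so that
+ clipping does not distort the curve between the existing points. For
+ example, points at (20, 0.8) and (30, 1.2) gain a point at (25, 1.0)
+ before damage values are clipped.
+ """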
+ if (damage.max() > 1) or (damage.min() < 0):
+ # WARNING clipping out-of-bounds damage fractions
+ bounds = (
+ intensity.min(),
+ intensity.max(),
+ )
+ inverse = interp1d(
+ damage,
+ intensity,
+ kind="linear",
+ fill_value=bounds,
+ bounds_error=False,
+ )
+
+ if damage.max() > 1:
+ one_intercept = inverse(1)
+ idx = numpy.searchsorted(intensity, one_intercept)
+ # if one_intercept is in our intensities
+ if intensity[idx] == one_intercept:
+ # no action - damage fraction will be set to 1
+ pass
+ else:
+ # else insert new interpolation point
+ damage = numpy.insert(damage, idx, 1)
+ intensity = numpy.insert(intensity, idx, one_intercept)
+
+ if damage.min() < 0:
+ zero_intercept = inverse(0)
+ idx = numpy.searchsorted(intensity, zero_intercept)
+ # if zero_intercept is in our intensities
+ if intensity[idx] == zero_intercept:
+ # no action - damage fraction will be set to 0
+ pass
+ else:
+ # else insert new interpolation point
+ damage = numpy.insert(damage, idx, 0)
+ intensity = numpy.insert(intensity, idx, zero_intercept)
+
+ damage = numpy.clip(damage, 0, 1)
+
+ return intensity, damage
+
+ def plot(self, ax=None):
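+ """Plot the curve with matplotlib, on `ax` if given, else on new axes"""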
+ import matplotlib.pyplot as plt
+
+ if ax is None:
+ fig = plt.figure()
+ ax = fig.add_subplot(1, 1, 1)
+
+ ax.plot(self.intensity, self.damage, color="tab:blue")
+ ax.set_ylim([0, 1])
+ ax.set_ylabel("Damage Fraction")
+ ax.set_xlabel("Hazard Intensity")
+
+ return ax
diff --git a/src/snail/intersection.py b/src/snail/intersection.py
index 5b7017d..26b7686 100644
--- a/src/snail/intersection.py
+++ b/src/snail/intersection.py
@@ -1,13 +1,15 @@
import logging
+import math
import os
from dataclasses import dataclass
-from itertools import product
from typing import Callable, List, Tuple
import geopandas
import numpy
import pandas
-from shapely.geometry import mapping, shape, box
+import rasterio
+from shapely import box
+from shapely.geometry import mapping, shape
from shapely.ops import linemerge, polygonize
from snail.core.intersections import (
@@ -33,29 +35,84 @@
POLYGON_COORDINATE_PRECISION = 9
-@dataclass
-class Transform:
- """Store a raster transform and CRS"""
+@dataclass(frozen=True)
+class GridDefinition:
+ """Store a raster transform and CRS
+
+ A note on `transform` - these six numbers define the transform from `i,j`
+ cell index (column/row) coordinates in the rectangular grid to `x,y`
+ geographic coordinates, in the coordinate reference system of the input and
+ output files. They effectively form the first two rows of a 3x3 matrix:
+
+
+ ```
+ | x |   | a b c | | i |
+ | y | = | d e f | | j |
+ | 1 |   | 0 0 1 | | 1 |
+ ```
+
+ In cases without shear or rotation, `a` and `e` define scaling or grid cell
+ size, while `c` and `f` define the offset or grid upper-left corner:
+
+ ```
+ | x_scale 0       x_offset |
+ | 0       y_scale y_offset |
+ | 0       0       1        |
+ ```
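+
+ For example, a global grid of 0.5-degree cells with upper-left corner
+ at (-180, 90), with rows counting down from north, would have
+ `transform = (0.5, 0, -180, 0, -0.5, 90)`.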
+ """
crs: str
width: int
height: int
transform: Tuple[float]
+ @classmethod
+ def from_rasterio_dataset(cls, dataset):
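+ """Read grid definition from an open rasterio dataset"""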
+ crs = dataset.crs
+ width = dataset.width
+ height = dataset.height
+ # trim transform to 6 - we expect the first two rows of 3x3 matrix
+ transform = tuple(dataset.transform)[:6]
+ return GridDefinition(crs, width, height, transform)
+
+ @classmethod
+ def from_raster(cls, fname):
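+ """Read grid definition (CRS, shape and transform) from a raster file"""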
+ with rasterio.open(fname) as dataset:
+ grid = GridDefinition.from_rasterio_dataset(dataset)
+ return grid
+
+ @classmethod
+ def from_extent(
+ cls,
+ xmin: float,
+ ymin: float,
+ xmax: float,
+ ymax: float,
+ cell_width: float,
+ cell_height: float,
+ crs,
+ ):
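+ """Create a grid to cover an extent with cells of a given size
+
+ For example (mirroring the tests), a 4x4 grid of unit cells anchored
+ at the origin:
+
+ >>> GridDefinition.from_extent(0, 0, 4, 4, 1, 1, crs=None)
+ GridDefinition(crs=None, width=4, height=4, transform=(1, 0, 0, 0, 1, 0))
+ """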
+ return GridDefinition(
+ crs=crs,
+ width=math.ceil((xmax - xmin) / cell_width),
+ height=math.ceil((ymax - ymin) / cell_height),
+ transform=(cell_width, 0, xmin, 0, cell_height, ymin),
+ )
+
def split_features_for_rasters(
features: geopandas.GeoDataFrame,
- transforms: List[Transform],
+ grids: List[GridDefinition],
split_func: Callable,
):
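+ """Split features on each grid, tagging each split with its cell
+ indices in columns `i_0, j_0, i_1, j_1, ...`, one pair per grid.
+ """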
# lookup per transform
- for i, t in enumerate(transforms):
- logging.info("Splitting on transform %s %s", i, t)
+ for i, grid in enumerate(grids):
+ logging.info("Splitting on grid %s %s", i, grid)
# transform to grid CRS
- crs_features = features.to_crs(t.crs)
- crs_features = split_func(crs_features, t)
+ crs_features = features.to_crs(grid.crs)
+ crs_features = split_func(crs_features, grid)
# save cell index for fast lookup of raster values
- crs_features = apply_indices(crs_features, t, f"i_{i}", f"j_{i}")
+ crs_features = apply_indices(crs_features, grid, f"i_{i}", f"j_{i}")
# transform back
features = crs_features.to_crs(features.crs)
return features
@@ -80,9 +137,9 @@ def prepare_polygons(
def split_points(
- points: geopandas.GeoDataFrame, t: Transform
+ points: geopandas.GeoDataFrame, grid: GridDefinition
) -> geopandas.GeoDataFrame:
- """Split points along the grid defined by a transform
+ """Split points along a grid
This is a no-op, written for equivalence when processing multiple
geometry types.
@@ -91,14 +148,19 @@ def split_points(
def split_linestrings(
- linestring_features: geopandas.GeoDataFrame, t: Transform
+ linestring_features: geopandas.GeoDataFrame, grid: GridDefinition
) -> geopandas.GeoDataFrame:
- """Split linestrings along the grid defined by a transform"""
+ """Split linestrings along a grid"""
+ # TODO check for MultiLineString
+ # throw error or coerce (df.explode)
pieces = []
for i in tqdm(range(len(linestring_features))):
# split edge
geom_splits = split_linestring(
- linestring_features.geometry[i], t.width, t.height, t.transform
+ linestring_features.geometry[i],
+ grid.width,
+ grid.height,
+ grid.transform,
)
for j, s in enumerate(geom_splits):
# splitting sometimes returns zero-length linestrings on edge of raster
@@ -117,7 +179,9 @@ def split_linestrings(
logging.info(
f"Split {len(linestring_features)} edges into {len(pieces)} pieces"
)
- splits_df = geopandas.GeoDataFrame(pieces, crs=t.crs, geometry="geometry")
+ splits_df = geopandas.GeoDataFrame(
+ pieces, crs=grid.crs, geometry="geometry"
+ )
return splits_df
@@ -126,39 +190,38 @@ def _transform(i, j, a, b, c, d, e, f) -> Tuple[float]:
def split_polygons(
- polygon_features: geopandas.GeoDataFrame, t: Transform
+ polygon_features: geopandas.GeoDataFrame, grid: GridDefinition
) -> geopandas.GeoDataFrame:
- """Split polygons along the grid defined by a transform"""
- pieces = []
+ """Split polygons along a grid"""
##
- # Fairly slow but solid approach, loop over cells and
+ # Fairly slow but solid approach, generate cells as boxes and
# use geopandas (shapely/GEOS) intersection
##
- a, b, c, d, e, f = t.transform
- for i, j in tqdm(
- product(range(t.width), range(t.height)), total=t.width * t.height
- ):
- ulx, uly = _transform(i, j, a, b, c, d, e, f)
- lrx, lry = _transform(i + 1, j + 1, a, b, c, d, e, f)
- cell_geom = box(ulx, uly, lrx, lry)
- idx = polygon_features.geometry.sindex.query(cell_geom)
- subset = polygon_features.iloc[idx].copy()
- if len(subset):
- subset.geometry = subset.intersection(cell_geom)
- subset = subset[
- ~(subset.geometry.is_empty | subset.geometry.isna())
- ]
- subset = subset.explode(ignore_index=True)
- subset = subset[subset.geometry.type == "Polygon"]
- pieces.append(subset)
- splits_df = pandas.concat(pieces)
- return splits_df
+ box_geoms = generate_grid_boxes(grid)
+ splits = polygon_features.overlay(box_geoms, how="intersection")
+ splits = splits[~(splits.geometry.is_empty | splits.geometry.isna())]
+ splits = splits.explode(ignore_index=True)
+ splits = splits[splits.geometry.type == "Polygon"]
+ return splits
+
+
+def generate_grid_boxes(grid):
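+ """Generate a GeoDataFrame with one box geometry per grid cell,
+ applying the affine transform to all cell indices at once.
+ """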
+ a, b, c, d, e, f = grid.transform
+ idx = numpy.arange(grid.width * grid.height)
+ # numpy.unravel_index returns (row, col) pairs for shape (height, width),
+ # i.e. (j, i) in the column/row convention documented on GridDefinition
+ j, i = numpy.unravel_index(idx, (grid.height, grid.width))
+ ulx = i * a + j * b + c
+ uly = i * d + j * e + f
+ lrx = (i + 1) * a + (j + 1) * b + c
+ lry = (i + 1) * d + (j + 1) * e + f
+ return geopandas.GeoDataFrame(
+ data={}, geometry=box(ulx, lry, lrx, uly), crs=grid.crs
+ )
def split_polygons_experimental(
- polygon_features: geopandas.GeoDataFrame, t: Transform
+ polygon_features: geopandas.GeoDataFrame, grid: GridDefinition
) -> geopandas.GeoDataFrame:
- """Split polygons along the grid defined by a transform
+ """Split polygons along a grid
Experimental implementation of `split_polygons`, possibly fast/incorrect
with some inputs.
@@ -177,7 +240,10 @@ def split_polygons_experimental(
for i in tqdm(range(len(polygon_features))):
# split area
geom_splits = split_polygon(
- polygon_features.geometry[i], t.width, t.height, t.transform
+ polygon_features.geometry[i],
+ grid.width,
+ grid.height,
+ grid.transform,
)
# round to high precision (avoid floating point errors)
geom_splits = [
@@ -196,7 +262,7 @@ def split_polygons_experimental(
f" Split {len(polygon_features)} areas into {len(pieces)} pieces"
)
splits_df = geopandas.GeoDataFrame(pieces)
- splits_df.crs = t.crs
+ splits_df.crs = grid.crs
return splits_df
@@ -215,8 +281,8 @@ def _set_precision(geom, precision):
return shape(geom_mapping)
-def associate_raster(
- features: pandas.DataFrame,
+def get_raster_values_for_splits(
+ splits: pandas.DataFrame,
data: numpy.ndarray,
index_i: str = "index_i",
index_j: str = "index_j",
@@ -228,54 +294,70 @@ def associate_raster(
N.B. This will pass through no data values from the raster (no filtering).
- Args:
- df: Table of features, each with cell indices
- to look up raster pixel. Indices must be stored under columns with
- names referenced by index_i and index_j.
- fname: Filename of raster file to read data from
- Returns:
- pd.Series: Series of raster values, with same row indexing as df.
+ Parameters
+ ----------
+ splits: pandas.DataFrame
+ Table of features, each with cell indices
+ to look up raster pixel. Indices must be stored under columns with
+ names referenced by index_i and index_j.
+ data: numpy.ndarray
+ Raster data (2D array)
+ index_i: str
+ Column name for i-indices
+ index_j: str
+ Column name for j-indices
+
+ Returns
+ -------
+ pd.Series
+ Series of raster values, with same row indexing as `splits`.
"""
# 2D numpy indexing is j, i (i.e. row, column)
with_data = pandas.Series(
- index=features.index, data=data[features[index_j], features[index_i]]
+ index=splits.index, data=data[splits[index_j], splits[index_i]]
)
# set NaN for out-of-bounds
- with_data[
- (features[index_i] == -1) | (features[index_j] == -1)
- ] = numpy.nan
+ with_data[(splits[index_i] == -1) | (splits[index_j] == -1)] = numpy.nan
return with_data
def apply_indices(
features: geopandas.GeoDataFrame,
- transform: Transform,
+ grid: GridDefinition,
index_i="index_i",
index_j="index_j",
) -> geopandas.GeoDataFrame:
def f(geom, *args, **kwargs):
- return get_indices(geom, transform, index_i, index_j)
+ return get_indices(geom, grid, index_i, index_j)
indices = features.geometry.apply(f, result_type="expand")
return pandas.concat([features, indices], axis="columns")
def get_indices(
- geom, t: Transform, index_i="index_i", index_j="index_j"
+ geom, grid: GridDefinition, index_i="index_i", index_j="index_j"
) -> pandas.Series:
"""Given a geometry, find the cell index (i, j) of its midpoint
- for the enclosing raster transform.
+ for the enclosing grid.
N.B. There is no checking whether a geometry spans more than one cell.
"""
- i, j = get_cell_indices(geom, t.height, t.width, t.transform)
+ i, j = get_cell_indices(geom, grid.height, grid.width, grid.transform)
# Raise error if cell index would be out of bounds
# assert 0 <= i < t.width
# assert 0 <= j < t.height
# Or - special value (-1,-1) if cell would be out of bounds
- if i >= t.width or i < 0 or j >= t.height or j < 0:
+ if i >= grid.width or i < 0 or j >= grid.height or j < 0:
i = -1
j = -1
return pandas.Series(index=(index_i, index_j), data=[i, j])
+
+
+def idx_to_ij(idx: int, width: int, height: int) -> Tuple[int]:
+ # numpy uses (row, col) == (j, i) ordering; swap to return (i, j)
+ j, i = numpy.unravel_index(idx, (height, width))
+ return i, j
+
+
+def ij_to_idx(ij: Tuple[int], width: int, height: int):
+ i, j = ij
+ return numpy.ravel_multi_index((j, i), (height, width))
diff --git a/src/snail/io.py b/src/snail/io.py
index 39a249a..d56c253 100644
--- a/src/snail/io.py
+++ b/src/snail/io.py
@@ -6,49 +6,60 @@
import pandas
import rasterio
-from snail.intersection import Transform, associate_raster
+from snail.intersection import GridDefinition, get_raster_values_for_splits
-def associate_raster_files(features, rasters):
+def associate_raster_files(splits, rasters):
+ """Read values from a list of raster files for a set of indexed split geometries
+
+ Parameters
+ ----------
+ splits: pandas.DataFrame
+ split geometries with raster indices in columns named "i_{grid_id}", "j_{grid_id}"
+ for each grid_id in `rasters`
+
+ rasters: pandas.DataFrame
+ table of raster metadata with columns: key, grid_id, path, bands
+
+ Returns
+ -------
+ pandas.DataFrame
+ split geometries with raster data values at indexed locations
+ """
# to prevent a fragmented dataframe (and a memory explosion), add series to a dict
# and then concat afterwards -- do not append to an existing dataframe
raster_data: dict[str, pandas.Series] = {}
# associate values
- for raster in rasters.itertuples():
+ for raster, band_number, band_data in read_rasters(rasters):
logging.info(
- "Associating values from raster %s transform %s",
+ "Associating values from raster %s grid %s band %s",
raster.key,
- raster.transform_id,
+ raster.grid_id,
+ band_number,
+ )
+ raster_data[raster.key] = get_raster_values_for_splits(
+ splits,
+ band_data,
+ f"i_{raster.grid_id}",
+ f"j_{raster.grid_id}",
)
- for band_number in raster.bands:
- raster_data[raster.key] = associate_raster_file(
- features,
- raster.path,
- f"i_{raster.transform_id}",
- f"j_{raster.transform_id}",
- band_number,
- )
raster_data = pandas.DataFrame(raster_data)
- features = pandas.concat([features, raster_data], axis="columns")
+ splits = pandas.concat([splits, raster_data], axis="columns")
- return features
+ return splits
-def associate_raster_file(
- df: pandas.DataFrame,
- fname: str,
- index_i: str = "index_i",
- index_j: str = "index_j",
- band_number: int = 1,
-) -> pandas.Series:
- band_data = read_band_data(fname, band_number)
- raster_values = associate_raster(df, band_data, index_i, index_j)
- return raster_values
+def read_rasters(rasters):
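+ """Yield (raster, band_number, band_data) for each band of each raster
+
+ Expects `rasters` to have `path` and `bands` columns, as set up by
+ `extend_rasters_metadata`.
+ """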
+ for raster in rasters.itertuples():
+ for band_number in raster.bands:
+ yield raster, band_number, read_raster_band_data(
+ raster.path, band_number
+ )
-def read_band_data(
+def read_raster_band_data(
fname: str,
band_number: int = 1,
) -> numpy.ndarray:
@@ -59,50 +70,50 @@ def read_band_data(
def extend_rasters_metadata(
rasters: pandas.DataFrame,
-) -> Tuple[pandas.DataFrame, List[Transform]]:
- transforms = []
- transform_ids = []
+) -> Tuple[pandas.DataFrame, List[GridDefinition]]:
+ grids = []
+ grid_ids = []
raster_bands = []
for raster in rasters.itertuples():
logging.info("Reading metadata from raster %s", raster.path)
- transform, bands = read_raster_metadata(raster.path)
+ grid, bands = read_raster_metadata(raster.path)
# add transform to list if not present
- if transform not in transforms:
- transforms.append(transform)
+ if grid not in grids:
+ grids.append(grid)
# record raster/transform details
- transform_id = transforms.index(transform)
- transform_ids.append(transform_id)
+ grid_id = grids.index(grid)
+ grid_ids.append(grid_id)
raster_bands.append(bands)
- rasters["transform_id"] = transform_ids
+ rasters["grid_id"] = grid_ids
if "bands" not in rasters.columns:
rasters["bands"] = raster_bands
- return rasters, transforms
+ return rasters, grids
-def read_raster_metadata(path) -> Tuple[Transform, Tuple[int]]:
+def read_raster_metadata(path) -> Tuple[GridDefinition, Tuple[int]]:
with rasterio.open(path) as dataset:
- crs = dataset.crs
- width = dataset.width
- height = dataset.height
- affine_transform = tuple(dataset.transform)[
- :6
- ] # trim to 6 - we expect the first two rows of 3x3 matrix
bands = dataset.indexes
- transform = Transform(crs, width, height, affine_transform)
- return transform, bands
+ grid = GridDefinition.from_rasterio_dataset(dataset)
+ return grid, bands
def read_features(path, layer=None):
if path.suffix in (".parquet", ".geoparquet"):
features = geopandas.read_parquet(path)
else:
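+ # prefer the pyogrio engine for reading if it is installed,
+ # falling back to fiona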
+ try:
+ import pyogrio
+
+ engine = "pyogrio"
+ except ImportError:
+ engine = "fiona"
if layer:
- features = geopandas.read_file(path, layer=layer)
+ features = geopandas.read_file(path, layer=layer, engine=engine)
else:
- features = geopandas.read_file(path)
+ features = geopandas.read_file(path, engine=engine)
return features
diff --git a/tests/core/test_core.py b/tests/core/test_core.py
new file mode 100644
index 0000000..c50b916
--- /dev/null
+++ b/tests/core/test_core.py
@@ -0,0 +1,79 @@
+import pytest
+import snail.core.intersections
+
+from shapely.geometry import LineString, Polygon
+
+
+nrows = 2
+ncols = 2
+transform = [1, 0, 0, 0, 1, 0]
+
+
+@pytest.mark.parametrize(
+ "test_linestring,expected",
+ [
+ (
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 0.5), (1.5, 1.5)]),
+ [
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.5)]),
+ LineString([(1.0, 0.5), (1.5, 0.5), (1.5, 1.0)]),
+ LineString([(1.5, 1.0), (1.5, 1.5)]),
+ ],
+ ),
+ (
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 1.5)]),
+ [
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.8333333)]),
+ LineString([(1.0, 0.8333333), (1.125, 1.0)]),
+ LineString([(1.125, 1.0), (1.5, 1.5)]),
+ ],
+ ),
+ ],
+)
+def test_linestring_splitting(test_linestring, expected):
+ splits = snail.core.intersections.split_linestring(
+ test_linestring, nrows, ncols, transform
+ )
+ for split, expected_split in zip(splits, expected):
+ assert split.equals_exact(expected_split, 1e-7)
+
+
+@pytest.mark.parametrize(
+ "test_linestring,expected",
+ [
+ (
+ LineString([(0.25, 1.25), (0.5, 1.5), (0.5, 1.75)]),
+ (0, 1),
+ ),
+ (
+ LineString([(1.25, 1.25), (1.5, 1.5), (1.5, 1.75)]),
+ (1, 1),
+ ),
+ ],
+)
+def test_get_cell_indices(test_linestring, expected):
+ cell_indices = snail.core.intersections.get_cell_indices(
+ test_linestring, nrows, ncols, transform
+ )
+ assert cell_indices == expected
+
+
+@pytest.mark.xfail
+def test_split_polygons():
+ bad_poly = Polygon(
+ (
+ [-0.0062485600499826, 51.61041647955],
+ [-0.0062485600499826, 51.602083146149994],
+ [0.0020847733500204, 51.602083146149994],
+ # [0.0020847733500204, 51.61041647955],
+ # [-0.0062485600499826, 51.61041647955],
+ )
+ )
+
+ # expect a RuntimeError: Expected even number of crossings on gridline.
+ snail.core.intersections.split_polygon(
+ bad_poly,
+ 36082,
+ 18000,
+ (1000.0, 0.0, -18041000.0, 0.0, -1000.0, 9000000.0),
+ )
diff --git a/tests/core/test_intersections.py b/tests/core/test_intersections.py
deleted file mode 100644
index e3fc871..0000000
--- a/tests/core/test_intersections.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import unittest
-
-from shapely.geometry import LineString
-
-import snail.core.intersections
-
-
-class TestIntersections(unittest.TestCase):
- def setUp(self):
- self.nrows = 2
- self.ncols = 2
- self.transform = [1, 0, 0, 0, 1, 0]
-
- def test_linestring_splitting(self):
- test_linestrings = [
- LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 0.5), (1.5, 1.5)]),
- LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 1.5)]),
- ]
- expected = [
- [
- LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.5)]),
- LineString([(1.0, 0.5), (1.5, 0.5), (1.5, 1.0)]),
- LineString([(1.5, 1.0), (1.5, 1.5)]),
- ],
- [
- LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.8333)]),
- LineString([(1.0, 0.8333), (1.125, 1.0)]),
- LineString([(1.125, 1.0), (1.5, 1.5)]),
- ],
- ]
-
- for i, test_data in enumerate(zip(test_linestrings, expected)):
- test_linestring, expected_splits = test_data
- with self.subTest(i=i):
- splits = snail.core.intersections.split_linestring(
- test_linestring, self.nrows, self.ncols, self.transform
- )
- self.assertTrue(
- [
- split.equals_exact(expected_split, 0.5e-6)
- for split, expected_split in zip(
- splits, expected_splits
- )
- ]
- )
-
- def test_get_cell_indices(self):
- test_linestrings = [
- LineString([(0.25, 1.25), (0.5, 1.5), (0.5, 1.75)]),
- LineString([(1.25, 1.25), (1.5, 1.5), (1.5, 1.75)]),
- ]
- expected_cell_indices = [(0, 1), (1, 1)]
-
- for i, test_linestring in enumerate(test_linestrings):
- with self.subTest(i=i):
- cell_indices = snail.core.intersections.get_cell_indices(
- test_linestring, self.nrows, self.ncols, self.transform
- )
- self.assertEqual(cell_indices, expected_cell_indices[i])
diff --git a/tests/integration/paved-road-flood-depth-damage.csv b/tests/integration/paved-road-flood-depth-damage.csv
new file mode 100644
index 0000000..1a49f68
--- /dev/null
+++ b/tests/integration/paved-road-flood-depth-damage.csv
@@ -0,0 +1,8 @@
+# Unpaved road flood depth/damage curve
+inundation_depth_(m) road_unpaved unrelated
+0 0 a
+1 0.28 b
+2 0.46 c
+3 0.64 d
+4 0.82 e
+5 1 f
diff --git a/tests/integration/paved-road-flood-depth-damage.xlsx b/tests/integration/paved-road-flood-depth-damage.xlsx
new file mode 100644
index 0000000..4c709ed
Binary files /dev/null and b/tests/integration/paved-road-flood-depth-damage.xlsx differ
diff --git a/tests/integration/piecewise-linear-damage-curve.csv b/tests/integration/piecewise-linear-damage-curve.csv
new file mode 100644
index 0000000..cbc0b9d
--- /dev/null
+++ b/tests/integration/piecewise-linear-damage-curve.csv
@@ -0,0 +1,13 @@
+# Example damage curve,
+# - use # to add comment lines to a damage curve CSV
+# - default column names are "intensity" and "damage_ratio"
+# - "damage_ratio" must be in the range [0, 1]
+# - "intensity" must be in some hazard/peril units and should be
+# documented in the header. E.g. "intensity is 10 minute mean surface
+# wind speeds in m/s" or "intensity is flood depth in metres"
+# - the header can also record source/citation details
+intensity,damage_ratio
+0,0
+10,0.1
+20,0.2
+30,1
diff --git a/tests/integration/piecewise-linear-damage-curve.xlsx b/tests/integration/piecewise-linear-damage-curve.xlsx
new file mode 100644
index 0000000..0071af6
Binary files /dev/null and b/tests/integration/piecewise-linear-damage-curve.xlsx differ
diff --git a/tests/split_polygons_rings.py b/tests/split_polygons_rings.py
deleted file mode 100644
index 751d772..0000000
--- a/tests/split_polygons_rings.py
+++ /dev/null
@@ -1,55 +0,0 @@
-expected_polygons_rings = [
- [
- (2.0, 0.875),
- (1.5, 0.25),
- (1.0, 0.875),
- (1.0, 1.0),
- (2.0, 1.0),
- (2.0, 0.875),
- ],
- [(2.1, 1.0), (2.0, 0.875), (2.0, 1.0), (2.1, 1.0)],
- [
- (2.5, 2.0),
- (2.5, 1.5),
- (2.1, 1.0),
- (2.0, 1.0),
- (2.0, 2.0),
- (2.5, 2.0),
- ],
- [
- (2.5, 3.0),
- (2.5, 2.0),
- (2.0, 2.0),
- (2.0, 2.875),
- (2.1, 3.0),
- (2.5, 3.0),
- ],
- [(2.1, 3.0), (2.5, 3.5), (2.5, 3.0), (2.1, 3.0)],
- [
- (1.0, 2.875),
- (1.5, 2.25),
- (2.0, 2.875),
- (2.0, 2.0),
- (1.0, 2.0),
- (1.0, 2.875),
- ],
- [
- (0.9, 3.0),
- (1.0, 2.875),
- (1.0, 2.0),
- (0.5, 2.0),
- (0.5, 3.0),
- (0.9, 3.0),
- ],
- [(0.5, 3.0), (0.5, 3.5), (0.9, 3.0), (0.5, 3.0)],
- [
- (0.9, 1.0),
- (0.5, 1.5),
- (0.5, 2.0),
- (1.0, 2.0),
- (1.0, 1.0),
- (0.9, 1.0),
- ],
- [(1.0, 0.875), (0.9, 1.0), (1.0, 1.0), (1.0, 0.875)],
- [(2.0, 1.0), (1.0, 1.0), (1.0, 2.0), (2.0, 2.0), (2.0, 1.0)],
-]
diff --git a/tests/test_damages.py b/tests/test_damages.py
new file mode 100644
index 0000000..915ae61
--- /dev/null
+++ b/tests/test_damages.py
@@ -0,0 +1,194 @@
+"""Test damage assessment"""
+import os
+
+import numpy
+import pandas
+import pytest
+from numpy.testing import assert_allclose
+from snail.damages import DamageCurve, PiecewiseLinearDamageCurve
+
+
+@pytest.fixture
+def curve():
+ curve_data = pandas.DataFrame(
+ {"intensity": [0.0, 10, 20, 30], "damage": [0, 0.1, 0.2, 1.0]}
+ )
+ return PiecewiseLinearDamageCurve(curve_data)
+
+
+def test_linear_curve(curve):
+ # check inheritance
+ assert isinstance(curve, DamageCurve)
+
+
+def test_equality(curve):
+ curve_copy = PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": [0.0, 10, 20, 30],
+ "damage": [0, 0.1, 0.2, 1.0],
+ }
+ )
+ )
+ assert curve == curve_copy
+
+
+def test_read_csv(curve):
+ fname = os.path.join(
+ os.path.dirname(__file__),
+ "integration",
+ "piecewise-linear-damage-curve.csv",
+ )
+ curve_from_file = PiecewiseLinearDamageCurve.from_csv(fname)
+ assert curve == curve_from_file
+
+
+def test_read_csv_arguments():
+ fname = os.path.join(
+ os.path.dirname(__file__),
+ "integration",
+ "paved-road-flood-depth-damage.csv",
+ )
+ actual = PiecewiseLinearDamageCurve.from_csv(
+ fname,
+ intensity_col="inundation_depth_(m)",
+ damage_col="road_unpaved",
+ sep="\t",
+ )
+ expected = PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": [0, 1, 2, 3, 4, 5],
+ "damage": [0, 0.28, 0.46, 0.64, 0.82, 1],
+ }
+ )
+ )
+ assert actual == expected
+
+
+def test_read_excel(curve):
+ fname = os.path.join(
+ os.path.dirname(__file__),
+ "integration",
+ "piecewise-linear-damage-curve.xlsx",
+ )
+ curve_from_file = PiecewiseLinearDamageCurve.from_excel(fname)
+ assert curve == curve_from_file
+
+
+def test_read_excel_arguments():
+ fname = os.path.join(
+ os.path.dirname(__file__),
+ "integration",
+ "paved-road-flood-depth-damage.xlsx",
+ )
+ actual = PiecewiseLinearDamageCurve.from_excel(
+ fname,
+ sheet_name="flood",
+ intensity_col="inundation_depth_(m)",
+ damage_col="road_unpaved",
+ )
+ expected = PiecewiseLinearDamageCurve(
+ pandas.DataFrame(
+ {
+ "intensity": [0, 1, 2, 3, 4, 5],
+ "damage": [0, 0.28, 0.46, 0.64, 0.82, 1],
+ }
+ )
+ )
+ assert actual == expected
+
+
+def test_linear_curve_pass_through(curve):
+ # check specified intensities give specified damages
+ assert_allclose(curve.damage_fraction(curve.intensity), curve.damage)
+
+
+def test_linear_curve_interpolation(curve):
+ # sense-check interpolation
+ intensities = numpy.array([5, 15, 25])
+ expected = numpy.array([0.05, 0.15, 0.6])
+ actual = curve.damage_fraction(intensities)
+ assert_allclose(actual, expected)
+
+
+def test_linear_curve_out_of_bounds(curve):
+ # sense-check out-of-bounds
+ intensities = numpy.array(
+ [numpy.NINF, -999, numpy.NZERO, 0, 30, 999, numpy.inf, numpy.nan]
+ )
+ expected = numpy.array([0, 0, 0, 0, 1, 1, 1, numpy.nan])
+ actual = curve.damage_fraction(intensities)
+ assert_allclose(actual, expected)
+
+
+def test_linear_curve_translate_y(curve):
+ increased = curve.translate_y(0.1)
+ expected = numpy.array([0.1, 0.2, 0.3, 1.0, 1.0])
+ assert_allclose(increased.damage, expected)
+
+ expected = numpy.array([0, 10, 20, 28.75, 30])
+ assert_allclose(increased.intensity, expected)
+
+
+def test_linear_curve_translate_y_down(curve):
+ decreased = curve.translate_y(-0.1)
+ expected = numpy.array([0, 0, 0.1, 0.9])
+ assert_allclose(decreased.damage, expected)
+
+ expected = numpy.array([0, 10, 20, 30])
+ assert_allclose(decreased.intensity, expected)
+
+
+def test_linear_curve_scale_y(curve):
+ increased = curve.scale_y(2)
+ expected = numpy.array([0, 0.2, 0.4, 1.0, 1.0])
+ assert_allclose(increased.damage, expected)
+
+ expected = numpy.array([0, 10, 20, 23.75, 30])
+ assert_allclose(increased.intensity, expected)
+
+
+def test_linear_curve_scale_y_down(curve):
+ decreased = curve.scale_y(0.1)
+ expected = numpy.array([0, 0.01, 0.02, 0.1])
+ assert_allclose(decreased.damage, expected)
+
+ expected = numpy.array([0, 10, 20, 30])
+ assert_allclose(decreased.intensity, expected)
+
+
+def test_linear_curve_translate_x(curve):
+ increased = curve.translate_x(5)
+ expected = numpy.array([0, 0.1, 0.2, 1.0])
+ assert_allclose(increased.damage, expected)
+
+ expected = numpy.array([5, 15, 25, 35])
+ assert_allclose(increased.intensity, expected)
+
+
+def test_linear_curve_translate_x_down(curve):
+ decreased = curve.translate_x(-5)
+ expected = numpy.array([0, 0.1, 0.2, 1.0])
+ assert_allclose(decreased.damage, expected)
+
+ expected = numpy.array([-5, 5, 15, 25])
+ assert_allclose(decreased.intensity, expected)
+
+
+def test_linear_curve_scale_x(curve):
+ increased = curve.scale_x(2)
+ expected = numpy.array([0, 0.1, 0.2, 1.0])
+ assert_allclose(increased.damage, expected)
+
+ expected = numpy.array([0, 20, 40, 60])
+ assert_allclose(increased.intensity, expected)
+
+
+def test_linear_curve_scale_x_down(curve):
+ decreased = curve.scale_x(0.1)
+ expected = numpy.array([0, 0.1, 0.2, 1.0])
+ assert_allclose(decreased.damage, expected)
+
+ expected = numpy.array([0, 1, 2, 3])
+ assert_allclose(decreased.intensity, expected)
diff --git a/tests/test_intersection.py b/tests/test_intersection.py
new file mode 100644
index 0000000..6f1dfd9
--- /dev/null
+++ b/tests/test_intersection.py
@@ -0,0 +1,212 @@
+import os
+
+import geopandas as gpd
+import numpy as np
+import pytest
+from hilbertcurve.hilbertcurve import HilbertCurve
+from numpy.testing import assert_array_equal
+from rasterio.crs import CRS
+from shapely.geometry import LineString, Polygon
+from shapely.geometry.polygon import LinearRing, orient
+
+from snail.intersection import (
+ GridDefinition,
+ split_linestrings,
+ split_polygons,
+)
+
+
+@pytest.fixture
+def linestrings():
+ test_linestrings = [
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 0.5), (1.5, 1.5)]),
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 1.5)]),
+ ]
+ gdf = gpd.GeoDataFrame(
+ {"col1": ["name1", "name2"], "geometry": test_linestrings}
+ )
+ return gdf
+
+
+@pytest.fixture
+def linestrings_split():
+ expected_splits = [
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.5)]),
+ LineString([(1.0, 0.5), (1.5, 0.5), (1.5, 1.0)]),
+ LineString([(1.5, 1.0), (1.5, 1.5)]),
+ ] + [
+ LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.8333)]),
+ LineString([(1.0, 0.8333), (1.125, 1.0)]),
+ LineString([(1.125, 1.0), (1.5, 1.5)]),
+ ]
+ expected_gdf = gpd.GeoDataFrame(
+ {"col1": ["name1"] * 3 + ["name2"] * 3, "geometry": expected_splits},
+ index=[0] * 3 + [1] * 3,
+ )
+ return expected_gdf
+
+
+@pytest.fixture
+def polygon():
+ test_linearing = LinearRing(
+ [
+ (1.5, 0.25),
+ (2.5, 1.5),
+ (2.5, 3.5),
+ (1.5, 2.25),
+ (0.5, 3.5),
+ (0.5, 1.5),
+ ]
+ )
+ counter_clockwise = 1
+ test_polygon = orient(Polygon(test_linearing), counter_clockwise)
+ return gpd.GeoDataFrame({"col1": ["name1"], "geometry": [test_polygon]})
+
+
+@pytest.fixture
+def polygon_split():
+ rings = [
+ [
+ (0.9, 1.0),
+ (0.5, 1.5),
+ (0.5, 2.0),
+ (1.0, 2.0),
+ (1.0, 1.0),
+ (0.9, 1.0),
+ ],
+ [
+ (0.9, 3.0),
+ (1.0, 2.875),
+ (1.0, 2.0),
+ (0.5, 2.0),
+ (0.5, 3.0),
+ (0.9, 3.0),
+ ],
+ [(1.0, 0.875), (0.9, 1.0), (1.0, 1.0), (1.0, 0.875)],
+ [(0.5, 3.0), (0.5, 3.5), (0.9, 3.0), (0.5, 3.0)],
+ [
+ (2.0, 0.875),
+ (1.5, 0.25),
+ (1.0, 0.875),
+ (1.0, 1.0),
+ (2.0, 1.0),
+ (2.0, 0.875),
+ ],
+ [
+ (1.0, 2.875),
+ (1.5, 2.25),
+ (2.0, 2.875),
+ (2.0, 2.0),
+ (1.0, 2.0),
+ (1.0, 2.875),
+ ],
+ [(2.0, 1.0), (1.0, 1.0), (1.0, 2.0), (2.0, 2.0), (2.0, 1.0)],
+ [(2.1, 1.0), (2.0, 0.875), (2.0, 1.0), (2.1, 1.0)],
+ [
+ (2.5, 2.0),
+ (2.5, 1.5),
+ (2.1, 1.0),
+ (2.0, 1.0),
+ (2.0, 2.0),
+ (2.5, 2.0),
+ ],
+ [
+ (2.5, 3.0),
+ (2.5, 2.0),
+ (2.0, 2.0),
+ (2.0, 2.875),
+ (2.1, 3.0),
+ (2.5, 3.0),
+ ],
+ [(2.1, 3.0), (2.5, 3.5), (2.5, 3.0), (2.1, 3.0)],
+ ]
+ expected_polygons = [Polygon(ring) for ring in rings]
+ expected_idx = ["name1"] * len(rings)
+ expected_gdf = gpd.GeoDataFrame(
+ {"col1": expected_idx, "geometry": expected_polygons}
+ )
+ expected_gdf["index"] = 0
+ return expected_gdf.set_index("index")
+
+
+@pytest.fixture
+def grid():
+ return GridDefinition(
+ crs=None, width=4, height=4, transform=(1, 0, 0, 0, 1, 0)
+ )
+
+
+def test_grid_from_extent(grid):
+ actual = GridDefinition.from_extent(
+ xmin=0, ymin=0, xmax=4, ymax=4, cell_width=1, cell_height=1, crs=None
+ )
+ assert actual == grid
+
+
+def test_grid_from_raster():
+ fname = os.path.join(
+ os.path.dirname(__file__),
+ "integration",
+ "range.tif",
+ )
+ actual = GridDefinition.from_raster(fname)
+ expected = GridDefinition(
+ crs=CRS.from_epsg(4326),
+ width=23,
+ height=14,
+ transform=(
+ 0.008333333347826087,
+ 0.0,
+ -1.341666667,
+ 0.0,
+ -0.008333333285714315,
+ 51.808333333,
+ ),
+ )
+ assert actual == expected
+
+
+class TestSnailIntersections:
+ def test_split_linestrings(self, grid, linestrings, linestrings_split):
+ actual = split_linestrings(linestrings, grid)
+ expected_gdf = linestrings_split
+
+ # Assertions
+
+ # Ideally we'd like to use geopandas.assert_geodataframe_equal to
+ # compare both expected and actual geodfs, but this function offers
+ # little control over tolerance. When using option "check_less_precise",
+ # it uses GeoSeries.geom_almost_equals under the hood, which has a kwarg
+ # "decimal". But assert_geodataframe_equal does not recognise kwarg "decimal".
+ assert (
+ actual["geometry"]
+ .geom_almost_equals(expected_gdf["geometry"], decimal=3)
+ .values.all()
+ )
+ assert_array_equal(actual["col1"].values, expected_gdf["col1"].values)
+
+ def test_split_polygons(self, grid, polygon, polygon_split):
+ actual = sort_polygons(split_polygons(polygon, grid))
+ expected = sort_polygons(polygon_split)
+
+ for i in range(len(actual)):
+ actual_geom = actual.iloc[i, 1]
+ expected_geom = expected.iloc[i, 1]
+ assert actual_geom.equals(expected_geom)
+ assert_array_equal(actual["col1"].values, expected["col1"].values)
+
+
+def sort_polygons(df):
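+ """Sort polygons by the Hilbert distance of their centroids, so that
+ GeoDataFrames containing the same geometries in different row orders
+ can be compared row by row.
+ """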
+ iterations = 6 # all coords must be <= (2**p - 1) ; 2**6 - 1 == 63
+ ndimensions = 2
+ hilbert_curve = HilbertCurve(iterations, ndimensions)
+ points = df.geometry.centroid
+ coords = np.array(
+ list(zip(points.x.values.tolist(), points.y.values.tolist()))
+ )
+ int_coords = (coords * 10).astype(int)
+ distances = hilbert_curve.distances_from_points(int_coords)
+ df["hilbert_distance"] = distances
+ return df.sort_values(by="hilbert_distance").drop(
+ columns="hilbert_distance"
+ )
diff --git a/tests/test_multi_intersections.py b/tests/test_multi_intersections.py
deleted file mode 100644
index 8b6682b..0000000
--- a/tests/test_multi_intersections.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import unittest
-
-from numpy.testing import assert_array_equal
-import geopandas as gpd
-from shapely.geometry import LineString, Polygon
-from shapely.geometry.polygon import LinearRing, orient
-
-from snail.intersection import (
- Transform,
- split_linestrings,
- split_polygons,
-)
-
-from split_polygons_rings import expected_polygons_rings
-
-
-def get_couple_of_linestrings():
- test_linestrings = [
- LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 0.5), (1.5, 1.5)]),
- LineString([(0.5, 0.5), (0.75, 0.5), (1.5, 1.5)]),
- ]
- gdf = gpd.GeoDataFrame(
- {"col1": ["name1", "name2"], "geometry": test_linestrings}
- )
- return gdf
-
-
-def get_polygon_vector_data():
- test_linearing = LinearRing(
- [
- (1.5, 0.25),
- (2.5, 1.5),
- (2.5, 3.5),
- (1.5, 2.25),
- (0.5, 3.5),
- (0.5, 1.5),
- ]
- )
- counter_clockwise = 1
- test_polygon = orient(Polygon(test_linearing), counter_clockwise)
- return gpd.GeoDataFrame({"col1": ["name1"], "geometry": [test_polygon]})
-
-
-def get_split_polygons():
- expected_polygons = [Polygon(ring) for ring in expected_polygons_rings]
- expected_idx = ["name1"] * len(expected_polygons_rings)
- expected_gdf = gpd.GeoDataFrame(
- {"col1": expected_idx, "geometry": expected_polygons}
- )
- expected_gdf["index"] = 0
- return expected_gdf.set_index("index")
-
-
-def get_split_linestrings():
- expected_splits = [
- LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.5)]),
- LineString([(1.0, 0.5), (1.5, 0.5), (1.5, 1.0)]),
- LineString([(1.5, 1.0), (1.5, 1.5)]),
- ] + [
- LineString([(0.5, 0.5), (0.75, 0.5), (1.0, 0.8333)]),
- LineString([(1.0, 0.8333), (1.125, 1.0)]),
- LineString([(1.125, 1.0), (1.5, 1.5)]),
- ]
- expected_gdf = gpd.GeoDataFrame(
- {"col1": ["name1"] * 3 + ["name2"] * 3, "geometry": expected_splits},
- index=[0] * 3 + [1] * 3,
- )
- return expected_gdf
-
-
-class TestSnailIntersections(unittest.TestCase):
- def setUp(self):
- self.raster_dataset = Transform(
- crs=None, width=4, height=4, transform=(1, 0, 0, 0, 1, 0)
- )
-
- def test_split_linestrings(self):
- vector_data = get_couple_of_linestrings()
- gdf = split_linestrings(vector_data, self.raster_dataset)
- expected_gdf = get_split_linestrings()
-
- # Assertions
-
- # Ideally we'd like to use geopandas.assert_geodataframe_equal to
- # to compare both expected and actual geodfs, but this function offers
- # little control over tolerance. When using option "check_less_precise",
- # it used GeoSeries.geom_almost_equals under the hood, which has an kwarg
- # "decimal". But assert_geodataframe_equal does not recognise kwarg "decimal".
- self.assertTrue(
- list(
- gdf["geometry"]
- .geom_almost_equals(expected_gdf["geometry"], decimal=3)
- .values
- )
- )
- assert_array_equal(gdf["col1"].values, expected_gdf["col1"].values)
-
- def test_split_polygons(self):
- vector_data = get_polygon_vector_data()
- gdf = split_polygons(vector_data, self.raster_dataset)
- expected_gdf = get_split_polygons()
- self.assertTrue(
- list(
- gdf["geometry"]
- .geom_almost_equals(expected_gdf["geometry"], decimal=3)
- .values
- )
- )
- assert_array_equal(gdf["col1"].values, expected_gdf["col1"].values)
diff --git a/tests/test_routing.py b/tests/test_routing.py
index 635d79a..ec3c050 100644
--- a/tests/test_routing.py
+++ b/tests/test_routing.py
@@ -1,30 +1,27 @@
-import unittest
from igraph import Graph
-
from snail.routing import shortest_paths
-class TestSnailRouting(unittest.TestCase):
- def test_shortest_paths(self):
- """
- e4
- 0--4
- e0 | |
- 1 | e3
- e1 | |
- 2--3
- e2
- """
- g = Graph.Ring(n=5, circular=True)
- g.vs["name"] = ["node_" + str(i) for i in range(5)]
- g.es["length_km"] = [1, 1, 1, 3, 1]
- sps = shortest_paths(
- ["node_0", "node_2"], ["node_4", "node_3"], g, "length_km"
- )
- expected_paths = [
- [4], # 0 to 4, along edge 4
- [0, 1, 2], # 0 to 3, avoids long edge 3
- [1, 0, 4], # 2 to 4, avoids long edge 3
- [2], # 2 to 3, along edge 2
- ]
- self.assertEqual(sps, expected_paths)
+def test_shortest_paths():
+ """
+ e4
+ 0--4
+ e0 | |
+ 1 | e3
+ e1 | |
+ 2--3
+ e2
+ """
+ g = Graph.Ring(n=5, circular=True)
+ g.vs["name"] = ["node_" + str(i) for i in range(5)]
+ g.es["length_km"] = [1, 1, 1, 3, 1]
+ actual = shortest_paths(
+ ["node_0", "node_2"], ["node_4", "node_3"], g, "length_km"
+ )
+ expected = [
+ [4], # 0 to 4, along edge 4
+ [0, 1, 2], # 0 to 3, avoids long edge 3
+ [1, 0, 4], # 2 to 4, avoids long edge 3
+ [2], # 2 to 3, along edge 2
+ ]
+ assert actual == expected
diff --git a/tutorials/01-data-preparation-ghana.ipynb b/tutorials/01-data-preparation-ghana.ipynb
index 273c233..64d8fc2 100644
--- a/tutorials/01-data-preparation-ghana.ipynb
+++ b/tutorials/01-data-preparation-ghana.ipynb
@@ -1,6 +1,7 @@
{
"cells": [
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -15,6 +16,27 @@
]
},
{
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The os and subprocess modules are built into Python\n",
+ "# see https://docs.python.org/3/library/os.html\n",
+ "import os\n",
+ "\n",
+ "# see https://docs.python.org/3/library/subprocess.html\n",
+ "import subprocess\n",
+ "\n",
+ "# see https://docs.python.org/3/library/time.html\n",
+ "import time\n",
+ "\n",
+ "# see https://docs.python.org/3/library/pathlib.html\n",
+ "from pathlib import Path"
+ ]
+ },
+ {
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -29,21 +51,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# edit this if using a Mac (otherwise delete)\n",
- "data_folder = \"/Users/NAME/Desktop/ghana_tutorial\"\n",
+ "data_folder = Path(\"/Users/NAME/Desktop/ghana_tutorial\")\n",
"\n",
"# edit this if using Windows (otherwise delete)\n",
- "data_folder = \"C:\\\\Users\\\\NAME\\\\Desktop\\\\ghana_tutorial\"\n",
+ "data_folder = Path(\"C:/Users/NAME/Desktop/ghana_tutorial\")\n",
"\n",
"# delete this line\n",
- "data_folder = \"../data\""
+ "data_folder = Path(\"../data\")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -52,17 +75,10 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
- "# The os and subprocess modules are built into Python\n",
- "# see https://docs.python.org/3/library/os.html\n",
- "import os\n",
- "\n",
- "# see https://docs.python.org/3/library/subprocess.html\n",
- "import subprocess\n",
- "\n",
"# Pandas and GeoPandas are libraries for working with datasets\n",
"# see https://geopandas.org/\n",
"import geopandas as gpd\n",
@@ -71,6 +87,14 @@
"# see https://pandas.pydata.org/\n",
"import pandas as pd\n",
"\n",
+ "# This package interacts with a risk data extract service, also accessible at\n",
+ "# https://global.infrastructureresilience.org/downloads\n",
+ "import irv_autopkg_client\n",
+ "\n",
+ "# We'll use snail to intersect roads with flooding\n",
+ "import snail.intersection\n",
+ "import snail.io\n",
+ "\n",
"# snkit helps generate connected networks from lines and nodes\n",
"# see https://snkit.readthedocs.io/\n",
"import snkit\n",
@@ -82,6 +106,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -89,6 +114,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -96,6 +122,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -104,18 +131,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"roads = gpd.read_file(\n",
- " os.path.join(\n",
- " data_folder, \"ghana-latest-free.shp\", \"gis_osm_roads_free_1.shp\"\n",
- " )\n",
+ " data_folder / \"ghana-latest-free.shp\" / \"gis_osm_roads_free_1.shp\"\n",
")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -124,23 +150,282 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " osm_id | \n",
+ " code | \n",
+ " fclass | \n",
+ " name | \n",
+ " ref | \n",
+ " oneway | \n",
+ " maxspeed | \n",
+ " layer | \n",
+ " bridge | \n",
+ " tunnel | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4790591 | \n",
+ " 5121 | \n",
+ " unclassified | \n",
+ " Airport Road | \n",
+ " NaN | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.17184 5.60847, -0.17182 5.60849... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4790592 | \n",
+ " 5122 | \n",
+ " residential | \n",
+ " Nortei Ababio Road | \n",
+ " NaN | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.18282 5.61197, -0.18336 5.61198... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4790594 | \n",
+ " 5115 | \n",
+ " tertiary | \n",
+ " Airport Road | \n",
+ " NaN | \n",
+ " F | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.17544 5.60550, -0.17418 5.60555... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4790596 | \n",
+ " 5121 | \n",
+ " unclassified | \n",
+ " Airport Road | \n",
+ " NaN | \n",
+ " F | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.17207 5.60853, -0.17207 5.60844... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4790597 | \n",
+ " 5122 | \n",
+ " residential | \n",
+ " Volta Road | \n",
+ " NaN | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.18282 5.61197, -0.18280 5.61262... | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 338073 | \n",
+ " 1182192627 | \n",
+ " 5141 | \n",
+ " service | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.17508 5.71756, -0.17511 5.71756... | \n",
+ "
\n",
+ " \n",
+ " 338074 | \n",
+ " 1182192628 | \n",
+ " 5141 | \n",
+ " service | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.17501 5.71759, -0.17508 5.71756) | \n",
+ "
\n",
+ " \n",
+ " 338075 | \n",
+ " 1182192629 | \n",
+ " 5141 | \n",
+ " service | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.17506 5.71778, -0.17500 5.71764... | \n",
+ "
\n",
+ " \n",
+ " 338076 | \n",
+ " 1182207852 | \n",
+ " 5114 | \n",
+ " secondary | \n",
+ " Education Ridge Road | \n",
+ " R92 | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.97456 9.56428, -0.97542 9.56413... | \n",
+ "
\n",
+ " \n",
+ " 338077 | \n",
+ " 1182207853 | \n",
+ " 5114 | \n",
+ " secondary | \n",
+ " Bontanga - Dalung Road | \n",
+ " R92 | \n",
+ " B | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " F | \n",
+ " F | \n",
+ " LINESTRING (-0.99413 9.58079, -0.99425 9.58107... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
338078 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " osm_id code fclass name ref oneway \\\n",
+ "0 4790591 5121 unclassified Airport Road NaN B \n",
+ "1 4790592 5122 residential Nortei Ababio Road NaN B \n",
+ "2 4790594 5115 tertiary Airport Road NaN F \n",
+ "3 4790596 5121 unclassified Airport Road NaN F \n",
+ "4 4790597 5122 residential Volta Road NaN B \n",
+ "... ... ... ... ... ... ... \n",
+ "338073 1182192627 5141 service NaN NaN B \n",
+ "338074 1182192628 5141 service NaN NaN B \n",
+ "338075 1182192629 5141 service NaN NaN B \n",
+ "338076 1182207852 5114 secondary Education Ridge Road R92 B \n",
+ "338077 1182207853 5114 secondary Bontanga - Dalung Road R92 B \n",
+ "\n",
+ " maxspeed layer bridge tunnel \\\n",
+ "0 0 0 F F \n",
+ "1 0 0 F F \n",
+ "2 0 0 F F \n",
+ "3 0 0 F F \n",
+ "4 0 0 F F \n",
+ "... ... ... ... ... \n",
+ "338073 0 0 F F \n",
+ "338074 0 0 F F \n",
+ "338075 0 0 F F \n",
+ "338076 0 0 F F \n",
+ "338077 0 0 F F \n",
+ "\n",
+ " geometry \n",
+ "0 LINESTRING (-0.17184 5.60847, -0.17182 5.60849... \n",
+ "1 LINESTRING (-0.18282 5.61197, -0.18336 5.61198... \n",
+ "2 LINESTRING (-0.17544 5.60550, -0.17418 5.60555... \n",
+ "3 LINESTRING (-0.17207 5.60853, -0.17207 5.60844... \n",
+ "4 LINESTRING (-0.18282 5.61197, -0.18280 5.61262... \n",
+ "... ... \n",
+ "338073 LINESTRING (-0.17508 5.71756, -0.17511 5.71756... \n",
+ "338074 LINESTRING (-0.17501 5.71759, -0.17508 5.71756) \n",
+ "338075 LINESTRING (-0.17506 5.71778, -0.17500 5.71764... \n",
+ "338076 LINESTRING (-0.97456 9.56428, -0.97542 9.56413... \n",
+ "338077 LINESTRING (-0.99413 9.58079, -0.99425 9.58107... \n",
+ "\n",
+ "[338078 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"roads"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['unclassified', 'residential', 'tertiary', 'tertiary_link',\n",
+ " 'secondary', 'trunk', 'service', 'primary', 'trunk_link',\n",
+ " 'primary_link', 'secondary_link', 'footway', 'path', 'track',\n",
+ " 'motorway', 'track_grade3', 'track_grade4', 'motorway_link',\n",
+ " 'steps', 'pedestrian', 'bridleway', 'cycleway', 'track_grade2',\n",
+ " 'track_grade5', 'track_grade1', 'living_street'], dtype=object)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"roads.fclass.unique()"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -148,6 +433,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -156,7 +442,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -188,6 +474,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -199,7 +486,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -208,7 +495,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -223,6 +510,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -231,7 +519,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -241,24 +529,166 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " osm_id | \n",
+ " road_type | \n",
+ " name | \n",
+ " geometry | \n",
+ " id | \n",
+ " from_id | \n",
+ " to_id | \n",
+ " length_m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 15684 | \n",
+ " 1181982913 | \n",
+ " secondary | \n",
+ " Kumbungu-Zangbalung road | \n",
+ " LINESTRING (-0.95804 9.56291, -0.95811 9.56294... | \n",
+ " roade_15684 | \n",
+ " roadn_12219 | \n",
+ " roadn_12220 | \n",
+ " 1870.991174 | \n",
+ "
\n",
+ " \n",
+ " 15685 | \n",
+ " 1182141809 | \n",
+ " secondary_link | \n",
+ " NaN | \n",
+ " LINESTRING (-1.59420 6.65761, -1.59426 6.65768... | \n",
+ " roade_15685 | \n",
+ " roadn_12221 | \n",
+ " roadn_12216 | \n",
+ " 47.244512 | \n",
+ "
\n",
+ " \n",
+ " 15686 | \n",
+ " 1182207852 | \n",
+ " secondary | \n",
+ " Education Ridge Road | \n",
+ " LINESTRING (-0.97456 9.56428, -0.97542 9.56413... | \n",
+ " roade_15686 | \n",
+ " roadn_12220 | \n",
+ " roadn_8005 | \n",
+ " 2242.279664 | \n",
+ "
\n",
+ " \n",
+ " 15687 | \n",
+ " 1182207852 | \n",
+ " secondary | \n",
+ " Education Ridge Road | \n",
+ " LINESTRING (-0.99028 9.57190, -0.99202 9.57587... | \n",
+ " roade_15687 | \n",
+ " roadn_8005 | \n",
+ " roadn_12222 | \n",
+ " 1069.950243 | \n",
+ "
\n",
+ " \n",
+ " 15688 | \n",
+ " 1182207853 | \n",
+ " secondary | \n",
+ " Bontanga - Dalung Road | \n",
+ " LINESTRING (-0.99413 9.58079, -0.99425 9.58107... | \n",
+ " roade_15688 | \n",
+ " roadn_12222 | \n",
+ " roadn_8006 | \n",
+ " 6604.650117 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " osm_id road_type name \\\n",
+ "15684 1181982913 secondary Kumbungu-Zangbalung road \n",
+ "15685 1182141809 secondary_link NaN \n",
+ "15686 1182207852 secondary Education Ridge Road \n",
+ "15687 1182207852 secondary Education Ridge Road \n",
+ "15688 1182207853 secondary Bontanga - Dalung Road \n",
+ "\n",
+ " geometry id \\\n",
+ "15684 LINESTRING (-0.95804 9.56291, -0.95811 9.56294... roade_15684 \n",
+ "15685 LINESTRING (-1.59420 6.65761, -1.59426 6.65768... roade_15685 \n",
+ "15686 LINESTRING (-0.97456 9.56428, -0.97542 9.56413... roade_15686 \n",
+ "15687 LINESTRING (-0.99028 9.57190, -0.99202 9.57587... roade_15687 \n",
+ "15688 LINESTRING (-0.99413 9.58079, -0.99425 9.58107... roade_15688 \n",
+ "\n",
+ " from_id to_id length_m \n",
+ "15684 roadn_12219 roadn_12220 1870.991174 \n",
+ "15685 roadn_12221 roadn_12216 47.244512 \n",
+ "15686 roadn_12220 roadn_8005 2242.279664 \n",
+ "15687 roadn_8005 roadn_12222 1069.950243 \n",
+ "15688 roadn_12222 roadn_8006 6604.650117 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"roads.tail()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\n",
+ "Name: WGS 84\n",
+ "Axis Info [ellipsoidal]:\n",
+ "- Lat[north]: Geodetic latitude (degree)\n",
+ "- Lon[east]: Geodetic longitude (degree)\n",
+ "Area of Use:\n",
+ "- name: World.\n",
+ "- bounds: (-180.0, -90.0, 180.0, 90.0)\n",
+ "Datum: World Geodetic System 1984 ensemble\n",
+ "- Ellipsoid: WGS 84\n",
+ "- Prime Meridian: Greenwich"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "roads.crs = {\"init\": \"epsg:4326\"}\n",
- "road_nodes.crs = {\"init\": \"epsg:4326\"}"
+ "roads.set_crs(4326, inplace=True)\n",
+ "road_nodes.set_crs(4326, inplace=True)\n",
+ "road_nodes.crs"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -267,127 +697,201 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"roads.to_file(\n",
- " os.path.join(data_folder, \"GHA_OSM_roads.gpkg\"),\n",
+ " data_folder / \"GHA_OSM_roads.gpkg\",\n",
" layer=\"edges\",\n",
" driver=\"GPKG\",\n",
")\n",
"road_nodes.to_file(\n",
- " os.path.join(data_folder, \"GHA_OSM_roads.gpkg\"),\n",
+ " data_folder / \"GHA_OSM_roads.gpkg\",\n",
" layer=\"nodes\",\n",
" driver=\"GPKG\",\n",
")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Activity 2: Extract and polygonise hazard data"
+ "## Activity 2: Extract hazard data"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Step 1) Download flood hazard data from Aqueduct"
+ "The full [Aqueduct dataset](https://www.wri.org/resources/data-sets/aqueduct-floods-hazard-maps) is available to download openly. \n",
+ "\n",
+ "Country-level extracts are available through the [Global Systemic Risk Assessment Tool (G-SRAT)](https://global.infrastructureresilience.org/downloads/). This section uses that service to download an extract for Ghana."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "country_iso = \"gha\""
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
- "The full [Aqueduct dataset](https://www.wri.org/resources/data-sets/aqueduct-floods-hazard-maps) is available to download openly. There are some scripts and summary of the data you may find useful at [nismod/aqueduct](https://github.com/nismod/aqueduct).\n",
- "\n",
- "There are almost 700 files in the full Aqueduct dataset, of up to around 100MB each, so we don't recommend downloading all of them unless you intend to do further analysis.\n",
- "\n",
- "For later tutorials, we provide a preprocessed set of hazard polygons for the Ghana example. \n",
- "\n",
- "The next steps show how to clip a region out of the global dataset and polygonise it, in case you wish to reproduce this analysis in another part of the world.\n",
- "\n",
- "For now, we suggest downloading [inunriver_historical_000000000WATCH_1980_rp00100.tif](http://wri-projects.s3.amazonaws.com/AqueductFloodTool/download/v2/inunriver_historical_000000000WATCH_1980_rp00100.tif) to work through the next steps. Save the downloaded file in a new folder titled `flood_layer` under your data_folder."
+ "Create a client to connect to the data API:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client = irv_autopkg_client.Client()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "job_id = client.job_submit(country_iso, [\"wri_aqueduct.version_2\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Processing...\n"
+ ]
+ }
+ ],
+ "source": [
+ "while not client.job_complete(job_id):\n",
+ " print(\"Processing...\")\n",
+ " time.sleep(1)"
]
},
{
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client.extract_download(\n",
+ " country_iso,\n",
+ " data_folder / \"flood_layer\",\n",
+ " # there may be other datasets available, but only download the following\n",
+ " dataset_filter=[\"wri_aqueduct.version_2\"],\n",
+ " overwrite=True,\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Step 2) Run the code below to polygonise the tif files\n",
+ "### Alternative: download flood hazard data from Aqueduct"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The full [Aqueduct dataset](https://www.wri.org/resources/data-sets/aqueduct-floods-hazard-maps) is available to download. There are some scripts and summary of the data you may find useful at [nismod/aqueduct](https://github.com/nismod/aqueduct).\n",
+ "\n",
+ "There are almost 700 files in the full Aqueduct dataset, of up to around 100MB each, so we don't recommend downloading all of them unless you intend to do further analysis.\n",
"\n",
- "This converts the flood maps from *tiff files (raster data)* into *shape files (vector data)*. It will take a little time to run."
+ "The next steps show how to clip a region out of the global dataset, in case you prefer to work from the original global Aqueduct files.\n",
+ "\n",
+ "To follow this step, we suggest downloading [inunriver_historical_000000000WATCH_1980_rp00100.tif](http://wri-projects.s3.amazonaws.com/AqueductFloodTool/download/v2/inunriver_historical_000000000WATCH_1980_rp00100.tif) to work through the next steps. Save the downloaded file in a new folder titled `flood_layer` under your data_folder."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Looking in ../data/flood_layer\n",
+ "Found tif file inunriver_historical_000000000WATCH_1980_rp00100.tif\n",
+ "['gdalwarp', '-te', '-3.262509', '4.737128', '1.187968', '11.162937', '../data/flood_layer/inunriver_historical_000000000WATCH_1980_rp00100.tif', '../data/flood_layer/gha/wri_aqueduct_version_2/inunriver_historical_000000000WATCH_1980_rp00100-gha.tif']\n",
+ "Creating output file that is 534P x 771L.\n",
+ "Processing ../data/flood_layer/inunriver_historical_000000000WATCH_1980_rp00100.tif [1/1] : 0Using internal nodata values (e.g. -9999) for image ../data/flood_layer/inunriver_historical_000000000WATCH_1980_rp00100.tif.\n",
+ "Copying nodata values from source ../data/flood_layer/inunriver_historical_000000000WATCH_1980_rp00100.tif to destination ../data/flood_layer/gha/wri_aqueduct_version_2/inunriver_historical_000000000WATCH_1980_rp00100-gha.tif.\n",
+ "...10...20...30...40...50...60...70...80...90...100 - done.\n",
+ "\n",
+ "\n",
+ "../data/flood_layer/gha/wri_aqueduct_version_2/inunriver_historical_000000000WATCH_1980_rp00100-gha.tif\n",
+ "Looking in ../data/flood_layer/gha\n",
+ "Looking in ../data/flood_layer/gha/wri_aqueduct_version_2\n"
+ ]
+ }
+ ],
"source": [
"xmin = \"-3.262509\"\n",
"ymin = \"4.737128\"\n",
"xmax = \"1.187968\"\n",
"ymax = \"11.162937\"\n",
"\n",
- "for root, dirs, files in os.walk(data_folder, \"flood_layer\"):\n",
+ "for root, dirs, files in os.walk(os.path.join(data_folder, \"flood_layer\")):\n",
" print(\"Looking in\", root)\n",
- " for file in sorted(files):\n",
- " if file.endswith(\".tif\") and not file.endswith(\"p.tif\"):\n",
- " print(\"Found tif file\", file)\n",
- " stem = file[:-4]\n",
- " input_file = os.path.join(root, file)\n",
+ " for file_ in sorted(files):\n",
+ " if file_.endswith(\".tif\") and not file_.endswith(\n",
+ " f\"-{country_iso}.tif\"\n",
+ " ):\n",
+ " print(\"Found tif file\", file_)\n",
+ " stem = file_[:-4]\n",
+ " input_file = os.path.join(root, file_)\n",
"\n",
" # Clip file to bounds\n",
- " clip_file = os.path.join(root, f\"{stem}_clip.tif\")\n",
- " try:\n",
- " os.remove(clip_file)\n",
- " except FileNotFoundError:\n",
- " pass\n",
- " p = subprocess.run(\n",
- " [\n",
- " \"gdalwarp\",\n",
- " \"-te\",\n",
- " xmin,\n",
- " ymin,\n",
- " xmax,\n",
- " ymax,\n",
- " input_file,\n",
- " clip_file,\n",
- " ],\n",
- " capture_output=True,\n",
+ " clip_file = os.path.join(\n",
+ " root,\n",
+ " \"gha\",\n",
+ " \"wri_aqueduct_version_2\",\n",
+ " f\"{stem}-{country_iso}.tif\",\n",
" )\n",
- " print(p.stdout.decode(\"utf8\"))\n",
- " print(p.stderr.decode(\"utf8\"))\n",
- " print(clip_file)\n",
- "\n",
- " # Create vector outline of raster areas\n",
- " # note that this rounds the floating-point values of flood depth from\n",
- " # the raster to the nearest integer in the vector outlines\n",
- " polygons_file = os.path.join(root, f\"{stem}.gpkg\")\n",
" try:\n",
- " os.remove(polygons_file)\n",
+ " os.remove(clip_file)\n",
" except FileNotFoundError:\n",
" pass\n",
- " p = subprocess.run(\n",
- " [\n",
- " \"gdal_polygonize.py\",\n",
- " clip_file,\n",
- " \"-q\",\n",
- " \"-f\",\n",
- " \"GPKG\",\n",
- " polygons_file,\n",
- " ],\n",
- " capture_output=True,\n",
- " )\n",
+ " cmd = [\n",
+ " \"gdalwarp\",\n",
+ " \"-te\",\n",
+ " xmin,\n",
+ " ymin,\n",
+ " xmax,\n",
+ " ymax,\n",
+ " input_file,\n",
+ " clip_file,\n",
+ " ]\n",
+ " print(cmd)\n",
+ " p = subprocess.run(cmd, capture_output=True)\n",
" print(p.stdout.decode(\"utf8\"))\n",
" print(p.stderr.decode(\"utf8\"))\n",
- " print(polygons_file)"
+ " print(clip_file)"
]
},
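+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that the cell above shells out to the `gdalwarp` command-line tool, so it assumes GDAL is installed and on your PATH. If you prefer to stay in Python, here is a minimal sketch of an equivalent clip using `rasterio`, reusing `input_file` and `clip_file` from the loop above:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import rasterio\n",
+    "from rasterio.windows import from_bounds\n",
+    "\n",
+    "with rasterio.open(input_file) as src:\n",
+    "    # window over the source raster covering the Ghana bounding box\n",
+    "    window = from_bounds(\n",
+    "        float(xmin), float(ymin), float(xmax), float(ymax), src.transform\n",
+    "    )\n",
+    "    data = src.read(1, window=window)\n",
+    "    profile = src.profile.copy()\n",
+    "    profile.update(\n",
+    "        height=data.shape[0],\n",
+    "        width=data.shape[1],\n",
+    "        transform=src.window_transform(window),\n",
+    "    )\n",
+    "\n",
+    "with rasterio.open(clip_file, \"w\", **profile) as dst:\n",
+    "    dst.write(data, 1)"
+   ]
+  },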
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -395,6 +899,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -402,6 +907,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -410,27 +916,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
- "flood_path = os.path.join(\n",
+ "flood_path = Path(\n",
" data_folder,\n",
" \"flood_layer\",\n",
- " \"inunriver_historical_000000000WATCH_1980_rp00100.gpkg\",\n",
+ " \"gha\",\n",
+ " \"wri_aqueduct_version_2\",\n",
+ " \"inunriver_historical_000000000WATCH_1980_rp00100-gha.tif\",\n",
")\n",
"\n",
- "output_path = os.path.join(\n",
+ "output_path = Path(\n",
" data_folder,\n",
" \"results\",\n",
- " \"inunriver_historical_000000000WATCH_1980_rp00100_exposure.gpkg\",\n",
- ")\n",
- "\n",
- "flood = gpd.read_file(flood_path).rename(columns={\"DN\": \"depth_m\"})\n",
- "flood = flood[flood.depth_m > 0]"
+ " \"inunriver_historical_000000000WATCH_1980_rp00100__roads_exposure.gpkg\",\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Read in pre-processed road edges, as created earlier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "roads = gpd.read_file(data_folder / \"GHA_OSM_roads.gpkg\", layer=\"edges\")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -439,15 +962,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
- "flood_intersections = gpd.overlay(GHA_OSM_roads, flood, how=\"intersection\")\n",
- "flood_intersections"
+ "grid, bands = snail.io.read_raster_metadata(flood_path)\n",
+ "\n",
+ "prepared = snail.intersection.prepare_linestrings(roads)\n",
+ "flood_intersections = snail.intersection.split_linestrings(prepared, grid)\n",
+ "flood_intersections = snail.intersection.apply_indices(\n",
+ " flood_intersections, grid\n",
+ ")\n",
+ "flood_data = snail.io.read_raster_band_data(flood_path)\n",
+ "flood_intersections[\n",
+ " \"inunriver__epoch_historical__rcp_baseline__rp_100\"\n",
+ "] = snail.intersection.get_raster_values_for_splits(\n",
+ " flood_intersections, flood_data\n",
+ ")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -456,7 +991,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@@ -468,14 +1003,115 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " osm_id | \n",
+ " road_type | \n",
+ " name | \n",
+ " id | \n",
+ " from_id | \n",
+ " to_id | \n",
+ " length_m | \n",
+ " geometry | \n",
+ " split | \n",
+ " index_i | \n",
+ " index_j | \n",
+ " inunriver__epoch_historical__rcp_baseline__rp_100 | \n",
+ " flood_length_m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 15688 | \n",
+ " 1182207853 | \n",
+ " secondary | \n",
+ " Bontanga - Dalung Road | \n",
+ " roade_15688 | \n",
+ " roadn_12222 | \n",
+ " roadn_8006 | \n",
+ " 6604.650117 | \n",
+ " LINESTRING (-1.00963 9.62941, -1.01021 9.63122... | \n",
+ " 8 | \n",
+ " 270 | \n",
+ " 183 | \n",
+ " 0.0 | \n",
+ " 782.156843 | \n",
+ "
\n",
+ " \n",
+ " 15688 | \n",
+ " 1182207853 | \n",
+ " secondary | \n",
+ " Bontanga - Dalung Road | \n",
+ " roade_15688 | \n",
+ " roadn_12222 | \n",
+ " roadn_8006 | \n",
+ " 6604.650117 | \n",
+ " LINESTRING (-1.01227 9.63597, -1.01230 9.63605... | \n",
+ " 9 | \n",
+ " 269 | \n",
+ " 183 | \n",
+ " 0.0 | \n",
+ " 135.659825 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " osm_id road_type name id \\\n",
+ "15688 1182207853 secondary Bontanga - Dalung Road roade_15688 \n",
+ "15688 1182207853 secondary Bontanga - Dalung Road roade_15688 \n",
+ "\n",
+ " from_id to_id length_m \\\n",
+ "15688 roadn_12222 roadn_8006 6604.650117 \n",
+ "15688 roadn_12222 roadn_8006 6604.650117 \n",
+ "\n",
+ " geometry split index_i \\\n",
+ "15688 LINESTRING (-1.00963 9.62941, -1.01021 9.63122... 8 270 \n",
+ "15688 LINESTRING (-1.01227 9.63597, -1.01230 9.63605... 9 269 \n",
+ "\n",
+ " index_j inunriver__epoch_historical__rcp_baseline__rp_100 \\\n",
+ "15688 183 0.0 \n",
+ "15688 183 0.0 \n",
+ "\n",
+ " flood_length_m \n",
+ "15688 782.156843 \n",
+ "15688 135.659825 "
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "flood_intersections.tail()"
+ "flood_intersections.tail(2)"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -484,69 +1120,118 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "728.5879687723159"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "exposed_length = flood_intersections.flood_length_m.sum()\n",
- "exposed_length"
+ "exposed_1m = flood_intersections[\n",
+ " flood_intersections.inunriver__epoch_historical__rcp_baseline__rp_100 >= 1\n",
+ "]\n",
+ "exposed_length_km = exposed_1m.flood_length_m.sum() * 1e-3\n",
+ "exposed_length_km"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "29069.876011778793"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "all_roads_in_dataset_length = GHA_OSM_roads.length_m.sum()\n",
- "all_roads_in_dataset_length"
+ "all_roads_in_dataset_length_km = roads.length_m.sum() * 1e-3\n",
+ "all_roads_in_dataset_length_km"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.025063332519103282"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "proportion = exposed_length / all_roads_in_dataset_length\n",
+ "proportion = exposed_length_km / all_roads_in_dataset_length_km\n",
"proportion"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'2.5% of roads in this dataset are exposed to flood depths of >= 1m in a historical 1-in-100 year flood'"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "f\"{proportion:.0%} of roads in this dataset are exposed to flood depths of >= 1m in a historical 1-in-100 year flood\""
+ "f\"{proportion:.1%} of roads in this dataset are exposed to flood depths of >= 1m in a historical 1-in-100 year flood\""
]
},
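+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional extra (not required for the rest of the tutorial), the same exposure can be broken down by road class with a pandas groupby:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# exposed length in km by road class, for splits with depth >= 1m\n",
+    "exposed_1m.groupby(\"road_type\").flood_length_m.sum() * 1e-3"
+   ]
+  },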
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 34,
"metadata": {},
+ "outputs": [],
"source": [
- "Save to file (with spatial data)"
+ "output_path.parent.mkdir(parents=True, exist_ok=True)"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "attachments": {},
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "flood_intersections.to_file(output_path, driver=\"GPKG\")"
+ "Save to file (with spatial data)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
- "flood_intersections"
+ "flood_intersections.to_file(output_path, driver=\"GPKG\")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -555,12 +1240,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"flood_intersections.drop(columns=\"geometry\").to_csv(\n",
- " output_path.replace(\".gpkg\", \".csv\")\n",
+ " output_path.parent / output_path.name.replace(\".gpkg\", \".csv\")\n",
")"
]
}
@@ -581,7 +1266,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.10"
+ "version": "3.11.4"
}
},
"nbformat": 4,
diff --git a/tutorials/02-assess-damage-and-disruption.ipynb b/tutorials/02-assess-damage-and-disruption.ipynb
index 5c7555b..2bf11a0 100644
--- a/tutorials/02-assess-damage-and-disruption.ipynb
+++ b/tutorials/02-assess-damage-and-disruption.ipynb
@@ -1,6 +1,7 @@
{
"cells": [
{
+ "attachments": {},
"cell_type": "markdown",
"id": "spoken-texture",
"metadata": {},
@@ -26,16 +27,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "brief-stephen",
"metadata": {},
"outputs": [],
"source": [
"# Imports from Python standard library\n",
"import os\n",
- "\n",
- "# see https://docs.python.org/3/library/warnings.html\n",
- "import warnings\n",
+ "from pathlib import Path\n",
"\n",
"# see https://docs.python.org/3/library/glob.html\n",
"from glob import glob\n",
@@ -52,6 +51,13 @@
"# seaborn helps produce more complex plots\n",
"# see https://seaborn.pydata.org/\n",
"import seaborn as sns\n",
+ "\n",
+ "from scipy.integrate import simpson\n",
+ "\n",
+ "import snail.damages\n",
+ "import snail.intersection\n",
+ "import snail.io\n",
+ "\n",
"from pyproj import Geod\n",
"\n",
"# tqdm lets us show progress bars (and تقدّم means \"progress\" in Arabic)\n",
@@ -60,6 +66,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"id": "characteristic-reputation",
"metadata": {},
@@ -69,15 +76,16 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "twelve-threat",
"metadata": {},
"outputs": [],
"source": [
- "data_folder = \"../data\""
+ "data_folder = Path(\"../data\")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"id": "hired-knife",
"metadata": {},
@@ -86,6 +94,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"id": "defensive-passion",
"metadata": {},
@@ -95,30 +104,178 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"id": "driven-restoration",
"metadata": {},
- "outputs": [],
- "source": [
- "hazard_files = sorted(glob(os.path.join(data_folder, \"flood_layer/*.gpkg\")))\n",
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " path | \n",
+ " key | \n",
+ " grid_id | \n",
+ " bands | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 374 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 375 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 376 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 377 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ " 378 | \n",
+ " ../data/flood_layer/gha/wri_aqueduct_version_2... | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 0 | \n",
+ " (1,) | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
379 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " path \\\n",
+ "0 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "1 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "2 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "3 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "4 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ ".. ... \n",
+ "374 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "375 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "376 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "377 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "378 ../data/flood_layer/gha/wri_aqueduct_version_2... \n",
+ "\n",
+ " key grid_id bands \n",
+ "0 wri_aqueduct-version_2-inuncoast_historical_wt... 0 (1,) \n",
+ "1 wri_aqueduct-version_2-inuncoast_historical_wt... 0 (1,) \n",
+ "2 wri_aqueduct-version_2-inuncoast_historical_wt... 0 (1,) \n",
+ "3 wri_aqueduct-version_2-inuncoast_historical_wt... 0 (1,) \n",
+ "4 wri_aqueduct-version_2-inuncoast_historical_wt... 0 (1,) \n",
+ ".. ... ... ... \n",
+ "374 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 0 (1,) \n",
+ "375 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 0 (1,) \n",
+ "376 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 0 (1,) \n",
+ "377 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 0 (1,) \n",
+ "378 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 0 (1,) \n",
+ "\n",
+ "[379 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hazard_paths = sorted(\n",
+ " glob(str(data_folder / \"flood_layer/gha/wri_aqueduct_version_2/wri*.tif\"))\n",
+ ")\n",
+ "hazard_files = pd.DataFrame({\"path\": hazard_paths})\n",
+ "hazard_files[\"key\"] = [Path(path).stem for path in hazard_paths]\n",
+ "hazard_files, grids = snail.io.extend_rasters_metadata(hazard_files)\n",
"hazard_files"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "headed-impression",
+ "execution_count": 19,
+ "id": "81fe43d5",
"metadata": {},
"outputs": [],
"source": [
- "def read_file_without_warnings(path, **kwd):\n",
- " with warnings.catch_warnings():\n",
- " warnings.simplefilter(\"ignore\")\n",
- " data = gpd.read_file(path, **kwd)\n",
- " return data"
+ "assert len(grids) == 1\n",
+ "grid = grids[0]"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"id": "described-consciousness",
"metadata": {},
@@ -128,130 +285,1044 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"id": "recreational-renaissance",
"metadata": {},
- "outputs": [],
- "source": [
- "roads = read_file_without_warnings(\n",
- " os.path.join(data_folder, \"GHA_OSM_roads.gpkg\"), layer=\"edges\"\n",
- ")\n",
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " osm_id | \n",
+ " road_type | \n",
+ " name | \n",
+ " id | \n",
+ " from_id | \n",
+ " to_id | \n",
+ " length_m | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4790594 | \n",
+ " tertiary | \n",
+ " Airport Road | \n",
+ " roade_0 | \n",
+ " roadn_0 | \n",
+ " roadn_1 | \n",
+ " 236.526837 | \n",
+ " LINESTRING (-0.17544 5.60550, -0.17418 5.60555... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4790599 | \n",
+ " tertiary | \n",
+ " South Liberation Link | \n",
+ " roade_1 | \n",
+ " roadn_2 | \n",
+ " roadn_10683 | \n",
+ " 18.539418 | \n",
+ " LINESTRING (-0.17889 5.59979, -0.17872 5.59977) | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " osm_id road_type name id from_id to_id \\\n",
+ "0 4790594 tertiary Airport Road roade_0 roadn_0 roadn_1 \n",
+ "1 4790599 tertiary South Liberation Link roade_1 roadn_2 roadn_10683 \n",
+ "\n",
+ " length_m geometry \n",
+ "0 236.526837 LINESTRING (-0.17544 5.60550, -0.17418 5.60555... \n",
+ "1 18.539418 LINESTRING (-0.17889 5.59979, -0.17872 5.59977) "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "roads_file = data_folder / \"GHA_OSM_roads.gpkg\"\n",
+ "roads = gpd.read_file(roads_file, layer=\"edges\")\n",
"roads.head(2)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"id": "dressed-madrid",
"metadata": {},
"outputs": [],
"source": [
- "for hazard_file in tqdm(hazard_files):\n",
- " # read file\n",
- " flood = read_file_without_warnings(hazard_file).rename(\n",
- " columns={\"DN\": \"depth_m\"}\n",
- " )\n",
- " flood = flood[flood.depth_m > 0]\n",
- "\n",
- " # run intersection\n",
- " intersections = gpd.overlay(roads, flood, how=\"intersection\")\n",
- " # calculate intersection lengths\n",
- " geod = Geod(ellps=\"WGS84\")\n",
- " intersections[\"flood_length_m\"] = intersections.geometry.apply(\n",
- " geod.geometry_length\n",
- " )\n",
- " # save file\n",
- " output_file = os.path.join(\n",
- " data_folder,\n",
- " \"results\",\n",
- " os.path.basename(hazard_file).replace(\".gpkg\", \"_exposure.gpkg\"),\n",
- " )\n",
- " if len(intersections):\n",
- " intersections.to_file(output_file, driver=\"GPKG\")"
+ "# split roads along hazard data grid\n",
+ "\n",
+ "# TODO top-level \"overlay_rasters\"\n",
+ "# TODO for vector in vectors / for raster in rasters \"overlay_raster\"\n",
+ "\n",
+ "\n",
+ "# push into split_linestrings, flag to disable\n",
+ "prepared = snail.intersection.prepare_linestrings(roads)\n",
+ "\n",
+ "flood_intersections = snail.intersection.split_linestrings(prepared, grid)\n",
+ "\n",
+ "# push into split_linestrings\n",
+ "flood_intersections = snail.intersection.apply_indices(\n",
+ " flood_intersections, grid, index_i=\"i_0\", index_j=\"j_0\"\n",
+ ")\n",
+ "\n",
+ "flood_intersections = snail.io.associate_raster_files(\n",
+ " flood_intersections, hazard_files\n",
+ ")\n",
+ "\n",
+ "# calculate the length of each stretch of road\n",
+ "# don't include in snail wrapper top-level function\n",
+ "geod = Geod(ellps=\"WGS84\")\n",
+ "flood_intersections[\"length_m\"] = flood_intersections.geometry.apply(\n",
+ " geod.geometry_length\n",
+ ")"
]
},
{
- "cell_type": "markdown",
- "id": "interstate-chile",
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "ea2207fe",
"metadata": {},
+ "outputs": [],
"source": [
- "List all the results just created:"
+ "# save to file\n",
+ "output_file = os.path.join(\n",
+ " data_folder,\n",
+ " \"results\",\n",
+ " str(roads_file.name).replace(\".gpkg\", \"_edges___exposure.geoparquet\"),\n",
+ ")\n",
+ "\n",
+ "flood_intersections.to_parquet(output_file)"
]
},
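+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "reload-exposure-md",
+   "metadata": {},
+   "source": [
+    "To pick the analysis up again later, the saved table can be read back from GeoParquet (a one-line sketch, assuming the same `output_file` path):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "reload-exposure",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reload the saved GeoDataFrame of exposure results\n",
+    "flood_intersections = gpd.read_parquet(output_file)"
+   ]
+  },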
{
"cell_type": "code",
- "execution_count": null,
- "id": "short-enforcement",
+ "execution_count": 23,
+ "id": "cc2f238c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['osm_id', 'road_type', 'name', 'id', 'from_id', 'to_id', 'length_m',\n",
+ " 'geometry', 'split', 'i_0',\n",
+ " ...\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2050_rp01000-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00002-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00005-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00010-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00025-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00050-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00100-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00250-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp00500-gha',\n",
+ " 'wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-ESM-CHEM_2080_rp01000-gha'],\n",
+ " dtype='object', length=390)"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "flood_intersections.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "8d7fbaa1",
"metadata": {},
"outputs": [],
"source": [
- "intersection_files = sorted(\n",
- " glob(os.path.join(data_folder, \"results/inunriver*.gpkg\"))\n",
- ")\n",
- "intersection_files"
+ "data_cols = [col for col in flood_intersections.columns if \"wri\" in col]"
]
},
{
- "cell_type": "markdown",
- "id": "formed-glory",
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "blond-intervention",
"metadata": {},
- "source": [
- "Read and combine all the exposed lengths:"
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " split | \n",
+ " road_type | \n",
+ " length_m | \n",
+ " key | \n",
+ " depth_m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 445 | \n",
+ " roade_1432 | \n",
+ " 9 | \n",
+ " tertiary | \n",
+ " 153.153316 | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0.466355 | \n",
+ "
\n",
+ " \n",
+ " 446 | \n",
+ " roade_1432 | \n",
+ " 10 | \n",
+ " tertiary | \n",
+ " 28.151241 | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 1.349324 | \n",
+ "
\n",
+ " \n",
+ " 450 | \n",
+ " roade_1453 | \n",
+ " 9 | \n",
+ " tertiary | \n",
+ " 412.834012 | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0.006246 | \n",
+ "
\n",
+ " \n",
+ " 457 | \n",
+ " roade_1459 | \n",
+ " 3 | \n",
+ " primary | \n",
+ " 929.965564 | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0.516396 | \n",
+ "
\n",
+ " \n",
+ " 459 | \n",
+ " roade_1459 | \n",
+ " 5 | \n",
+ " primary | \n",
+ " 921.598630 | \n",
+ " wri_aqueduct-version_2-inuncoast_historical_wt... | \n",
+ " 0.407549 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2052280 | \n",
+ " roade_15595 | \n",
+ " 35 | \n",
+ " secondary | \n",
+ " 1277.291170 | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 1.505718 | \n",
+ "
\n",
+ " \n",
+ " 2052281 | \n",
+ " roade_15595 | \n",
+ " 37 | \n",
+ " secondary | \n",
+ " 411.835653 | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 10.555718 | \n",
+ "
\n",
+ " \n",
+ " 2052282 | \n",
+ " roade_15663 | \n",
+ " 0 | \n",
+ " tertiary | \n",
+ " 835.677777 | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 15.260432 | \n",
+ "
\n",
+ " \n",
+ " 2052283 | \n",
+ " roade_15663 | \n",
+ " 1 | \n",
+ " tertiary | \n",
+ " 341.015419 | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 18.910431 | \n",
+ "
\n",
+ " \n",
+ " 2052284 | \n",
+ " roade_15663 | \n",
+ " 2 | \n",
+ " tertiary | \n",
+ " 1025.220133 | \n",
+ " wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... | \n",
+ " 21.370432 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1070692 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id split road_type length_m \\\n",
+ "445 roade_1432 9 tertiary 153.153316 \n",
+ "446 roade_1432 10 tertiary 28.151241 \n",
+ "450 roade_1453 9 tertiary 412.834012 \n",
+ "457 roade_1459 3 primary 929.965564 \n",
+ "459 roade_1459 5 primary 921.598630 \n",
+ "... ... ... ... ... \n",
+ "2052280 roade_15595 35 secondary 1277.291170 \n",
+ "2052281 roade_15595 37 secondary 411.835653 \n",
+ "2052282 roade_15663 0 tertiary 835.677777 \n",
+ "2052283 roade_15663 1 tertiary 341.015419 \n",
+ "2052284 roade_15663 2 tertiary 1025.220133 \n",
+ "\n",
+ " key depth_m \n",
+ "445 wri_aqueduct-version_2-inuncoast_historical_wt... 0.466355 \n",
+ "446 wri_aqueduct-version_2-inuncoast_historical_wt... 1.349324 \n",
+ "450 wri_aqueduct-version_2-inuncoast_historical_wt... 0.006246 \n",
+ "457 wri_aqueduct-version_2-inuncoast_historical_wt... 0.516396 \n",
+ "459 wri_aqueduct-version_2-inuncoast_historical_wt... 0.407549 \n",
+ "... ... ... \n",
+ "2052280 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 1.505718 \n",
+ "2052281 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 10.555718 \n",
+ "2052282 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 15.260432 \n",
+ "2052283 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 18.910431 \n",
+ "2052284 wri_aqueduct-version_2-inunriver_rcp8p5_MIROC-... 21.370432 \n",
+ "\n",
+ "[1070692 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# find any max depth and filter > 0\n",
+ "all_intersections = flood_intersections[\n",
+ " flood_intersections[data_cols].max(axis=1) > 0\n",
+ "]\n",
+ "# subset columns\n",
+ "all_intersections = all_intersections.drop(\n",
+ " columns=[\"osm_id\", \"name\", \"from_id\", \"to_id\", \"geometry\", \"i_0\", \"j_0\"]\n",
+ ")\n",
+ "# melt and check again for depth\n",
+ "all_intersections = all_intersections.melt(\n",
+ " id_vars=[\"id\", \"split\", \"road_type\", \"length_m\"],\n",
+ " value_vars=data_cols,\n",
+ " var_name=\"key\",\n",
+ " value_name=\"depth_m\",\n",
+ ").query(\"depth_m > 0\")\n",
+ "all_intersections"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "blond-intervention",
+ "execution_count": 26,
+ "id": "320791b0",
"metadata": {},
"outputs": [],
"source": [
- "all_intersections = []\n",
+ "river = all_intersections[all_intersections.key.str.contains(\"inunriver\")]\n",
+ "coast = all_intersections[all_intersections.key.str.contains(\"inuncoast\")]\n",
"\n",
- "for intersection_file in tqdm(intersection_files):\n",
- " # split up the filename to pull out metadata\n",
- " hazard, rcp, gcm, epoch, rp, _ = os.path.basename(intersection_file).split(\n",
- " \"_\"\n",
- " )\n",
- " gcm = gcm.replace(\"0\", \"\")\n",
- " rp = int(rp.replace(\"rp\", \"\"))\n",
- " epoch = int(epoch)\n",
- "\n",
- " # read file\n",
- " intersections = read_file_without_warnings(intersection_file)\n",
- " # drop road length and geometry fields\n",
- " intersections.drop(columns=\"length_m\", inplace=True)\n",
- " # add metadata about the hazard and scenario\n",
- " intersections[\"hazard\"] = hazard\n",
- " intersections[\"rcp\"] = rcp\n",
- " intersections[\"gcm\"] = gcm\n",
- " intersections[\"epoch\"] = epoch\n",
- " intersections[\"rp\"] = rp\n",
- "\n",
- " all_intersections.append(intersections)\n",
- "\n",
- "# group all together\n",
- "all_intersections = pd.concat(all_intersections)\n",
- "all_intersections"
+ "coast_keys = coast.key.str.extract(\n",
+ " r\"wri_aqueduct-version_2-(?P\\w+)_(?P[^_]+)_(?P[^_]+)_(?P[^_]+)_rp(?P