From 435df4125a216e5b511746e93f010d60bea30f45 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 2 Feb 2024 17:23:37 +0000 Subject: [PATCH 01/27] Draft buildings download rules --- workflow/buildings/ghsl-built.smk | 57 +++++++++++++++++++++++++++++++ workflow/buildings/giri-bem.smk | 35 +++++++++++++++++++ workflow/buildings/overture.smk | 24 +++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 workflow/buildings/ghsl-built.smk create mode 100644 workflow/buildings/giri-bem.smk create mode 100644 workflow/buildings/overture.smk diff --git a/workflow/buildings/ghsl-built.smk b/workflow/buildings/ghsl-built.smk new file mode 100644 index 00000000..e04827c2 --- /dev/null +++ b/workflow/buildings/ghsl-built.smk @@ -0,0 +1,57 @@ +""" +Download JRC Global Human Settlement Layer Built-up Surface + +GHS-BUILT-S R2023A - GHS built-up surface grid, derived from Sentinel2 composite +and Landsat, multitemporal (1975-2030) + +Reference +--------- +https://ghsl.jrc.ec.europa.eu/download.php?ds=bu + +Dataset: + +> Pesaresi M., Politis P. 
(2023): GHS-BUILT-S R2023A - GHS built-up surface +> grid, derived from Sentinel2 composite and Landsat, multitemporal +> (1975-2030)European Commission, Joint Research Centre (JRC) PID: +> http://data.europa.eu/89h/9f06f36f-4b11-47ec-abb0-4f8b7b1d72ea, +> doi:10.2905/9F06F36F-4B11-47EC-ABB0-4F8B7B1D72EA + +Concept & Methodology: + +> European Commission GHSL Data Package 2023, Publications Office of the +> European Union, Luxembourg, 2023, JRC133256, ISBN 978-92-68-02341-9 +> doi:10.2760/098587 + +""" + +rule download_ghsl_built_s: + output: + "{OUTPUT_DIR}/input/ghsl/GHS_{RES_NRES}_E{YEAR}_GLOBE_R2023A_4326_3ss_V1_0.tif" + wildcard_constraints: + YEAR=range(1975, 2031, 5), + RES_NRES="BUILT_S|BUILT_S_NRES" + shell: + """ + output_dir=$(dirname {output}) + + mkdir -p $output_dir + + wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_{wildcards.RES_NRES}_GLOBE_R2023A/GHS_{wildcards.RES_NRES}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss/V1-0/GHS_{wildcards.RES_NRES}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss_V1_0.zip + --directory-prefix=$output_dir + + unzip -o $output_dir/GHS_{wildcards.RES_NRES}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss_V1_0.zip \ + -d $output_dir + """ + +rule download_ghsl_built_s_all: + input: + expand( + os.path.join( + "{{OUTPUT_DIR}}", + "input", + "ghsl", + "GHS_{RES_NRES}_E{YEAR}_GLOBE_R2023A_4326_3ss_V1_0.tif", + ), + RES_NRES=("BUILT_S", "BUILT_S_NRES"), + YEAR=(2020, ) + ) diff --git a/workflow/buildings/giri-bem.smk b/workflow/buildings/giri-bem.smk new file mode 100644 index 00000000..6d25d9bb --- /dev/null +++ b/workflow/buildings/giri-bem.smk @@ -0,0 +1,35 @@ +""" +Download GIRI Building Exposure Model + +Total Building Stock US$ (Resolution: 5km x 5km) + +The Building Exposure Model provides information on the building type and the +economic value of the built environment for the non-residential (employment, +health and education) sectors, for each country and territory of the world. 
+ +This dataset was produced by UNEP/GRID-Geneva in May 2023. + +Source +------ +https://giri.unepgrid.ch + +Reference +--------- +Thomas Piller, Antonio Benvenuti & Andrea De Bono (2023) The GIRI global +building exposure model (BEM) +https://giri.unepgrid.ch/sites/default/files/2023-09/GIRI_BEM_report_UNIGE.pdf +""" + +rule download_giri_bem: + output: + res="{OUTPUT_DIR}/input/giri/bem_5x5_valfis_res.tif", + nres="{OUTPUT_DIR}/input/giri/bem_5x5_valfis_nres.tif", + shell: + """ + output_dir=$(dirname {output}) + + wget -nc https://hazards-data.unepgrid.ch/bem_5x5_valfis_res.tif \ + --directory-prefix=$output_dir + wget -nc https://hazards-data.unepgrid.ch/bem_5x5_valfis_nres.tif \ + --directory-prefix=$output_dir + """ diff --git a/workflow/buildings/overture.smk b/workflow/buildings/overture.smk new file mode 100644 index 00000000..b2f746c5 --- /dev/null +++ b/workflow/buildings/overture.smk @@ -0,0 +1,24 @@ +""" +Download Overture Maps + +Source +------ + + +""" + +rule download_overture: + output: + release_dir=directory("{OUTPUT_DIR}/input/overture/{params.RELEASE}") + params: + RELEASE="2024-01-17-alpha.0" + shell: + """ + pushd {output.release_dir} + pwd + # aws s3 sync \ + # --region us-west-2 \ + # --no-sign-request \ + # s3://overturemaps-us-west-2/release/{params.RELEASE}/ . 
+ popd + """ From 1c91282c65015996593b362102eda4937ca8edeb Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 2 Feb 2024 17:29:04 +0000 Subject: [PATCH 02/27] Add rules to Snakefile --- workflow/Snakefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflow/Snakefile b/workflow/Snakefile index 0b31eb4d..5573d3bb 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -111,6 +111,10 @@ SAMPLES_PER_TRACKSET = { } ##### load rules ##### +include: "buildings/ghsl-built.smk" +include: "buildings/giri-bem.smk" +include: "buildings/overture.smk" + include: "context/coastlines.smk" include: "context/gadm.smk" include: "context/natural-earth.smk" From 02f8f6055007b4614fe2dc4ec4ef5fb7ae570fb3 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 9 Feb 2024 11:44:21 +0000 Subject: [PATCH 03/27] Download GHSL built surface, volume and height --- workflow/buildings/ghsl-built.smk | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/workflow/buildings/ghsl-built.smk b/workflow/buildings/ghsl-built.smk index e04827c2..28ab2397 100644 --- a/workflow/buildings/ghsl-built.smk +++ b/workflow/buildings/ghsl-built.smk @@ -26,20 +26,21 @@ Concept & Methodology: rule download_ghsl_built_s: output: - "{OUTPUT_DIR}/input/ghsl/GHS_{RES_NRES}_E{YEAR}_GLOBE_R2023A_4326_3ss_V1_0.tif" - wildcard_constraints: - YEAR=range(1975, 2031, 5), - RES_NRES="BUILT_S|BUILT_S_NRES" + "{OUTPUT_DIR}/input/ghsl/GHS_{VAR}_E{YEAR}_GLOBE_R2023A_4326_3ss_V1_0.tif" + params: + GROUP=lambda wildcards, output: wildcards.VAR.replace("_NRES", "").replace("_ANBH", "") shell: """ output_dir=$(dirname {output}) mkdir -p $output_dir - wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_{wildcards.RES_NRES}_GLOBE_R2023A/GHS_{wildcards.RES_NRES}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss/V1-0/GHS_{wildcards.RES_NRES}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss_V1_0.zip - --directory-prefix=$output_dir + wget \ + -nc \ + --directory-prefix=$output_dir \ + 
https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_{params.GROUP}_GLOBE_R2023A/GHS_{wildcards.VAR}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss/V1-0/GHS_{wildcards.VAR}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss_V1_0.zip - unzip -o $output_dir/GHS_{wildcards.RES_NRES}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss_V1_0.zip \ + unzip -o $output_dir/GHS_{wildcards.VAR}_E{wildcards.YEAR}_GLOBE_R2023A_4326_3ss_V1_0.zip \ -d $output_dir """ @@ -47,11 +48,18 @@ rule download_ghsl_built_s_all: input: expand( os.path.join( - "{{OUTPUT_DIR}}", + "results", "input", "ghsl", - "GHS_{RES_NRES}_E{YEAR}_GLOBE_R2023A_4326_3ss_V1_0.tif", + "GHS_BUILT_{VAR}_E{YEAR}_GLOBE_R2023A_4326_3ss_V1_0.tif", ), - RES_NRES=("BUILT_S", "BUILT_S_NRES"), + VAR=("S", "S_NRES", "V", "V_NRES"), YEAR=(2020, ) - ) + ) + [ + os.path.join( + "results", + "input", + "ghsl", + "GHS_BUILT_H_ANBH_E2018_GLOBE_R2023A_4326_3ss_V1_0.tif", + ) + ] From 5384cf6b967cd1cd85451bc44e9c15a98a87118f Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 9 Feb 2024 11:44:49 +0000 Subject: [PATCH 04/27] National and ADM1 summary of GIRI BEM values --- workflow/buildings/giri-bem.smk | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/workflow/buildings/giri-bem.smk b/workflow/buildings/giri-bem.smk index 6d25d9bb..5e1a97b3 100644 --- a/workflow/buildings/giri-bem.smk +++ b/workflow/buildings/giri-bem.smk @@ -26,10 +26,35 @@ rule download_giri_bem: nres="{OUTPUT_DIR}/input/giri/bem_5x5_valfis_nres.tif", shell: """ - output_dir=$(dirname {output}) + output_dir=$(dirname {output.res}) wget -nc https://hazards-data.unepgrid.ch/bem_5x5_valfis_res.tif \ --directory-prefix=$output_dir wget -nc https://hazards-data.unepgrid.ch/bem_5x5_valfis_nres.tif \ --directory-prefix=$output_dir """ + +rule summarise_giri_bem_admin: + output: + adm1="{OUTPUT_DIR}/input/giri/bem_5x5_valfis_adm1.csv", + adm0="{OUTPUT_DIR}/input/giri/bem_5x5_valfis_adm0.csv", + shell: + """ + exactextract \ + -p 
./results/input/admin-boundaries/adm1.shp \ + -r "res:results/input/giri/bem_5x5_valfis_res.tif" \ + -r "nres:results/input/giri/bem_5x5_valfis_nres.tif" \ + -f GID_1 \ + -o adm1_bem-res.csv \ + -s "sum(res)" \ + -s "sum(nres)" + + exactextract \ + -p ./results/input/admin-boundaries/adm0.shp \ + -r "res:results/input/giri/bem_5x5_valfis_res.tif" \ + -r "nres:results/input/giri/bem_5x5_valfis_nres.tif" \ + -f GID_0 \ + -o adm0_bem-res.csv \ + -s "sum(res)" \ + -s "sum(nres)" + """ From 910ac8186a5ff9ff1aa7f32816a443fcf05c6e7d Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 9 Feb 2024 11:45:24 +0000 Subject: [PATCH 05/27] Add capital stock datasets for comparison --- workflow/Snakefile | 11 ++- .../imf-investment-capital-stock.smk | 80 +++++++++++++++++++ workflow/buildings/penn-world-tables.smk | 44 ++++++++++ workflow/buildings/worldbank-cwon.smk | 49 ++++++++++++ 4 files changed, 180 insertions(+), 4 deletions(-) create mode 100644 workflow/buildings/imf-investment-capital-stock.smk create mode 100644 workflow/buildings/penn-world-tables.smk create mode 100644 workflow/buildings/worldbank-cwon.smk diff --git a/workflow/Snakefile b/workflow/Snakefile index 5573d3bb..3e7f5a24 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -111,14 +111,17 @@ SAMPLES_PER_TRACKSET = { } ##### load rules ##### -include: "buildings/ghsl-built.smk" -include: "buildings/giri-bem.smk" -include: "buildings/overture.smk" - include: "context/coastlines.smk" include: "context/gadm.smk" include: "context/natural-earth.smk" +include: "buildings/worldbank-cwon.smk" +include: "buildings/penn-world-tables.smk" +include: "buildings/imf-investment-capital-stock.smk" +include: "buildings/ghsl-built.smk" +include: "buildings/giri-bem.smk" +include: "buildings/overture.smk" + include: "nature-ecosystems/land-cover.smk" include: "population-economy/dryad-gdp.smk" include: "population-economy/ghsl-pop.smk" diff --git a/workflow/buildings/imf-investment-capital-stock.smk 
b/workflow/buildings/imf-investment-capital-stock.smk new file mode 100644 index 00000000..2b42a6ef --- /dev/null +++ b/workflow/buildings/imf-investment-capital-stock.smk @@ -0,0 +1,80 @@ +""" +IMF Investment and Capital Stock Dataset, 1960-2019 + +(This version: May 2021) + +Developed by Expenditure Policy (EP) Division, Fiscal Affairs Department (FAD), +International Monetary Fund (IMF) + +This file provides comprehensive data on public investment and capital stock +(i.e. general government), private investment and capital stock, as well as +investment and capital stock arising from public-private partnerships (PPPs), +across the Fund membership countries. + +The accompanying 2021 Update of the Manual "Estimating Public, Private, and PPP +Capital Stocks" +(https://infrastructuregovern.imf.org/content/dam/PIMA/Knowledge-Hub/dataset/InvestmentandCapitalStockDatabaseUserManualandFAQ_May2021.pdf) +describes in great detail the series' definitions, the investment series' data +sources, as well as the methodology in constructing the stock series. The +methodology follows the standard perpetual inventory equation and largely builds +on Gupta and others (2014) "Efficiency-Adjusted Public Capital and Growth" and +Kamps (2006) "New Estimates of Government Net Capital Stocks for 22 OECD +Countries, 1960–2001". + + +Please refer to it as "IMF Investment and Capital Stock Dataset, 2021” and add a +reference to the above-mentioned IMF Board Paper. + + +Key variables: + +kgov_rppp + +General government capital stock (constructed based on general government +investment flows "igov_rppp"), in billions of constant 2017 international +dollars. + +kpriv_rppp + +Private capital stock (constructed based on private investment flows +"igov_rppp"), in billions of constant 2017 international dollars. + +kppp_rppp + +Public-private partnership (PPP) capital stock (constructed based on PPP +investment flows "ippp_rppp"), in billions of constant 2017 international +dollars. 
+ +""" + +rule download_imf_icsd: + output: + xlsx="{OUTPUT_DIR}/input/capital-stocks/icsd_dataset_2021.xlsx", + doc="{OUTPUT_DIR}/input/capital-stocks/icsd_manual_2021.pdf", + shell: + """ + wget \ + -nc \ + --output-document="{output.xlsx}" \ + https://infrastructuregovern.imf.org/content/dam/PIMA/Knowledge-Hub/dataset/IMFInvestmentandCapitalStockDataset2021.xlsx + + wget \ + -nc \ + --output-document="{output.doc}" \ + https://infrastructuregovern.imf.org/content/dam/PIMA/Knowledge-Hub/dataset/InvestmentandCapitalStockDatabaseUserManualandFAQ_May2021.pdf + """ + +rule extract_imf_icsd: + input: + xlsx=rules.download_imf_icsd.output.xlsx, + output: + csv="{OUTPUT_DIR}/input/capital-stocks/icsd.csv", + run: + import pandas + df = pandas.read_excel( + input.xlsx, + sheet_name='Dataset', + ) + # cn is "Capital stock at current PPPs (in mil. 2017US$)" + df = df[['country','isocode','year','kgov_rppp', 'kpriv_rppp', 'kppp_rppp']] + df.to_csv(output.csv, index=False) diff --git a/workflow/buildings/penn-world-tables.smk b/workflow/buildings/penn-world-tables.smk new file mode 100644 index 00000000..219b43dc --- /dev/null +++ b/workflow/buildings/penn-world-tables.smk @@ -0,0 +1,44 @@ +""" +Penn World Table version 10.01 + +PWT version 10.01 is a database with information on relative levels of income, +output, input and productivity, covering 183 countries between 1950 and 2019 + +Feenstra, Robert C., Robert Inklaar and Marcel P. 
Timmer (2015), "The Next +Generation of the Penn World Table" American Economic Review, 105(10), +3150-3182, available for download at www.ggdc.net/pwt + +Groningen Growth and Development Centre, 2023, "Penn World Table version 10.01", +https://doi.org/10.34894/QT5BCC, DataverseNL, V1 +""" + +rule download_pwt: + output: + zip="{OUTPUT_DIR}/input/capital-stocks/pwt.zip", + xlsx="{OUTPUT_DIR}/input/capital-stocks/pwt/pwt1001.xlsx", + shell: + """ + if [ -e {output.zip} ] + then + echo "Skipping download" + else + curl -L -o {output.zip} -J https://dataverse.nl/api/access/dataset/:persistentId/?persistentId=doi:10.34894/QT5BCC + fi + unzip -n -d $(dirname {output.xlsx}) {output.zip} + """ + + +rule extract_pwt: + input: + xlsx=rules.download_pwt.output.xlsx, + output: + csv="{OUTPUT_DIR}/input/capital-stocks/pwt.csv", + run: + import pandas + df = pandas.read_excel( + input.xlsx, + sheet_name='Data', + ) + # cn is "Capital stock at current PPPs (in mil. 2017US$)" + df = df[['country','countrycode','year','cn']] + df.to_csv(output.csv, index=False) diff --git a/workflow/buildings/worldbank-cwon.smk b/workflow/buildings/worldbank-cwon.smk new file mode 100644 index 00000000..a5522318 --- /dev/null +++ b/workflow/buildings/worldbank-cwon.smk @@ -0,0 +1,49 @@ +""" +The Changing Wealth of Nations (CWON) 2021 presents the most comprehensive +accounting of global wealth. Wealth — the stock of produced, natural, and human +capital — has been firmly established as a key measure of economic prosperity. + +The wealth accounting approach provides two related sets of information: +comprehensive wealth accounts (a stock measure in total and per capita values), +and adjusted net savings (a flow measure). The wealth accounts were updated in +2021, using a new methodology described in The Changing Wealth of Nations 2021. 
+""" + +rule download_cwon: + output: + full="{OUTPUT_DIR}/input/capital-stocks/CWON2021_Country_Tool_Full.xlsx", + balanced="{OUTPUT_DIR}/input/capital-stocks/CWON2021_Country_Tool_Balanced.xlsx", + doc="{OUTPUT_DIR}/input/capital-stocks/CWON2021_Methodology_October_2021.pdf", + shell: + """ + wget \ + -nc \ + --output-document="{output.full}" \ + https://datacatalogfiles.worldbank.org/ddh-published/0042066/DR0084604/CWON2021%20Country%20Tool%20-%20Full%20Dataset.xlsx?versionId=2022-06-06T13:38:38.6998180Z + + wget \ + -nc \ + --output-document="{output.balanced}" \ + https://datacatalogfiles.worldbank.org/ddh-published/0042066/DR0084043/CWON2021%20Country%20Tool%20-%20Balanced%20Dataset.xlsx?versionId=2022-06-06T13:38:44.8802314Z + + wget \ + -nc \ + --output-document="{output.doc}" \ + https://datacatalogfiles.worldbank.org/ddh-published/0042066/DR0084161/CWON%202021%20Methodology%20-%20October%202021.pdf?versionId=2022-06-06T13:38:42.3087244Z + """ + +rule extract_cwon_produced_capital: + input: + xlsx=rules.download_cwon.output.full + output: + csv="{OUTPUT_DIR}/input/capital-stocks/CWON2021.csv" + run: + import pandas + df = pandas.read_excel( + input.xlsx, + sheet_name='country', + skiprows=1, + na_values='..' 
+        ).query('wb_name != 0')
+        df = df[['wb_name','wb_code','year','unit','pk']]
+        df.to_csv(output.csv, index=False)

From f98e54e30984ac897601e4a4e1ac948de7f63880 Mon Sep 17 00:00:00 2001
From: Tom Russell
Date: Fri, 9 Feb 2024 11:46:01 +0000
Subject: [PATCH 06/27] JRC building damage curves for asia

---
 config/damage_curves/flood/commercial_asia.csv  | 13 +++++++++++++
 config/damage_curves/flood/industrial_asia.csv  | 13 +++++++++++++
 config/damage_curves/flood/residential_asia.csv | 13 +++++++++++++
 3 files changed, 39 insertions(+)
 create mode 100644 config/damage_curves/flood/commercial_asia.csv
 create mode 100644 config/damage_curves/flood/industrial_asia.csv
 create mode 100644 config/damage_curves/flood/residential_asia.csv

diff --git a/config/damage_curves/flood/commercial_asia.csv b/config/damage_curves/flood/commercial_asia.csv
new file mode 100644
index 00000000..81222f28
--- /dev/null
+++ b/config/damage_curves/flood/commercial_asia.csv
@@ -0,0 +1,13 @@
+# Huizinga et al., 2017 European Commission JRC Technical Report
+# https://publications.jrc.ec.europa.eu/repository/handle/JRC105688
+# table 3-6: Average continental damage function for Asia - commerce.
+inundation_depth_(m),damage_fraction
+0,0
+0.5,0.38
+1,0.54
+1.5,0.66
+2,0.76
+3,0.88
+4,0.94
+5,0.98
+6,1

diff --git a/config/damage_curves/flood/industrial_asia.csv b/config/damage_curves/flood/industrial_asia.csv
new file mode 100644
index 00000000..a1f5c09e
--- /dev/null
+++ b/config/damage_curves/flood/industrial_asia.csv
@@ -0,0 +1,13 @@
+# Huizinga et al., 2017 European Commission JRC Technical Report
+# https://publications.jrc.ec.europa.eu/repository/handle/JRC105688
+# table 3-11: Average continental damage function for Asia - industry.
+inundation_depth_(m),damage_fraction
+0,0
+0.5,0.28
+1,0.48
+1.5,0.63
+2,0.72
+3,0.86
+4,0.91
+5,0.96
+6,1

diff --git a/config/damage_curves/flood/residential_asia.csv b/config/damage_curves/flood/residential_asia.csv
new file mode 100644
index 00000000..b9e15b85
--- /dev/null
+++ b/config/damage_curves/flood/residential_asia.csv
@@ -0,0 +1,13 @@
+# Huizinga et al., 2017 European Commission JRC Technical Report
+# https://publications.jrc.ec.europa.eu/repository/handle/JRC105688
+# table 3-2: average continental damage function for Asia - residential buildings
+inundation_depth_(m),damage_fraction
+0,0
+0.5,0.33
+1,0.49
+1.5,0.62
+2,0.72
+3,0.87
+4,0.93
+5,0.98
+6,1

From 889a70b88261addaf70745f923b718e1c5ded1cd Mon Sep 17 00:00:00 2001
From: Tom Russell
Date: Fri, 16 Feb 2024 10:04:29 +0000
Subject: [PATCH 07/27] WIP disaggregate BEM with GHSL_BUILT_V

---
 workflow/Snakefile                      |  1 +
 workflow/buildings/disaggregate_bem.smk | 98 +++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 workflow/buildings/disaggregate_bem.smk

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 3e7f5a24..bc29db71 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
 include: "buildings/ghsl-built.smk"
 include: "buildings/giri-bem.smk"
 include: "buildings/overture.smk"
+include: "buildings/disaggregate_bem.smk"

 include: "nature-ecosystems/land-cover.smk"
 include: "population-economy/dryad-gdp.smk"

diff --git a/workflow/buildings/disaggregate_bem.smk b/workflow/buildings/disaggregate_bem.smk
new file mode 100644
index 00000000..a6cbe1c5
--- /dev/null
+++ b/workflow/buildings/disaggregate_bem.smk
+"""
+Upscale (disaggregate) building exposure layers according to built volume
+"""
+
+rule disaggregate_bem:
+    input:
+        bem_res=rules.download_giri_bem.output.res,
+        ghsl_res="{OUTPUT_DIR}/input/ghsl/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif",
+
bem_nres=rules.download_giri_bem.output.nres, + ghsl_nres="{OUTPUT_DIR}/input/ghsl/GHS_BUILT_V_NRES_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", + output: + bem_res_3ss="{OUTPUT_DIR}/buildings/building_exposure_res_3ss.tif", + bem_nres_3ss="{OUTPUT_DIR}/buildings/building_exposure_nres_3ss.tif", + shell: + import rasterio + from rasterio.warp import reproject, Resampling + + def disaggregate(value_150ss_ds, volume_3ss_ds): + # BEM gives ~5x5km value in USD => value_150ss + value_150ss = value_150ss_ds.read(1) + print("Read value", value_150ss.shape, value_150ss.dtype) + + # GHSL gives ~100m volume in m3 => volume_3ss + volume_3ss = volume_3ss_ds.read(1) + print("Read volume", volume_3ss.shape, volume_3ss.dtype) + + + # Resample GHSL to ~5x5km, "sum" => volume_150ss + factor = 0.02 + volume_150ss = volume_3ss_ds.read( + out_shape=( + volume_3ss.count, + int(volume_3ss.height * factor), + int(volume_3ss.width * factor) + ), + resampling=Resampling.sum + ) + print("Read volume coarse", volume_150ss.shape, volume_150ss.dtype) + + + # Calculate (coarse) value per volume + value_per_volume_150ss = value_150ss / volume_150ss + + # Resample to fine-scale value per volume, "nearest" + with rasterio.Env(): + value_per_volume_3ss = np.zeros(volume_3ss.shape, np.float64) + reproject( + value_per_volume_150ss, + value_per_volume_3ss, + src_transform=volume_150ss.transform, + src_crs=volume_150ss.crs, + dst_transform=volume_3ss_ds.transform, + dst_crs=volume_3ss_ds.crs, + resampling=Resampling.nearest) + + # Calculate fine-scale value + value_3ss = value_per_volume_3ss * volume_3ss + + return value_3ss + + def write_raster_like(data, filename, template_ds): + with rasterio.open( + filename, + 'w', + driver='GTiff', + width=template_ds.width, + height=template_ds.height, + count=1, + dtype=np.float64, + nodata=0, + transform=template_ds.transform, + crs=template_ds.transform) as output_ds: + output_ds.write(data, indexes=1) + + # Residential + res_value_150ss_ds = 
rasterio.open(input.bem_res) + res_volume_3ss_ds = rasterio.open(input.ghsl_res) + res_value_3ss = disaggregate(res_value_150ss_ds, res_volume_3ss_ds) + write_raster_like(res_value_3ss, output.bem_res_3ss, res_volume_3ss_ds) + res_value_150ss_ds.close() + res_volume_3ss_ds.close() + + # Non-residential + nres_value_150ss_ds = rasterio.open(input.bem_res) + nres_volume_3ss_ds = rasterio.open(input.ghsl_res) + nres_value_3ss = disaggregate(nres_value_150ss_ds, nres_volume_3ss_ds) + write_raster_like(nres_value_3ss, output.bem_nres_3ss, nres_volume_3ss_ds) + nres_value_150ss_ds.close() + nres_volume_3ss_ds.close() + + + + # Then to assess flood damage + + # JBA footprint gives ~30m depth in m => depth_30m + # Resample value_3ss to ~30m, "nearest", divide by 9 => value_30m + # Apply damage curve to depth_30m => damage_fraction_30m + # Calculate (damage_fraction_30m * value_30m) => damage_30m From 0de27241e77f6b8f547901f753c6a9438238a35e Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 5 Apr 2024 10:37:12 +0100 Subject: [PATCH 08/27] Sketch working with built-up area layers by country --- workflow/buildings/disaggregate_bem.smk | 97 ++----------------------- workflow/buildings/giri-bem.smk | 13 ++++ 2 files changed, 21 insertions(+), 89 deletions(-) diff --git a/workflow/buildings/disaggregate_bem.smk b/workflow/buildings/disaggregate_bem.smk index a6cbe1c5..cfdaefb4 100644 --- a/workflow/buildings/disaggregate_bem.smk +++ b/workflow/buildings/disaggregate_bem.smk @@ -4,95 +4,14 @@ Upscale (disaggregate) building exposure layers according to built volume rule disaggregate_bem: input: - bem_res=rules.download_giri_bem.output.res, - ghsl_res="{OUTPUT_DIR}/input/ghsl/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", - bem_nres=rules.download_giri_bem.output.nres, - ghsl_nres="{OUTPUT_DIR}/input/ghsl/GHS_BUILT_V_NRES_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", + bem_res="{OUTPUT_DIR}/input/giri/{ISO3}/bem_5x5_valfis_res.tif", + 
ghsl_res="{OUTPUT_DIR}/input/ghsl/{ISO3}/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", + bem_nres="{OUTPUT_DIR}/input/giri/{ISO3}/bem_5x5_valfis_nres.tif", + ghsl_nres="{OUTPUT_DIR}/input/ghsl/{ISO3}/GHS_BUILT_V_NRES_E2020_GLOBE_R2023A_4326_3ss_V1_0.tif", output: - bem_res_3ss="{OUTPUT_DIR}/buildings/building_exposure_res_3ss.tif", - bem_nres_3ss="{OUTPUT_DIR}/buildings/building_exposure_nres_3ss.tif", + bem_res_3ss="{OUTPUT_DIR}/buildings/{ISO3}/building_exposure_res_3ss.tif", + bem_nres_3ss="{OUTPUT_DIR}/buildings/{ISO3}/building_exposure_nres_3ss.tif", shell: - import rasterio - from rasterio.warp import reproject, Resampling + """ - def disaggregate(value_150ss_ds, volume_3ss_ds): - # BEM gives ~5x5km value in USD => value_150ss - value_150ss = value_150ss_ds.read(1) - print("Read value", value_150ss.shape, value_150ss.dtype) - - # GHSL gives ~100m volume in m3 => volume_3ss - volume_3ss = volume_3ss_ds.read(1) - print("Read volume", volume_3ss.shape, volume_3ss.dtype) - - - # Resample GHSL to ~5x5km, "sum" => volume_150ss - factor = 0.02 - volume_150ss = volume_3ss_ds.read( - out_shape=( - volume_3ss.count, - int(volume_3ss.height * factor), - int(volume_3ss.width * factor) - ), - resampling=Resampling.sum - ) - print("Read volume coarse", volume_150ss.shape, volume_150ss.dtype) - - - # Calculate (coarse) value per volume - value_per_volume_150ss = value_150ss / volume_150ss - - # Resample to fine-scale value per volume, "nearest" - with rasterio.Env(): - value_per_volume_3ss = np.zeros(volume_3ss.shape, np.float64) - reproject( - value_per_volume_150ss, - value_per_volume_3ss, - src_transform=volume_150ss.transform, - src_crs=volume_150ss.crs, - dst_transform=volume_3ss_ds.transform, - dst_crs=volume_3ss_ds.crs, - resampling=Resampling.nearest) - - # Calculate fine-scale value - value_3ss = value_per_volume_3ss * volume_3ss - - return value_3ss - - def write_raster_like(data, filename, template_ds): - with rasterio.open( - filename, - 'w', - 
driver='GTiff', - width=template_ds.width, - height=template_ds.height, - count=1, - dtype=np.float64, - nodata=0, - transform=template_ds.transform, - crs=template_ds.transform) as output_ds: - output_ds.write(data, indexes=1) - - # Residential - res_value_150ss_ds = rasterio.open(input.bem_res) - res_volume_3ss_ds = rasterio.open(input.ghsl_res) - res_value_3ss = disaggregate(res_value_150ss_ds, res_volume_3ss_ds) - write_raster_like(res_value_3ss, output.bem_res_3ss, res_volume_3ss_ds) - res_value_150ss_ds.close() - res_volume_3ss_ds.close() - - # Non-residential - nres_value_150ss_ds = rasterio.open(input.bem_res) - nres_volume_3ss_ds = rasterio.open(input.ghsl_res) - nres_value_3ss = disaggregate(nres_value_150ss_ds, nres_volume_3ss_ds) - write_raster_like(nres_value_3ss, output.bem_nres_3ss, nres_volume_3ss_ds) - nres_value_150ss_ds.close() - nres_volume_3ss_ds.close() - - - - # Then to assess flood damage - - # JBA footprint gives ~30m depth in m => depth_30m - # Resample value_3ss to ~30m, "nearest", divide by 9 => value_30m - # Apply damage curve to depth_30m => damage_fraction_30m - # Calculate (damage_fraction_30m * value_30m) => damage_30m + """ diff --git a/workflow/buildings/giri-bem.smk b/workflow/buildings/giri-bem.smk index 5e1a97b3..77540b19 100644 --- a/workflow/buildings/giri-bem.smk +++ b/workflow/buildings/giri-bem.smk @@ -58,3 +58,16 @@ rule summarise_giri_bem_admin: -s "sum(res)" \ -s "sum(nres)" """ + +rule clip_raster_by_country: + input: + raster="{OUTPUT_DIR}/input/{DATASET}/{FILENAME}.tif", + admin="{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg", + output: + raster="{OUTPUT_DIR}/input/{DATASET}/{ISO3}/{FILENAME}__{ISO3}.tif", + run: + import irv_datapkg + from open_gira.admin import get_administrative_data, boundary_geom + admin = get_administrative_data(input.admin) + # TODO difficult to use? 
includes 1-pixel buffer for "safety" in small areas - could be tightened + irv_datapkg.crop_raster(input.raster, output.raster, boundary_geom(admin, wildcards.ISO3)) From e7052ee3b05522d29ef1ede51413ae6fcb401236 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Thu, 11 Apr 2024 14:42:38 +0100 Subject: [PATCH 09/27] Lift admin helper to package --- src/open_gira/admin.py | 11 +++-- workflow/transport/create_rail_network.py | 30 ++++++++----- workflow/transport/create_road_network.py | 54 ++++++++++++++++------- workflow/transport/utils.py | 20 ++++++--- 4 files changed, 79 insertions(+), 36 deletions(-) diff --git a/src/open_gira/admin.py b/src/open_gira/admin.py index 8c815f38..30150608 100644 --- a/src/open_gira/admin.py +++ b/src/open_gira/admin.py @@ -9,7 +9,9 @@ import shapely -def merge_gadm_admin_levels(preference: pd.DataFrame, alternative: pd.DataFrame) -> pd.DataFrame: +def merge_gadm_admin_levels( + preference: pd.DataFrame, alternative: pd.DataFrame +) -> pd.DataFrame: """ Geospatial data is often aggregated at admin 'levels', as exemplified by the GADM project. 
These integer levels are 0 for nation states, 1 for the @@ -35,7 +37,9 @@ def merge_gadm_admin_levels(preference: pd.DataFrame, alternative: pd.DataFrame) substitute_countries = set(alternative.ISO_A3) - set(preference.ISO_A3) logging.info(f"Gap filling with: {substitute_countries}") - substitute_regions: pd.DataFrame = alternative[alternative["ISO_A3"].isin(substitute_countries)] + substitute_regions: pd.DataFrame = alternative[ + alternative["ISO_A3"].isin(substitute_countries) + ] merged = pd.concat([preference, substitute_regions]) @@ -76,6 +80,5 @@ def get_administrative_data(file_path: str, to_epsg: int = None) -> gpd.GeoDataF def boundary_geom(gdf: gpd.GeoDataFrame, iso_a3: str) -> shapely.Geometry: - """Given administrative data, return the boundary geometry for a given ISO3 country code - """ + """Given administrative data, return the boundary geometry for a given ISO3 country code""" return gdf.set_index("iso_a3").loc[iso_a3, "geometry"] diff --git a/workflow/transport/create_rail_network.py b/workflow/transport/create_rail_network.py index 19376a94..dd684514 100644 --- a/workflow/transport/create_rail_network.py +++ b/workflow/transport/create_rail_network.py @@ -30,7 +30,9 @@ osm_epsg = 4326 - logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO) + logging.basicConfig( + format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO + ) # Ignore geopandas parquet implementation warnings # NB though that .geoparquet is not the format to use for archiving. 
@@ -49,33 +51,41 @@ # osm_to_pq.py creates these columns but we're not using them, so discard edges = edges.drop( - [col for col in edges.columns if col.startswith("start_node_") or col.startswith("end_node_")], - axis="columns" + [ + col + for col in edges.columns + if col.startswith("start_node_") or col.startswith("end_node_") + ], + axis="columns", ) # if present, filter nodes to stations if nodes is not None and not nodes.empty: - nodes = nodes.loc[nodes.tag_railway == 'station', :] + nodes = nodes.loc[nodes.tag_railway == "station", :] # pass an id_prefix containing the slice number to ensure edges and nodes # are uniquely identified across all slices in the network - network = create_network(edges=edges, nodes=nodes, id_prefix=f"{dataset_name}_{slice_number}") + network = create_network( + edges=edges, nodes=nodes, id_prefix=f"{dataset_name}_{slice_number}" + ) logging.info( f"Network contains {len(network.edges)} edges and {len(network.nodes)} nodes" ) # boolean bridge field - network.edges['bridge'] = str_to_bool(network.edges['tag_bridge']) + network.edges["bridge"] = str_to_bool(network.edges["tag_bridge"]) # boolean station field - network.nodes['station'] = network.nodes.tag_railway == 'station' + network.nodes["station"] = network.nodes.tag_railway == "station" # select and label assets with their type # we will use the `asset_type` field to select damage curves # bridge overrides railway as asset class, tag last - network.nodes.loc[network.nodes.station == True, 'asset_type'] = RailAssets.STATION - network.edges.loc[network.edges.tag_railway == 'rail', 'asset_type'] = RailAssets.RAILWAY - network.edges.loc[network.edges.bridge == True, 'asset_type'] = RailAssets.BRIDGE + network.nodes.loc[network.nodes.station == True, "asset_type"] = RailAssets.STATION + network.edges.loc[network.edges.tag_railway == "rail", "asset_type"] = ( + RailAssets.RAILWAY + ) + network.edges.loc[network.edges.bridge == True, "asset_type"] = RailAssets.BRIDGE # manually set 
crs using geopandas rather than snkit to avoid 'init' style proj crs # and permit successful CRS deserializiation and methods such as edges.crs.to_epsg() diff --git a/workflow/transport/create_road_network.py b/workflow/transport/create_road_network.py index 3a76afc6..8273bbb5 100644 --- a/workflow/transport/create_road_network.py +++ b/workflow/transport/create_road_network.py @@ -46,13 +46,13 @@ def clean_edges(edges: gpd.GeoDataFrame) -> gpd.GeoDataFrame: if "tag_highway" in edges.columns: # None -> empty string - edges.loc[edges['tag_highway'].isnull(), 'tag_highway'] = '' + edges.loc[edges["tag_highway"].isnull(), "tag_highway"] = "" # turn the _link entries into edges.tag_highway = edges.tag_highway.apply(strip_suffix) # boolean bridge field from tag_bridges if "tag_bridge" in edges.columns: - edges['bridge'] = str_to_bool(edges['tag_bridge']) + edges["bridge"] = str_to_bool(edges["tag_bridge"]) return edges @@ -160,7 +160,9 @@ def annotate_condition(network: snkit.network.Network) -> snkit.network.Network: osm_epsg = 4326 - logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO) + logging.basicConfig( + format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO + ) # Ignore geopandas parquet implementation warnings # NB though that .geoparquet is not the format to use for archiving. 
@@ -174,13 +176,19 @@ def annotate_condition(network: snkit.network.Network) -> snkit.network.Network: # osm_to_pq.py creates these columns but we're not using them, so discard edges = edges.drop( - [col for col in edges.columns if col.startswith("start_node_") or col.startswith("end_node_")], - axis="columns" + [ + col + for col in edges.columns + if col.startswith("start_node_") or col.startswith("end_node_") + ], + axis="columns", ) # for roads we do not currently use any nodes extracted from OSM (osm_nodes_path) logging.info("Creating road network") - network = create_network(edges=clean_edges(edges), nodes=None, id_prefix=f"{dataset_name}_{slice_number}") + network = create_network( + edges=clean_edges(edges), nodes=None, id_prefix=f"{dataset_name}_{slice_number}" + ) logging.info( f"Network contains {len(network.edges)} edges and {len(network.nodes)} nodes" ) @@ -202,16 +210,30 @@ def annotate_condition(network: snkit.network.Network) -> snkit.network.Network: # select and label assets with their type # the asset_type is used to later select a damage curve # note that order is important here, if an edge is paved, motorway and a bridge, it will be tagged as a bridge only - network.edges.loc[network.edges.paved == False, 'asset_type'] = RoadAssets.UNPAVED - network.edges.loc[network.edges.paved == True, 'asset_type'] = RoadAssets.PAVED - network.edges.loc[network.edges.tag_highway == 'unclassified', 'asset_type'] = RoadAssets.UNCLASSIFIED - network.edges.loc[network.edges.tag_highway == 'residential', 'asset_type'] = RoadAssets.RESIDENTIAL - network.edges.loc[network.edges.tag_highway == 'tertiary', 'asset_type'] = RoadAssets.TERTIARY - network.edges.loc[network.edges.tag_highway == 'secondary', 'asset_type'] = RoadAssets.SECONDARY - network.edges.loc[network.edges.tag_highway == 'primary', 'asset_type'] = RoadAssets.PRIMARY - network.edges.loc[network.edges.tag_highway == 'trunk', 'asset_type'] = RoadAssets.TRUNK - network.edges.loc[network.edges.tag_highway 
== 'motorway', 'asset_type'] = RoadAssets.MOTORWAY - network.edges.loc[network.edges.bridge == True, 'asset_type'] = RoadAssets.BRIDGE + network.edges.loc[network.edges.paved == False, "asset_type"] = RoadAssets.UNPAVED + network.edges.loc[network.edges.paved == True, "asset_type"] = RoadAssets.PAVED + network.edges.loc[network.edges.tag_highway == "unclassified", "asset_type"] = ( + RoadAssets.UNCLASSIFIED + ) + network.edges.loc[network.edges.tag_highway == "residential", "asset_type"] = ( + RoadAssets.RESIDENTIAL + ) + network.edges.loc[network.edges.tag_highway == "tertiary", "asset_type"] = ( + RoadAssets.TERTIARY + ) + network.edges.loc[network.edges.tag_highway == "secondary", "asset_type"] = ( + RoadAssets.SECONDARY + ) + network.edges.loc[network.edges.tag_highway == "primary", "asset_type"] = ( + RoadAssets.PRIMARY + ) + network.edges.loc[network.edges.tag_highway == "trunk", "asset_type"] = ( + RoadAssets.TRUNK + ) + network.edges.loc[network.edges.tag_highway == "motorway", "asset_type"] = ( + RoadAssets.MOTORWAY + ) + network.edges.loc[network.edges.bridge == True, "asset_type"] = RoadAssets.BRIDGE logging.info("Writing network to disk") network.edges.to_parquet(edges_output_path) diff --git a/workflow/transport/utils.py b/workflow/transport/utils.py index 10557d9d..f72f3daa 100644 --- a/workflow/transport/utils.py +++ b/workflow/transport/utils.py @@ -60,7 +60,9 @@ def cast(x: Any, *, casting_function: Callable, nullable: bool) -> Any: raise ValueError("Couldn't recast to non-nullable value") from casting_error -def annotate_country(network: snkit.network.Network, countries: gpd.GeoDataFrame) -> snkit.network.Network: +def annotate_country( + network: snkit.network.Network, countries: gpd.GeoDataFrame +) -> snkit.network.Network: """ Label network edges and nodes with their country ISO code @@ -93,19 +95,25 @@ def annotate_country(network: snkit.network.Network, countries: gpd.GeoDataFrame # shorthand for selecting certain column sets 
starting_node_columns = list(nodes.columns.values) - desired_node_columns = starting_node_columns + ['iso_a3'] + desired_node_columns = starting_node_columns + ["iso_a3"] # spatial join nodes geometries to their containing country, retain only node geometries nodes_with_iso_a3 = nodes.sjoin(countries, how="left", predicate="within") # drop cruft from merge (i.e. "index_right") nodes_with_iso_a3 = nodes_with_iso_a3[desired_node_columns] interior_nodes = nodes_with_iso_a3[~nodes_with_iso_a3["iso_a3"].isna()] - logging.info(f"Found {len(interior_nodes)} nodes that are within a country geometry") + logging.info( + f"Found {len(interior_nodes)} nodes that are within a country geometry" + ) # for any nodes where sjoin didn't work, drop the iso_a3 and try again with sjoin_nearest # reproject to web mercator CRS for this operation - exterior_nodes = nodes_with_iso_a3[nodes_with_iso_a3["iso_a3"].isna()].drop(["iso_a3"], axis="columns") - exterior_nodes = exterior_nodes.to_crs(WEB_MERC_EPSG).sjoin_nearest(countries.to_crs(WEB_MERC_EPSG)) + exterior_nodes = nodes_with_iso_a3[nodes_with_iso_a3["iso_a3"].isna()].drop( + ["iso_a3"], axis="columns" + ) + exterior_nodes = exterior_nodes.to_crs(WEB_MERC_EPSG).sjoin_nearest( + countries.to_crs(WEB_MERC_EPSG) + ) exterior_nodes = exterior_nodes.to_crs(input_node_crs) if not exterior_nodes.empty: logging.info( @@ -119,7 +127,7 @@ def annotate_country(network: snkit.network.Network, countries: gpd.GeoDataFrame # use the columns we were passed, plus the country code of each node nodes[desired_node_columns], geometry="geometry", - crs=input_node_crs + crs=input_node_crs, ) # set edge.from_iso_a3 from node.iso_a3 of edge.from_id From 6f2ebbfae35e31b4d8661c060a009202e094aac8 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Mon, 22 Apr 2024 13:00:40 +0100 Subject: [PATCH 10/27] Fix CSV format --- config/damage_curves/flood/commercial_asia.csv | 18 +++++++++--------- config/damage_curves/flood/industrial_asia.csv | 18 +++++++++--------- 2 
files changed, 18 insertions(+), 18 deletions(-) diff --git a/config/damage_curves/flood/commercial_asia.csv b/config/damage_curves/flood/commercial_asia.csv index 81222f28..7397aaca 100644 --- a/config/damage_curves/flood/commercial_asia.csv +++ b/config/damage_curves/flood/commercial_asia.csv @@ -2,12 +2,12 @@ # https://publications.jrc.ec.europa.eu/repository/handle/JRC105688 # table 3-6: Average continental damage function for Asia - commerce. inundation_depth_(m),damage_fraction -0 0 -0.5 0.38 -1 0.54 -1.5 0.66 -2 0.76 -3 0.88 -4 0.94 -5 0.98 -6 1 +0,0 +0.5,0.38 +1,0.54 +1.5,0.66 +2,0.76 +3,0.88 +4,0.94 +5,0.98 +6,1 diff --git a/config/damage_curves/flood/industrial_asia.csv b/config/damage_curves/flood/industrial_asia.csv index a1f5c09e..8ab62ffa 100644 --- a/config/damage_curves/flood/industrial_asia.csv +++ b/config/damage_curves/flood/industrial_asia.csv @@ -2,12 +2,12 @@ # https://publications.jrc.ec.europa.eu/repository/handle/JRC105688 # table 3-11: Average continental damage function for Asia - industry. 
inundation_depth_(m),damage_fraction -0 0 -0.5 0.28 -1 0.48 -1.5 0.63 -2 0.72 -3 0.86 -4 0.91 -5 0.96 -6 1 +0,0 +0.5,0.28 +1,0.48 +1.5,0.63 +2,0.72 +3,0.86 +4,0.91 +5,0.96 +6,1 From 3457a53c8a2c7d22fd91b0eed5d22020492ab76e Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Wed, 5 Jun 2024 15:54:09 +0100 Subject: [PATCH 11/27] WIP exploring process in notebooks --- notebooks/capital-stocks.ipynb | 117 +++ notebooks/disaggregate-trade-to-adm1.ipynb | 152 ++++ notebooks/disaggregate-trade-to-network.ipynb | 352 +++++++++ notebooks/explore-tha-baci.ipynb | 428 ++++++++++ notebooks/windowed-raster.ipynb | 746 ++++++++++++++++++ scripts/building_damage.py | 292 +++++++ 6 files changed, 2087 insertions(+) create mode 100644 notebooks/capital-stocks.ipynb create mode 100644 notebooks/disaggregate-trade-to-adm1.ipynb create mode 100644 notebooks/disaggregate-trade-to-network.ipynb create mode 100644 notebooks/explore-tha-baci.ipynb create mode 100644 notebooks/windowed-raster.ipynb create mode 100644 scripts/building_damage.py diff --git a/notebooks/capital-stocks.ipynb b/notebooks/capital-stocks.ipynb new file mode 100644 index 00000000..e75554a2 --- /dev/null +++ b/notebooks/capital-stocks.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "217c7e17-6ad0-4299-b20a-efa027b38264", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "818203e3-bbd3-4001-889a-65213e140b7d", + "metadata": {}, + "outputs": [], + "source": [ + "icsd = pandas.read_csv(\"input/capital-stocks/icsd.csv\")\n", + "icsd.tail(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbfa1254-c35b-41a6-b7b2-eaef9a2a3552", + "metadata": {}, + "outputs": [], + "source": [ + "pwt = pandas.read_csv(\"input/capital-stocks/pwt.csv\")\n", + "pwt.tail(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"9135495e-606f-4353-939c-0647df24ffe4", + "metadata": {}, + "outputs": [], + "source": [ + "cwon = pandas.read_csv(\"input/capital-stocks/CWON2021.csv\")\n", + "cwon.tail(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f118d92-876a-4b35-b7bb-22106144a8c0", + "metadata": {}, + "outputs": [], + "source": [ + "bem = pandas.read_csv(\"input/giri/bem_5x5_valfis_adm0.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da3c3275-e3d3-40b7-a86f-e659c3d582c6", + "metadata": {}, + "outputs": [], + "source": [ + "df = bem.set_index(\"GID_0\")\n", + "df = df.join(icsd.query('year == 2018').drop(columns=['year', 'country']).set_index('isocode'))\n", + "df = df.join(cwon.query('year == 2018').drop(columns=['year', 'wb_name', 'unit']).set_index('wb_code'))\n", + "df = df.join(pwt.query('year == 2018').drop(columns=['year', 'country']).set_index('countrycode'))\n", + "# df = df.dropna()\n", + "df.kppp_rppp = df.kppp_rppp.fillna(0)\n", + "\n", + "df[\"giri__valfis\"] = (df.res_sum + df.nres_sum) * 1e-9 # convert unit to billion\n", + "df[\"cwon__pk\"] = df.pk * 1e-9 # convert unit to billion\n", + "df[\"icsd__k\"] = (df.kgov_rppp + df.kpriv_rppp + df.kppp_rppp) # assume in billions\n", + "df[\"pwt__cn\"] = df.cn * 1e-3 # convert from million to billion\n", + "\n", + "df.loc[[\"THA\", \"PHL\", \"VNM\", \"IDN\"]][[\"giri__valfis\",\"cwon__pk\",\"icsd__k\",\"pwt__cn\"]].plot(kind=\"bar\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1eaae232-0d25-4d62-86ff-e9d457a12e2e", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"giri_cwon\"] = (df.giri__valfis / df.cwon__pk)\n", + "df[\"giri_icsd\"] = (df.giri__valfis / df.icsd__k)\n", + "df[\"giri_pwt\"] = (df.giri__valfis / df.pwt__cn)\n", + "\n", + "df[\"cwon_icsd\"] = (df.cwon__pk / df.icsd__k)\n", + "df[\"cwon_pwt\"] = (df.cwon__pk / df.pwt__cn)\n", + "\n", + "df[[\"giri_cwon\", \"giri_icsd\", \"giri_pwt\", \"cwon_icsd\", 
\"cwon_pwt\"]].plot.box(ylabel=\"Ratio\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/disaggregate-trade-to-adm1.ipynb b/notebooks/disaggregate-trade-to-adm1.ipynb new file mode 100644 index 00000000..c02eeb9f --- /dev/null +++ b/notebooks/disaggregate-trade-to-adm1.ipynb @@ -0,0 +1,152 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d00618be-7a19-461e-a8f4-f71eb1c13e77", + "metadata": {}, + "outputs": [], + "source": [ + "! exa -l" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d009910-9845-49e5-9b0d-8605f090b722", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas\n", + "import geopandas\n", + "import rasterio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03c7a2e5-8c95-441e-aeac-1dd082c24780", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm0 = pandas.read_csv(\"baci_sector_trade_THA.csv\")\n", + "trade_adm0[\"GID_0\"] = \"THA\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40cbed27-a124-4875-b2fb-c1c4eb8c246a", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm0.groupby(\"sector\").count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "185671b2-9921-4ea7-9fd3-c8a22a9dcab5", + "metadata": {}, + "outputs": [], + "source": [ + "gva_adm1_tha = pandas.read_csv(\"DOSE_V2.csv\").query('year == 2018 and GID_0 == \"THA\"')[[\n", + " 'GID_0', 'GID_1', 'year', 'pop',\n", + " 'ag_grp_pc_usd', 'man_grp_pc_usd', 'serv_grp_pc_usd'\n", + "]].dropna()\n", + "\n", + "for sector in (\"ag\", 
\"man\", \"serv\"):\n", + " gva_adm1_tha[sector] = gva_adm1_tha[\"pop\"] * gva_adm1_tha[f\"{sector}_grp_pc_usd\"]\n", + "\n", + "gva_adm1_sector_tha = gva_adm1_tha.drop(columns=[\"GID_0\", \"year\", \"pop\", 'ag_grp_pc_usd', 'man_grp_pc_usd', 'serv_grp_pc_usd']) \\\n", + " .set_index(\"GID_1\")\n", + "\n", + "gva_proportion_adm1_sector = (gva_adm1_sector_tha / gva_adm1_sector_tha.sum()).rename(columns={\"serv\":\"ser\"}) \\\n", + " .reset_index().melt(id_vars=\"GID_1\", var_name=\"sector\", value_name=\"proportion\")\n", + "gva_proportion_adm1_sector[\"GID_0\"] = \"THA\"\n", + "gva_proportion_adm1_sector.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae24c56d-1527-4a05-8d4c-1613f19321f2", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm1 = trade_adm0.merge(gva_proportion_adm1_sector, left_on=(\"GID_0\", \"sector\"), right_on=(\"GID_0\", \"sector\"), how=\"outer\")\n", + "trade_adm1.trade_value_thousandUSD *= trade_adm1.proportion\n", + "trade_adm1.trade_quantity_tons *= trade_adm1.proportion\n", + "trade_adm1 = trade_adm1.drop(columns=[\"GID_0\", \"proportion\"])\n", + "\n", + "imports_adm1 = trade_adm1.query('import_country_code == \"THA\"').copy()\n", + "imports_adm1['export_zone'] = imports_adm1.export_country_code\n", + "imports_adm1['import_zone'] = imports_adm1.GID_1\n", + "imports_adm1['partner_GID_0'] = imports_adm1.export_country_code\n", + "\n", + "exports_adm1 = trade_adm1.query('export_country_code == \"THA\"').copy()\n", + "exports_adm1['export_zone'] = exports_adm1.GID_1\n", + "exports_adm1['import_zone'] = exports_adm1.import_country_code\n", + "exports_adm1['partner_GID_0'] = exports_adm1.import_country_code\n", + "\n", + "trade_adm1 = pandas.concat([exports_adm1, imports_adm1]).rename(columns={\"GID_1\": \"THA_GID_1\"})[[\n", + " 'export_country_code',\n", + " 'export_zone',\n", + " 'import_country_code',\n", + " 'import_zone',\n", + " 'THA_GID_1',\n", + " 'partner_GID_0',\n", + " 'sector',\n", + " 
'trade_value_thousandUSD',\n", + " 'trade_quantity_tons',\n", + "]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bb33d3b-ae53-47b9-9dce-403cdf0a2cd5", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm1.to_csv(\"baci_sector_adm1_trade_THA.csv\", index=False)\n", + "trade_adm1.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f13eb16-894f-4473-82ef-d76e6d467da3", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm1_total = trade_adm1.drop(columns=['export_country_code','export_zone','import_country_code','import_zone','sector']) \\\n", + " .groupby(['THA_GID_1','partner_GID_0']) \\\n", + " .sum()\n", + "trade_adm1_total.to_csv(\"baci_total_adm1_trade_THA.csv\")\n", + "trade_adm1_total.head(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/disaggregate-trade-to-network.ipynb b/notebooks/disaggregate-trade-to-network.ipynb new file mode 100644 index 00000000..ac126b43 --- /dev/null +++ b/notebooks/disaggregate-trade-to-network.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "12933702-03c2-4bd6-9991-2e495b37dfe5", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "from glob import glob\n", + "from pathlib import Path\n", + "\n", + "import geopandas\n", + "import pandas\n", + "from tqdm.notebook import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aa9fa5e-c061-406e-a4c7-9abe3ded3f1a", + "metadata": {}, + "outputs": [], + "source": [ + "og_results_path = 
Path(\"~/projects/open-gira/results\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "342c37ad-10e5-4b83-8ffd-cde5f8c4b24e", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm1_total = pandas.read_csv(\"baci_total_adm1_trade_THA.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b14a5abc-393c-482f-95fd-62a699d1d3a8", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_path = og_results_path / \"composite_network\" / \"south-east-asia-road\" / \"nodes.gpq\"\n", + "nodes = geopandas.read_parquet(nodes_path).query('iso_a3 == \"THA\"')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ded9b64-d749-43fe-b7da-e0134e9d6607", + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(\"adm1_tha.gpq\"):\n", + " adm1_path = og_results_path / \"input\" / \"admin-boundaries\" / \"gadm36_levels.gpkg\"\n", + " adm1 = geopandas.read_file(adm1_path, driver=\"pyogrio\", layer=\"level1\")\n", + " adm1_tha = adm1.query('GID_0 == \"THA\"')\n", + " adm1_tha.to_parquet(\"adm1_tha.gpq\")\n", + "else:\n", + " adm1_tha = geopandas.read_parquet(\"adm1_tha.gpq\")\n", + "\n", + "adm1_tha = adm1_tha[['GID_1', 'geometry']].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f86a43fa-9c0b-4b52-a49d-fb21e2150786", + "metadata": {}, + "outputs": [], + "source": [ + "def group_to_nodes(basename, nodes):\n", + " out_base = basename.replace('3ss','nodes')\n", + " dfs = []\n", + " warnings.filterwarnings(action='ignore', message=\"Geometry is in a geographic CRS\")\n", + " for fname in tqdm(list(glob(f\"{basename}.parquet/*\"))):\n", + " df = geopandas.read_parquet(fname)\n", + " if not df.empty:\n", + " df_nodes = df.sjoin_nearest(nodes[['id','geometry']], how='left')[['id','data']].groupby('id').sum().reset_index()\n", + " dfs.append(df_nodes)\n", + " data_nodes = pandas.concat(dfs).groupby('id').sum()\n", + " data_nodes = 
nodes.set_index('id').join(data_nodes)\n", + " data_nodes.to_parquet(f\"{out_base}.parquet\")\n", + " data_nodes.to_file(f\"{out_base}.gpkg\", engine=\"pyogrio\", driver=\"GPKG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70d086a9", + "metadata": {}, + "outputs": [], + "source": [ + "for sector in (\"ag\", \"man\", \"serv\"):\n", + " if not os.path.exists(f\"gva_{sector}_3ss.parquet\"):\n", + " group_to_nodes(f\"gva_{sector}_3ss\", nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7e99f3e", + "metadata": {}, + "outputs": [], + "source": [ + "gva_man_nodes = geopandas.read_parquet(\"gva_man_nodes.parquet\")\n", + "gva_ag_nodes = geopandas.read_parquet(\"gva_ag_nodes.parquet\")\n", + "gva_serv_nodes = geopandas.read_parquet(\"gva_serv_nodes.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcc20ff2", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_adm1 = nodes.sjoin(adm1_tha, how='left')\n", + "a = nodes_adm1.dropna()\n", + "b = nodes_adm1[nodes_adm1.GID_1.isna()].drop(columns=[\"index_right\", \"GID_1\"]) \\\n", + " .sjoin_nearest(adm1_tha, how='left')\n", + "nodes_adm1 = pandas.concat([a,b]).fillna(0)\n", + "nodes_adm1.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ebe8a51", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_with_gva = nodes_adm1.set_index('id') \\\n", + " .join(gva_ag_nodes[['data']]).rename(columns={\"data\": \"gva_ag_usd\"}) \\\n", + " .join(gva_man_nodes[['data']]).rename(columns={\"data\": \"gva_man_usd\"}) \\\n", + " .join(gva_serv_nodes[['data']]).rename(columns={\"data\": \"gva_ser_usd\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7cbe108", + "metadata": {}, + "outputs": [], + "source": [ + "gva_from_nodes_adm1 = nodes_with_gva.fillna(0) \\\n", + " .drop(columns=['geometry', 'iso_a3', 'component_id', 'index_right']) \\\n", + " .groupby('GID_1') \\\n", + " .sum() 
\\\n", + " .rename(columns={\n", + " \"gva_ag_usd\": \"gva_ag_adm1_usd\",\n", + " \"gva_man_usd\": \"gva_man_adm1_usd\",\n", + " \"gva_ser_usd\": \"gva_ser_adm1_usd\"\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46119b61", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_weighted = nodes_with_gva.reset_index() \\\n", + " .merge(gva_from_nodes_adm1.reset_index(), on='GID_1', how='left')\n", + "\n", + "for sector in ('ag', 'man', 'ser'):\n", + " nodes_weighted[f'proportion_of_adm1_{sector}'] = (\n", + " nodes_weighted[f'gva_{sector}_usd']\n", + " / nodes_weighted[f'gva_{sector}_adm1_usd']\n", + " )\n", + "\n", + "nodes_weighted = nodes_weighted[[\n", + " 'id',\n", + " 'GID_1',\n", + " 'proportion_of_adm1_ag',\n", + " 'proportion_of_adm1_man',\n", + " 'proportion_of_adm1_ser',\n", + " 'geometry'\n", + "]] \\\n", + " .copy() \\\n", + " .fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ede3ff0c", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_weighted.to_parquet(\"tha_road_nodes.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2595a8d", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_weighted = geopandas.read_parquet(\"tha_road_nodes.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a8406f6", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_weighted.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d69ab2e", + "metadata": {}, + "outputs": [], + "source": [ + "trade_adm1 = pandas.read_csv(\"baci_sector_adm1_trade_THA.csv\") \\\n", + " .rename(columns={\n", + " \"trade_value_thousandUSD\": \"value_kusd_adm1\",\n", + " \"trade_quantity_tons\": \"volume_tons_adm1\"\n", + " })\n", + "trade_adm1.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fecfeea0", + "metadata": {}, + "outputs": [], + "source": [ + "nodes_with_prop = 
nodes_weighted[[\n", + " 'id',\n", + " 'GID_1',\n", + " 'proportion_of_adm1_ag',\n", + " 'proportion_of_adm1_man',\n", + " 'proportion_of_adm1_ser'\n", + " ]] \\\n", + " .rename(columns={\n", + " 'proportion_of_adm1_ag': 'ag',\n", + " 'proportion_of_adm1_man': 'man',\n", + " 'proportion_of_adm1_ser': 'ser'\n", + " }) \\\n", + " .melt(id_vars=['id', 'GID_1'], var_name=\"sector\", value_name=\"proportion_of_adm1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8261f501", + "metadata": {}, + "outputs": [], + "source": [ + "def disaggregate_to_nodes(nodes_with_prop, trade_adm1, adm1):\n", + " trade_nodes = nodes_with_prop.query(f'GID_1 == \"{adm1}\"') \\\n", + " .merge(\n", + " trade_adm1.query(f'THA_GID_1 == \"{adm1}\"'),\n", + " left_on=(\"GID_1\", \"sector\"),\n", + " right_on=(\"THA_GID_1\", \"sector\")\n", + " )\n", + " trade_nodes['value_kusd'] = trade_nodes.proportion_of_adm1 * trade_nodes.value_kusd_adm1\n", + " trade_nodes['volume_tons'] = trade_nodes.proportion_of_adm1 * trade_nodes.volume_tons_adm1\n", + " trade_nodes = trade_nodes[[\n", + " 'id',\n", + " 'THA_GID_1',\n", + " 'partner_GID_0',\n", + " 'sector',\n", + " 'export_country_code',\n", + " 'export_zone',\n", + " 'import_country_code',\n", + " 'import_zone',\n", + " 'value_kusd',\n", + " 'volume_tons'\n", + " ]]\n", + " return trade_nodes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de04a130", + "metadata": {}, + "outputs": [], + "source": [ + "! rm -r trade_nodes.parquet\n", + "! rm -r trade_nodes_total.parquet\n", + "! mkdir -p trade_nodes.parquet\n", + "! 
mkdir -p trade_nodes_total.parquet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1469481b", + "metadata": {}, + "outputs": [], + "source": [ + "adm1s = trade_adm1.THA_GID_1.unique()\n", + "\n", + "for adm1 in tqdm(adm1s):\n", + " trade_nodes_adm1 = disaggregate_to_nodes(nodes_with_prop, trade_adm1, adm1)\n", + " os.mkdir(f\"trade_nodes.parquet/GID_1={adm1}\")\n", + " trade_nodes_adm1.to_parquet(f\"trade_nodes.parquet/GID_1={adm1}/data.parquet\")\n", + " trade_nodes_total = trade_nodes_adm1[[\n", + " 'id',\n", + " 'THA_GID_1',\n", + " 'partner_GID_0',\n", + " 'value_kusd',\n", + " 'volume_tons'\n", + " ]] \\\n", + " .groupby(['id', 'THA_GID_1', 'partner_GID_0']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + " os.mkdir(f\"trade_nodes_total.parquet/GID_1={adm1}\")\n", + " trade_nodes_total.to_parquet(f\"trade_nodes_total.parquet/GID_1={adm1}/data.parquet\")\n", + " trade_nodes_adm1 = None\n", + " trade_nodes_total = None\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/explore-tha-baci.ipynb b/notebooks/explore-tha-baci.ipynb new file mode 100644 index 00000000..ec05dfa8 --- /dev/null +++ b/notebooks/explore-tha-baci.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "147cb065-2113-4621-9fd3-b209db390f47", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbc70051-5828-4328-8f87-6d6e8d9c4e76", + "metadata": {}, + 
"outputs": [], + "source": [ + "! exa -l" + ] + }, + { + "cell_type": "markdown", + "id": "7baa3bfe-3efe-4df5-913f-25eaf6cc4c75", + "metadata": {}, + "source": [ + "# Thailand extract from Raghav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d84fcef-32d2-45e9-a284-a3ef44792a0d", + "metadata": {}, + "outputs": [], + "source": [ + "df = pandas.read_csv('baci_trade_THA.csv', dtype={'product_code':'str'}) \\\n", + " .drop(columns=[\n", + " 't',\n", + " 'i',\n", + " 'j',\n", + " 'export_country_name',\n", + " 'import_country_name',\n", + " 'refining_stage',\n", + " 'export_continent',\n", + " 'import_continent'\n", + " ]) \\\n", + " .rename(columns={'product_code': 'product_code_h5'})\n", + "\n", + "df.export_country_code = df.export_country_code.fillna('TWN') # Other Asia, n.e.s.\n", + "df.import_country_code = df.import_country_code.fillna('TWN') # Other Asia, n.e.s.\n", + "df.trade_quantity_tons = df.trade_quantity_tons.fillna(0) # code 271600 is electrical energy, recorded as NaN tons\n", + "df.product_code_h5 = df.product_code_h5.str.zfill(6)\n", + "df.shape, df.dropna().shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f58f9514-989a-4f65-a65b-2a01b3a343c2", + "metadata": {}, + "outputs": [], + "source": [ + "product_codes_hs_to_isic_with_desc = pandas.read_csv('JobID-64_Concordance_HS_to_I3.CSV', encoding='latin-1', dtype='str').rename(columns={\n", + " 'HS - Combined Product Code': 'product_code_hs',\n", + " 'HS - Combined Product Description': 'product_desc_hs',\n", + " 'ISIC Revision 3 Product Code': 'isic_code',\n", + " 'ISIC Revision 3 Product Description': 'isic_desc'\n", + "})\n", + "product_codes_hs_to_isic= product_codes_hs_to_isic_with_desc[['product_code_hs','isic_code']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e5caeaf-f041-4a1f-b138-79ce8cdb6128", + "metadata": {}, + "outputs": [], + "source": [ + "product_codes_h5_to_h4_with_desc = 
pandas.read_csv('JobID-92_Concordance_H5_to_H4.CSV', encoding='latin-1', dtype='str').rename(columns={\n", + " 'HS 2017 Product Code': 'product_code_h5',\n", + " 'HS 2017 Product Description': 'product_desc_h5',\n", + " 'HS 2012 Product Code': 'product_code_h4',\n", + " 'HS 2012 Product Description': 'product_desc_h4'\n", + "})\n", + "product_codes_h5_to_h4 = product_codes_h5_to_h4_with_desc[['product_code_h5','product_code_h4']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27357b96-f231-44e1-b232-808366f43cdd", + "metadata": {}, + "outputs": [], + "source": [ + "product_codes_h4_to_isic = pandas.read_csv('JobID-81_Concordance_H4_to_I3.CSV', encoding='latin-1', dtype='str').rename(columns={\n", + " 'HS 2012 Product Code': 'product_code_h4',\n", + " 'HS 2012 Product Description': 'product_desc_h4',\n", + " 'ISIC Revision 3 Product Code': 'isic_code',\n", + " 'ISIC Revision 3 Product Description': 'isic_desc'\n", + "})[['product_code_h4','isic_code']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78c54962-458e-405d-8add-3a4676dd0a03", + "metadata": {}, + "outputs": [], + "source": [ + "extra_codes_to_isic = pandas.read_csv('extra_concordance.csv', dtype='str')[['product_code','isic_code']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce97e698-833e-4d74-a506-c7e78119dd7d", + "metadata": {}, + "outputs": [], + "source": [ + "df = df \\\n", + " .merge(product_codes_h5_to_h4, on='product_code_h5', validate='many_to_one', how='left') \\\n", + " .merge(product_codes_h4_to_isic, on='product_code_h4', validate='many_to_one', how='left')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43c816a7-ab8b-425d-b5f2-20ff60a9d296", + "metadata": {}, + "outputs": [], + "source": [ + "df1 = df[~df.isic_code.isna()].copy()\n", + "df2 = df[df.isic_code.isna()] \\\n", + " .drop(columns=['isic_code']) \\\n", + " .copy() \\\n", + " .merge(product_codes_hs_to_isic, 
left_on='product_code_h5', right_on='product_code_hs', validate='many_to_one', how='left')\n", + "df2_clean = df2[~df2.isic_code.isna()].copy()\n", + "df3 = df2[df2.isic_code.isna()] \\\n", + " .drop(columns=['isic_code']) \\\n", + " .copy() \\\n", + " .merge(extra_codes_to_isic, left_on='product_code_h5', right_on='product_code', validate='many_to_one', how='left')\n", + "df3_clean = df3[~df3.isic_code.isna()].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b79154fd-d2e8-4e0c-9a6c-4ed1148a0fa5", + "metadata": {}, + "outputs": [], + "source": [ + "len(df1), len(df2), len(df3), len(df3[df3.isic_code.isna()])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93904422-2f72-4b26-8ad0-7e5de727fb58", + "metadata": {}, + "outputs": [], + "source": [ + "all = pandas.concat([df1, df2_clean, df3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bde8db5-bbc2-4185-8a5a-4b81b7a0315a", + "metadata": {}, + "outputs": [], + "source": [ + "isic_trade = all \\\n", + " [['isic_code', 'export_country_code', 'import_country_code', 'trade_value_thousandUSD', 'trade_quantity_tons']] \\\n", + " .groupby(['isic_code', 'export_country_code', 'import_country_code']) \\\n", + " .sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "819d515e-e8fe-4702-bae3-d149934c5d50", + "metadata": {}, + "outputs": [], + "source": [ + "isic_trade.trade_value_thousandUSD.sum(), all.trade_value_thousandUSD.sum(), df.trade_value_thousandUSD.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1e6f1a5-f317-4f0f-99d5-b28374794115", + "metadata": {}, + "outputs": [], + "source": [ + "partner_trade = all \\\n", + " [['export_country_code', 'import_country_code', 'trade_value_thousandUSD', 'trade_quantity_tons']] \\\n", + " .groupby(['export_country_code', 'import_country_code']) \\\n", + " .sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"d4054bb8-c619-4706-90ce-1a575477587b", + "metadata": {}, + "outputs": [], + "source": [ + "# major_partner_trade = partner_trade.query('trade_value_thousandUSD > 1e6')\n", + "\n", + "tha_exports = partner_trade \\\n", + " .loc[\"THA\"] \\\n", + " .rename(columns={\n", + " \"trade_value_thousandUSD\": \"THA_export_trade_value_thousandUSD\",\n", + " \"trade_quantity_tons\": \"THA_export_trade_quantity_tons\"\n", + " })\n", + "tha_exports.index.name = 'country_code'\n", + "tha_exports\n", + "\n", + "tha_imports = partner_trade.reset_index() \\\n", + " .query(\"import_country_code == 'THA'\") \\\n", + " .set_index(\"export_country_code\") \\\n", + " .drop(columns=['import_country_code']) \\\n", + " .rename(columns={\n", + " \"trade_value_thousandUSD\": \"THA_import_trade_value_thousandUSD\",\n", + " \"trade_quantity_tons\": \"THA_import_trade_quantity_tons\"\n", + " })\n", + "\n", + "tha_imports.index.name = 'country_code'\n", + "tha_trade_balance = tha_exports.join(tha_imports)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c84cfe13-0c5b-4eb7-8d6e-87178f95806d", + "metadata": {}, + "outputs": [], + "source": [ + "tha_trade_balance.query('THA_export_trade_value_thousandUSD > 1e6')[['THA_import_trade_value_thousandUSD', 'THA_export_trade_value_thousandUSD']].plot(kind='bar')\n", + "\n", + "plt.savefig('baci_major_trade_balance_tha.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09a9984b-1fc0-483e-a6ff-b973d615eb62", + "metadata": {}, + "outputs": [], + "source": [ + "mat_df = partner_trade.query('trade_value_thousandUSD > 1e7').reset_index() \\\n", + " .pivot(index='export_country_code', columns='import_country_code', values='trade_value_thousandUSD')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fab840c1-a0bf-45db-aafd-32aa28be39a2", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "im = ax.imshow(mat_df)\n", + "cbar = ax.figure.colorbar(im, ax=ax, 
label='1e7 thousand USD')\n", + "\n", + "ax.set_xticks(np.arange(len(mat_df.columns)), labels=mat_df.columns, fontsize='small')\n", + "ax.set_xlabel('Country importing')\n", + "ax.set_yticks(np.arange(len(mat_df.index)), labels=mat_df.index, fontsize='small')\n", + "ax.set_ylabel('Country exporting')\n", + "# Rotate the tick labels and set their alignment.\n", + "plt.setp(ax.get_xticklabels(), rotation=90, ha=\"right\",\n", + " rotation_mode=\"anchor\")\n", + "ax.set_title('Value of trade over 1bn USD, 2021')\n", + "\n", + "plt.savefig('baci_major_trade_tha.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "802d0c63-545f-4689-a8ad-46f946bef39e", + "metadata": {}, + "outputs": [], + "source": [ + "isic_structure = pandas.read_csv('ISIC_Rev_3_english_structure.csv')\n", + "isic_structure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a40a7e83-ed87-4d73-afba-a10c33642716", + "metadata": {}, + "outputs": [], + "source": [ + "sector_trade = isic_trade.reset_index() \\\n", + " .merge(isic_structure[['sector', 'code']], left_on='isic_code', right_on='code', how='left') \\\n", + " .drop(columns=['isic_code', 'code']) \\\n", + " .groupby(['sector','export_country_code','import_country_code']) \\\n", + " .sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6c6bb73-5bd0-4d38-9f0e-69f3ea2fa3a2", + "metadata": {}, + "outputs": [], + "source": [ + "summary = sector_trade.reset_index()\n", + "summary['export'] = (summary.export_country_code == 'THA')\n", + "summary.drop(columns=['export_country_code','import_country_code']).groupby(['export','sector']).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03db483c-9a1b-467b-b3c8-c3fdde2b5cb4", + "metadata": {}, + "outputs": [], + "source": [ + "sector_trade.to_csv(\"baci_sector_trade_THA.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de0f4e85-357d-4dcb-a196-66993f007881", + "metadata": {}, + 
"outputs": [], + "source": [ + "! exa ~/projects/open-gira/results/input/capital-stocks/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c57d4b2-ab5a-4042-98d4-e90e90d689ed", + "metadata": {}, + "outputs": [], + "source": [ + "! exa /data/incoming/wenz-2023-dose-reported-subnational-output/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbdbb072-8455-4449-9f37-dcfeb05cef09", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d18c1d4-d735-424f-8238-21931a7a9f7c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93cd370b-4304-4825-9cc5-d2e3c9136b3a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98509a43-1101-4c96-b627-cd7939115d75", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "511ab795-b254-44c1-8da1-dd20c89bb0bf", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7914a455-9a1b-4b44-a77b-b75618ccfb90", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5be4c1f2-e110-436b-85f2-042465cf8dec", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/windowed-raster.ipynb b/notebooks/windowed-raster.ipynb new file mode 100644 index 00000000..06c85d38 --- /dev/null 
+++ b/notebooks/windowed-raster.ipynb @@ -0,0 +1,746 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import geopandas\n", + "import pandas\n", + "import rasterio\n", + "\n", + "from affine import Affine\n", + "from rasterio import features\n", + "from snail.damages import PiecewiseLinearDamageCurve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value_150ss_tif = \"input/giri/THA/bem_5x5_valfis_res__THA.tif\"\n", + "volume_3ss_tif = \"input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\"\n", + "flood_1ss_tif = \"input/footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def clip_array(arr, block_size):\n", + " clip_rows = (arr.shape[0] - (arr.shape[0] % block_size))\n", + " clip_cols = (arr.shape[1] - (arr.shape[1] % block_size))\n", + "\n", + " clipped = arr[0:clip_rows, 0:clip_cols]\n", + " return clipped\n", + "\n", + "def resample_sum(arr, block_size):\n", + " nblocks_0 = arr.shape[0] // block_size\n", + " nblocks_1 = arr.shape[1] // block_size\n", + "\n", + " blocks = arr.reshape(nblocks_0, block_size, nblocks_1, block_size)\n", + "\n", + " return np.sum(blocks, axis=(1, 3))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# d = np.arange(12).reshape((3,4))\n", + "# d, resample_sum(d, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def repeat_2d(arr, block_size):\n", + " \"\"\"Repeat each element from a 2d array, so each value fills a (block_size x block_size) area\n", + " \"\"\"\n", + " return np.repeat(np.repeat(arr, block_size, axis=0), block_size, axis=1)\n", + "\n", + "# 
repeat_2d(d, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_ds(ds, band=1, replace_nodata=False, nodata_fill=0):\n", + " data = ds.read(band)\n", + " if replace_nodata:\n", + " data = np.where(data == ds.nodata, nodata_fill, data)\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(value_150ss_tif) as value_150ss_ds:\n", + " value_150ss_all = read_ds(value_150ss_ds, replace_nodata=True)\n", + "\n", + "with rasterio.open(volume_3ss_tif) as volume_3ss_ds:\n", + " volume_3ss_all = read_ds(volume_3ss_ds, replace_nodata=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def to_int(a):\n", + " return np.floor(a).astype(int)\n", + "\n", + "# lon, lat of volume_3ss top left\n", + "volume_3ss_all_ul_xy = volume_3ss_ds.transform * (0,0)\n", + "# col, row in value_150ss_all, inset one extra\n", + "value_150ss_ul_cr = to_int(~value_150ss_ds.transform * (volume_3ss_all_ul_xy)) + 1\n", + "# lon, lat of that value_150ss_all pixel - this is our new top left\n", + "ul_xy_150ss = value_150ss_ds.transform * value_150ss_ul_cr\n", + "# col, row in volume_3ss_all\n", + "volume_3ss_ul_cr = to_int(~volume_3ss_ds.transform * ul_xy_150ss)\n", + "# lon, lat of that volume_3ss_all pixel - new top left for 3ss purposes (tiny bit offset)\n", + "ul_xy_3ss = volume_3ss_ds.transform * volume_3ss_ul_cr\n", + "ul_xy_150ss, ul_xy_3ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Clip out volume array\n", + "col_idx, row_idx = volume_3ss_ul_cr\n", + "volume_3ss = volume_3ss_all[row_idx:, col_idx:]\n", + "volume_3ss = clip_array(volume_3ss, 50)\n", + "# Resample volume to coarse-scale, \"sum\"\n", + "volume_150ss = resample_sum(volume_3ss, 50)\n", + "volume_150ss.shape" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Adapt transform to new top-left and resolution\n", + "a,b,c,d,e,f = volume_3ss_ds.transform[:6]\n", + "t_150ss = Affine(\n", + " a * 50,\n", + " b,\n", + " ul_xy_150ss[0],\n", + " d,\n", + " e * 50,\n", + " ul_xy_150ss[1]\n", + ")\n", + "t_3ss = Affine(\n", + " a,\n", + " b,\n", + " ul_xy_3ss[0],\n", + " d,\n", + " e,\n", + " ul_xy_3ss[1]\n", + ")\n", + "t_150ss, t_3ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "col_idx, row_idx = value_150ss_ul_cr\n", + "ncols, nrows = volume_150ss.shape\n", + "value_150ss = value_150ss_all[col_idx:col_idx+ncols, row_idx:row_idx+nrows]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value_150ss.shape, volume_150ss.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/vol_150ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=volume_150ss.shape[0],\n", + " width=volume_150ss.shape[1],\n", + " count=1,\n", + " dtype='float64',\n", + " crs='+proj=latlong',\n", + " transform=t_150ss\n", + ") as ds:\n", + " ds.write(volume_150ss, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/vol_3ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=volume_3ss.shape[0],\n", + " width=volume_3ss.shape[1],\n", + " count=1,\n", + " dtype=volume_3ss.dtype,\n", + " crs='+proj=latlong',\n", + " transform=t_3ss\n", + ") as ds:\n", + " ds.write(volume_3ss, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if value_150ss.shape != volume_150ss.shape:\n", + " print(\"CHKS\", value_150ss.shape, volume_150ss.shape)\n", + " assert 
False\n", + "\n", + "# Calculate value per unit volume\n", + "# value_per_volume_150ss = value_150ss / volume_150ss\n", + "value_per_volume_150ss = np.divide(value_150ss, volume_150ss, out=np.zeros_like(value_150ss), where=volume_150ss!=0)\n", + "# Resample to fine-scale value per volume, \"nearest\"\n", + "value_per_volume_3ss = repeat_2d(value_per_volume_150ss, 50)\n", + "# Calculate fine-scale value\n", + "value_3ss = value_per_volume_3ss * volume_3ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/val_vol_150ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=value_per_volume_150ss.shape[0],\n", + " width=value_per_volume_150ss.shape[1],\n", + " count=1,\n", + " dtype=value_per_volume_150ss.dtype,\n", + " crs='+proj=latlong',\n", + " transform=t_150ss\n", + ") as ds:\n", + " # Write to window\n", + " ds.write(value_per_volume_150ss, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/val_vol_3ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=value_per_volume_3ss.shape[0],\n", + " width=value_per_volume_3ss.shape[1],\n", + " count=1,\n", + " dtype=value_per_volume_3ss.dtype,\n", + " crs='+proj=latlong',\n", + " transform=t_3ss\n", + ") as ds:\n", + " # Write to window\n", + " ds.write(value_per_volume_3ss, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/val_3ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=value_3ss.shape[0],\n", + " width=value_3ss.shape[1],\n", + " count=1,\n", + " dtype=value_3ss.dtype,\n", + " crs='+proj=latlong',\n", + " transform=t_3ss\n", + ") as ds:\n", + " # Write to window\n", + " ds.write(value_3ss, indexes=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Flood 
intersection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(flood_1ss_tif, 'r') as flood_1ss_ds:\n", + " flood_1ss = read_ds(flood_1ss_ds, replace_nodata=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flood_1ss_ds.transform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# lon, lat of footprint top left\n", + "flood_1ss_ul_xy = flood_1ss_ds.transform * (0,0)\n", + "# col, row in value_3ss\n", + "t_3ss_ul_cr = to_int(~t_3ss * (flood_1ss_ul_xy))\n", + "# lon, lat of that pixel - this is our new top left\n", + "footprint_ul_xy_3ss = t_3ss * t_3ss_ul_cr\n", + "# col, row in flood_1ss\n", + "flood_1ss_ul_cr = to_int(~flood_1ss_ds.transform * footprint_ul_xy_3ss)\n", + "# lon, lat of that flood_1ss pixel - new top left for 1ss purposes (tiny bit offset)\n", + "ul_xy_1ss = flood_1ss_ds.transform * flood_1ss_ul_cr\n", + "flood_1ss_ul_xy, footprint_ul_xy_3ss, ul_xy_1ss\n", + "\n", + "# TODO should new top left be greater, not less, in both x and y values?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# clip to match coarser array extent\n", + "flood_1ss_clipped = clip_array(flood_1ss, 3)\n", + "flood_1ss_height, flood_1ss_width = flood_1ss_clipped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# lon, lat of footprint lower right\n", + "flood_1ss_lr_xy = flood_1ss_ds.transform * (flood_1ss_width, flood_1ss_height)\n", + "# col, row in value_3ss\n", + "t_3ss_lr_cr = to_int(~t_3ss * (flood_1ss_lr_xy))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ulc, ulr = t_3ss_ul_cr\n", + "lrc, lrr = t_3ss_lr_cr\n", + "footprint_value_3ss = value_3ss[ulr:lrr, ulc:lrc]\n", + "\n", + "footprint_value_1ss = repeat_2d(footprint_value_3ss, 3) / 9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "building_flood_depth_damage_curve = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/residential_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if footprint_value_1ss.shape != flood_1ss_clipped.shape:\n", + " print(\"CHKS\", footprint_value_1ss.shape, flood_1ss_clipped.shape)\n", + " assert False\n", + "\n", + "damage_fraction_1ss = building_flood_depth_damage_curve.damage_fraction(flood_1ss_clipped)\n", + "\n", + "damage_value_1ss = footprint_value_1ss * damage_fraction_1ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Adapt transform to new top-left and resolution\n", + "a,b,c,d,e,f = flood_1ss_ds.transform[:6]\n", + "t_1ss = Affine(\n", + " a,\n", + " b,\n", + " ul_xy_1ss[0],\n", + " d,\n", + " 
e,\n", + " ul_xy_1ss[1]\n", + ")\n", + "t_1ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/dmg_frac_1ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=damage_fraction_1ss.shape[0],\n", + " width=damage_fraction_1ss.shape[1],\n", + " count=1,\n", + " dtype=damage_fraction_1ss.dtype,\n", + " crs='+proj=latlong',\n", + " transform=t_1ss\n", + ") as ds:\n", + " ds.write(damage_fraction_1ss, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/giri/THA/dmg_val_1ss.tif\", 'w',\n", + " driver='GTiff',\n", + " height=damage_value_1ss.shape[0],\n", + " width=damage_value_1ss.shape[1],\n", + " count=1,\n", + " dtype=damage_value_1ss.dtype,\n", + " crs='+proj=latlong',\n", + " transform=t_1ss\n", + ") as ds:\n", + " ds.write(damage_value_1ss, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "ADM1 damage values:\n", + "\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r dmg_val_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o dmg_val_1ss.csv\n", + "\n", + "ADM1 total built volume:\n", + "\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r ../../ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o ghs_built_v_3ss.csv\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_vol = pandas.read_csv(\"input/giri/THA/ghs_built_v_3ss.csv\") \\\n", + " .rename(columns={\"sum\": \"built_volume\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1 = 
geopandas.read_file(\"input/admin-boundaries/tha_adm1.shp\") \\\n", + " .merge(adm1_vol, on=\"GID_1\")[[\"GID_1\", \"NAME_1\", \"built_volume\", \"geometry\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\") as vol_3ss_ds:\n", + " vol_3ss = vol_3ss_ds.read(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def rasterize(gdf, column, template_ds):\n", + " return features.rasterize(\n", + " ((f['geometry'], f['properties'][column]) for f in gdf.__geo_interface__['features']),\n", + " out_shape=template_ds.shape,\n", + " transform=template_ds.transform\n", + " )\n", + "\n", + "vol_adm1_3ss = rasterize(adm1, 'built_volume', vol_3ss_ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(vol_adm1_3ss)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_gva = pandas.read_csv(\"/data/incoming/wenz-2023-dose-reported-subnational-output/DOSE_V2_THA.csv\")\n", + "adm1_gva[\"ag_grp\"] = adm1_gva[\"pop\"] * adm1_gva.ag_grp_pc_usd\n", + "adm1_gva[\"man_grp\"] = adm1_gva[\"pop\"] * adm1_gva.man_grp_pc_usd\n", + "adm1_gva[\"serv_grp\"] = adm1_gva[\"pop\"] * adm1_gva.serv_grp_pc_usd\n", + "\n", + "adm1_gva = geopandas.read_file(\"input/admin-boundaries/tha_adm1.shp\") \\\n", + " .merge(adm1_gva, on=\"GID_1\")[[\"GID_1\", \"NAME_1\", \"ag_grp\", \"man_grp\", \"serv_grp\", \"geometry\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_gva.drop(columns=\"geometry\").to_csv(\"input/giri/THA/DOSE_V2_THA_rgva.csv\")\n", + 
"adm1_gva.to_file(\"input/giri/THA/DOSE_V2_THA_rgva.gpkg\", driver=\"GPKG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_gva_ag_3ss = rasterize(adm1_gva, \"ag_grp\", vol_3ss_ds)\n", + "adm1_gva_man_3ss = rasterize(adm1_gva, \"man_grp\", vol_3ss_ds)\n", + "adm1_gva_serv_3ss = rasterize(adm1_gva, \"serv_grp\", vol_3ss_ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def zero_divide(a, b):\n", + " return np.divide(a, b, out=np.zeros_like(a, dtype='float64'), where=(b!=0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gva_ag_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_ag_3ss\n", + "gva_man_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_man_3ss\n", + "gva_serv_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_serv_3ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def write_ds(fname, data, transform):\n", + " with rasterio.open(fname, 'w',\n", + " driver='GTiff',\n", + " height=data.shape[0],\n", + " width=data.shape[1],\n", + " count=1,\n", + " dtype=data.dtype,\n", + " crs='+proj=latlong',\n", + " transform=transform\n", + " ) as ds:\n", + " ds.write(data, indexes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "write_ds(\"input/giri/THA/gva_ag_3ss.tif\", gva_ag_3ss, vol_3ss_ds.transform)\n", + "write_ds(\"input/giri/THA/gva_man_3ss.tif\", gva_man_3ss, vol_3ss_ds.transform)\n", + "write_ds(\"input/giri/THA/gva_serv_3ss.tif\", gva_serv_3ss, vol_3ss_ds.transform)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gva_ag_1ss = repeat_2d(gva_ag_3ss, 3) / 9\n", + "gva_man_1ss = repeat_2d(gva_man_3ss, 3) / 9\n", + "gva_serv_1ss = 
repeat_2d(gva_serv_3ss, 3) / 9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO figure out transform, check we're on the right grid, write out to files\n", + "# TODO compare with damage fraction, write out interruption\n", + "# TODO calculate per day, sum back to zonal stats\n", + "# TODO check totals (re-aggregate after disaggregation) maybe rescale???" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a,b,c,d,e,f = vol_3ss_ds.transform[:6]\n", + "gva_t_1ss = Affine(\n", + " a / 3,\n", + " b,\n", + " c,\n", + " d,\n", + " e / 3,\n", + " f\n", + ")\n", + "gva_t_1ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "write_ds(\"input/giri/THA/gva_ag_1ss.tif\", gva_ag_1ss, gva_t_1ss)\n", + "write_ds(\"input/giri/THA/gva_man_1ss.tif\", gva_man_1ss, gva_t_1ss)\n", + "write_ds(\"input/giri/THA/gva_serv_1ss.tif\", gva_serv_1ss, gva_t_1ss)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_man_1ss.tif gva_man_1ss_clipped.tif\n", + "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_man_1ss_clipped.tif --outfile=disruption_man_1ss.tif --calc=\"(A>0.1)*B\"\n", + "\n", + "\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_ag_1ss.tif gva_ag_1ss_clipped.tif\n", + "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_ag_1ss_clipped.tif --outfile=disruption_ag_1ss.tif --calc=\"(A>0.1)*B\"\n", + "\n", + "\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_serv_1ss.tif gva_serv_1ss_clipped.tif\n", + "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_serv_1ss_clipped.tif --outfile=disruption_serv_1ss.tif --calc=\"(A>0.1)*B\"\n", + "\n", + "\n", + "for sector in serv ag man\n", + " exactextract \\\n", + " -p 
../../admin-boundaries/tha_adm1.shp \\\n", + " -r disruption_{$sector}_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o disruption_{$sector}_1ss.csv\n", + "end\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/building_damage.py b/scripts/building_damage.py new file mode 100644 index 00000000..72307cf5 --- /dev/null +++ b/scripts/building_damage.py @@ -0,0 +1,292 @@ +import numpy as np +import rasterio + +from affine import Affine +from snail.damages import PiecewiseLinearDamageCurve + + +def clip_array(arr, block_size): + clip_rows = arr.shape[0] - (arr.shape[0] % block_size) + clip_cols = arr.shape[1] - (arr.shape[1] % block_size) + + clipped = arr[0:clip_rows, 0:clip_cols] + return clipped + + +def resample_sum(arr, block_size): + nblocks_0 = arr.shape[0] // block_size + nblocks_1 = arr.shape[1] // block_size + + blocks = arr.reshape(nblocks_0, block_size, nblocks_1, block_size) + + return np.sum(blocks, axis=(1, 3)) + + +def repeat_2d(arr, block_size): + """Repeat each element from a 2d array, so each value fills a (block_size x block_size) area""" + return np.repeat(np.repeat(arr, block_size, axis=0), block_size, axis=1) + + +def read_ds(ds, band=1, replace_nodata=False, nodata_fill=0): + data = ds.read(band) + if replace_nodata: + data = np.where(data == ds.nodata, nodata_fill, data) + return data + + +def to_int(a): + return np.floor(a).astype(int) + + +def main(value_150ss_tif, volume_3ss_tif, flood_1ss_tif, prefix): + with rasterio.open(value_150ss_tif) as value_150ss_ds: + value_150ss_all = read_ds(value_150ss_ds, replace_nodata=True) + 
+ with rasterio.open(volume_3ss_tif) as volume_3ss_ds: + volume_3ss_all = read_ds(volume_3ss_ds, replace_nodata=True) + + # lon, lat of volume_3ss top left + volume_3ss_all_ul_xy = volume_3ss_ds.transform * (0, 0) + # col, row in value_150ss_all, inset one extra + value_150ss_ul_cr = to_int(~value_150ss_ds.transform * (volume_3ss_all_ul_xy)) + 1 + # lon, lat of that value_150ss_all pixel - this is our new top left + ul_xy_150ss = value_150ss_ds.transform * value_150ss_ul_cr + # col, row in volume_3ss_all + volume_3ss_ul_cr = to_int(~volume_3ss_ds.transform * ul_xy_150ss) + # lon, lat of that volume_3ss_all pixel - new top left for 3ss purposes (tiny bit offset) + ul_xy_3ss = volume_3ss_ds.transform * volume_3ss_ul_cr + ul_xy_150ss, ul_xy_3ss + + # Clip out volume array + col_idx, row_idx = volume_3ss_ul_cr + volume_3ss = volume_3ss_all[row_idx:, col_idx:] + volume_3ss = clip_array(volume_3ss, 50) + # Resample volume to coarse-scale, "sum" + volume_150ss = resample_sum(volume_3ss, 50) + volume_150ss.shape + + # Adapt transform to new top-left and resolution + a, b, c, d, e, f = volume_3ss_ds.transform[:6] + t_150ss = Affine(a * 50, b, ul_xy_150ss[0], d, e * 50, ul_xy_150ss[1]) + t_3ss = Affine(a, b, ul_xy_3ss[0], d, e, ul_xy_3ss[1]) + t_150ss, t_3ss + + col_idx, row_idx = value_150ss_ul_cr + ncols, nrows = volume_150ss.shape + value_150ss = value_150ss_all[col_idx : col_idx + ncols, row_idx : row_idx + nrows] + + with rasterio.open( + f"input/giri/THA/{prefix}_vol_150ss.tif", + "w", + driver="GTiff", + height=volume_150ss.shape[0], + width=volume_150ss.shape[1], + count=1, + dtype="float64", + crs="+proj=latlong", + transform=t_150ss, + ) as ds: + ds.write(volume_150ss, indexes=1) + + with rasterio.open( + f"input/giri/THA/{prefix}_vol_3ss.tif", + "w", + driver="GTiff", + height=volume_3ss.shape[0], + width=volume_3ss.shape[1], + count=1, + dtype=volume_3ss.dtype, + crs="+proj=latlong", + transform=t_3ss, + ) as ds: + ds.write(volume_3ss, indexes=1) + + if 
value_150ss.shape != volume_150ss.shape: + print("CHKS", value_150ss.shape, volume_150ss.shape) + assert False + + # Calculate value per unit volume + # value_per_volume_150ss = value_150ss / volume_150ss + value_per_volume_150ss = np.divide( + value_150ss, + volume_150ss, + out=np.zeros_like(value_150ss), + where=volume_150ss != 0, + ) + # Resample to fine-scale value per volume, "nearest" + value_per_volume_3ss = repeat_2d(value_per_volume_150ss, 50) + # Calculate fine-scale value + value_3ss = value_per_volume_3ss * volume_3ss + + with rasterio.open( + f"input/giri/THA/{prefix}_val_vol_150ss.tif", + "w", + driver="GTiff", + height=value_per_volume_150ss.shape[0], + width=value_per_volume_150ss.shape[1], + count=1, + dtype=value_per_volume_150ss.dtype, + crs="+proj=latlong", + transform=t_150ss, + ) as ds: + # Write to window + ds.write(value_per_volume_150ss, indexes=1) + + with rasterio.open( + f"input/giri/THA/{prefix}_val_vol_3ss.tif", + "w", + driver="GTiff", + height=value_per_volume_3ss.shape[0], + width=value_per_volume_3ss.shape[1], + count=1, + dtype=value_per_volume_3ss.dtype, + crs="+proj=latlong", + transform=t_3ss, + ) as ds: + # Write to window + ds.write(value_per_volume_3ss, indexes=1) + + with rasterio.open( + f"input/giri/THA/{prefix}_val_3ss.tif", + "w", + driver="GTiff", + height=value_3ss.shape[0], + width=value_3ss.shape[1], + count=1, + dtype=value_3ss.dtype, + crs="+proj=latlong", + transform=t_3ss, + ) as ds: + # Write to window + ds.write(value_3ss, indexes=1) + + # + # Flood intersection + # + with rasterio.open(flood_1ss_tif, "r") as flood_1ss_ds: + flood_1ss = read_ds(flood_1ss_ds, replace_nodata=True) + + # lon, lat of footprint top left + flood_1ss_ul_xy = flood_1ss_ds.transform * (0, 0) + # col, row in value_3ss + t_3ss_ul_cr = to_int(~t_3ss * (flood_1ss_ul_xy)) + # lon, lat of that pixel - this is our new top left + footprint_ul_xy_3ss = t_3ss * t_3ss_ul_cr + # col, row in flood_1ss + flood_1ss_ul_cr = 
to_int(~flood_1ss_ds.transform * footprint_ul_xy_3ss) + # lon, lat of that flood_1ss pixel - new top left for 1ss purposes (tiny bit offset) + ul_xy_1ss = flood_1ss_ds.transform * flood_1ss_ul_cr + flood_1ss_ul_xy, footprint_ul_xy_3ss, ul_xy_1ss + + # TODO should new top left be greater, not less, in both x and y values? + + # clip to match coarser array extent + flood_1ss_clipped = clip_array(flood_1ss, 3) + flood_1ss_height, flood_1ss_width = flood_1ss_clipped.shape + + # lon, lat of footprint lower right + flood_1ss_lr_xy = flood_1ss_ds.transform * (flood_1ss_width, flood_1ss_height) + # col, row in value_3ss + t_3ss_lr_cr = to_int(~t_3ss * (flood_1ss_lr_xy)) + + ulc, ulr = t_3ss_ul_cr + lrc, lrr = t_3ss_lr_cr + footprint_value_3ss = value_3ss[ulr:lrr, ulc:lrc] + + footprint_value_1ss = repeat_2d(footprint_value_3ss, 3) / 9 + + if prefix == "res": + curve_file = "../bundled_data/damage_curves/flood/residential_asia.csv" + else: + curve_file = "../bundled_data/damage_curves/flood/commercial_asia.csv" + + building_flood_depth_damage_curve = PiecewiseLinearDamageCurve.from_csv( + curve_file, + intensity_col="inundation_depth_(m)", + damage_col="damage_fraction", + ) + + if footprint_value_1ss.shape != flood_1ss_clipped.shape: + print("CHKS", footprint_value_1ss.shape, flood_1ss_clipped.shape) + assert False + + damage_fraction_1ss = building_flood_depth_damage_curve.damage_fraction( + flood_1ss_clipped + ) + damage_value_1ss = footprint_value_1ss * damage_fraction_1ss + + # Adapt transform to new top-left and resolution + a, b, c, d, e, f = flood_1ss_ds.transform[:6] + t_1ss = Affine(a, b, ul_xy_1ss[0], d, e, ul_xy_1ss[1]) + t_1ss + + with rasterio.open( + f"input/giri/THA/{prefix}_dmg_frac_1ss.tif", + "w", + driver="GTiff", + height=damage_fraction_1ss.shape[0], + width=damage_fraction_1ss.shape[1], + count=1, + dtype=damage_fraction_1ss.dtype, + crs="+proj=latlong", + transform=t_1ss, + ) as ds: + ds.write(damage_fraction_1ss, indexes=1) + + with rasterio.open( + 
f"input/giri/THA/{prefix}_dmg_val_1ss.tif", + "w", + driver="GTiff", + height=damage_value_1ss.shape[0], + width=damage_value_1ss.shape[1], + count=1, + dtype=damage_value_1ss.dtype, + crs="+proj=latlong", + transform=t_1ss, + ) as ds: + ds.write(damage_value_1ss, indexes=1) + + """ + ADM1 damage values: + + exactextract \ + -p ../../admin-boundaries/tha_adm1.shp \ + -r res_dmg_val_1ss.tif \ + -f GID_1 \ + -s sum \ + -o res_dmg_val_1ss.csv + + ADM1 total built volume: + + exactextract \ + -p ../../admin-boundaries/tha_adm1.shp \ + -r ../../ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \ + -f GID_1 \ + -s sum \ + -o ghs_built_v_3ss.csv + """ + + +if __name__ == "__main__": + """ + # all - nres = res + gdal_calc.py \ + -A GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \ + -B GHS_BUILT_V_NRES_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \ + --outfile="ghs_built_v_res_3ss__THA.tif" \ + --calc="A-B" + + cp GHS_BUILT_V_NRES_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \ + ghs_built_v_nres_3ss__THA.tif + """ + value_150ss_tif = "input/giri/THA/bem_5x5_valfis_res__THA.tif" + volume_3ss_tif = "input/ghsl/THA/ghs_built_v_res_3ss__THA.tif" + flood_1ss_tif = "input/footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif" + prefix = "res" + main(value_150ss_tif, volume_3ss_tif, flood_1ss_tif, prefix) + + value_150ss_tif = "input/giri/THA/bem_5x5_valfis_nres__THA.tif" + volume_3ss_tif = "input/ghsl/THA/ghs_built_v_nres_3ss__THA.tif" + flood_1ss_tif = "input/footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif" + prefix = "nres" + main(value_150ss_tif, volume_3ss_tif, flood_1ss_tif, prefix) From d8755b9f796a32dcca54b553ccddbed70e6a1a96 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Wed, 5 Jun 2024 15:56:08 +0100 Subject: [PATCH 12/27] Ignore sync directories --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 6cd74684..99ac05fa 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,10 @@ build data/* results/* 
*.bak +soge-sync +push +pull +SCRATCH # snakemake .snakemake/ From 99a7ba005927f9964737cb5becb7eade1fedd5be Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Wed, 5 Jun 2024 15:56:27 +0100 Subject: [PATCH 13/27] Add helper package irv-datapkg --- environment.yml | 3 +- notebooks/capital-stocks.ipynb | 14 ++-- notebooks/disaggregate-trade-to-adm1.ipynb | 16 ++--- notebooks/disaggregate-trade-to-network.ipynb | 40 ++++++------ notebooks/explore-tha-baci.ipynb | 64 +++++++++---------- 5 files changed, 69 insertions(+), 68 deletions(-) diff --git a/environment.yml b/environment.yml index 0def2eaf..2f668239 100644 --- a/environment.yml +++ b/environment.yml @@ -13,7 +13,8 @@ dependencies: - snkit==1.8.1 # spatial network cleaning # required zenodo_get version not available via conda-forge - zenodo_get>=1.5.1 # data download client, zenodo API broken w/ <1.5.1 - - . # open_gira helper package + - irv-datapkg==0.1.2 # data package helper + - -e . # open_gira helper package - black # formatter - cython==0.29.28 # c--python interface - contextily # basemaps for plots diff --git a/notebooks/capital-stocks.ipynb b/notebooks/capital-stocks.ipynb index e75554a2..b9f3b183 100644 --- a/notebooks/capital-stocks.ipynb +++ b/notebooks/capital-stocks.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "217c7e17-6ad0-4299-b20a-efa027b38264", + "id": "0", "metadata": {}, "outputs": [], "source": [ @@ -13,7 +13,7 @@ { "cell_type": "code", "execution_count": null, - "id": "818203e3-bbd3-4001-889a-65213e140b7d", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fbfa1254-c35b-41a6-b7b2-eaef9a2a3552", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9135495e-606f-4353-939c-0647df24ffe4", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": 
null, - "id": "6f118d92-876a-4b35-b7bb-22106144a8c0", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da3c3275-e3d3-40b7-a86f-e659c3d582c6", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1eaae232-0d25-4d62-86ff-e9d457a12e2e", + "id": "6", "metadata": {}, "outputs": [], "source": [ diff --git a/notebooks/disaggregate-trade-to-adm1.ipynb b/notebooks/disaggregate-trade-to-adm1.ipynb index c02eeb9f..3fc4d86a 100644 --- a/notebooks/disaggregate-trade-to-adm1.ipynb +++ b/notebooks/disaggregate-trade-to-adm1.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d00618be-7a19-461e-a8f4-f71eb1c13e77", + "id": "0", "metadata": {}, "outputs": [], "source": [ @@ -13,7 +13,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4d009910-9845-49e5-9b0d-8605f090b722", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03c7a2e5-8c95-441e-aeac-1dd082c24780", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40cbed27-a124-4875-b2fb-c1c4eb8c246a", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "185671b2-9921-4ea7-9fd3-c8a22a9dcab5", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae24c56d-1527-4a05-8d4c-1613f19321f2", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -105,7 +105,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6bb33d3b-ae53-47b9-9dce-403cdf0a2cd5", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -116,7 +116,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5f13eb16-894f-4473-82ef-d76e6d467da3", + "id": "7", 
"metadata": {}, "outputs": [], "source": [ diff --git a/notebooks/disaggregate-trade-to-network.ipynb b/notebooks/disaggregate-trade-to-network.ipynb index ac126b43..cbefb92d 100644 --- a/notebooks/disaggregate-trade-to-network.ipynb +++ b/notebooks/disaggregate-trade-to-network.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12933702-03c2-4bd6-9991-2e495b37dfe5", + "id": "0", "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1aa9fa5e-c061-406e-a4c7-9abe3ded3f1a", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,7 @@ { "cell_type": "code", "execution_count": null, - "id": "342c37ad-10e5-4b83-8ffd-cde5f8c4b24e", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b14a5abc-393c-482f-95fd-62a699d1d3a8", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -51,7 +51,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7ded9b64-d749-43fe-b7da-e0134e9d6607", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -69,7 +69,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f86a43fa-9c0b-4b52-a49d-fb21e2150786", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": null, - "id": "70d086a9", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f7e99f3e", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fcc20ff2", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -130,7 +130,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1ebe8a51", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -143,7 +143,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e7cbe108", + "id": "10", "metadata": {}, 
"outputs": [], "source": [ @@ -161,7 +161,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46119b61", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -189,7 +189,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ede3ff0c", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -199,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b2595a8d", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -209,7 +209,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0a8406f6", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -219,7 +219,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5d69ab2e", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -234,7 +234,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fecfeea0", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -256,7 +256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8261f501", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -287,7 +287,7 @@ { "cell_type": "code", "execution_count": null, - "id": "de04a130", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -300,7 +300,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1469481b", + "id": "19", "metadata": {}, "outputs": [], "source": [ diff --git a/notebooks/explore-tha-baci.ipynb b/notebooks/explore-tha-baci.ipynb index ec05dfa8..e2aabc50 100644 --- a/notebooks/explore-tha-baci.ipynb +++ b/notebooks/explore-tha-baci.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": null, - "id": "147cb065-2113-4621-9fd3-b209db390f47", + "id": "0", "metadata": {}, "outputs": [], "source": [ @@ -15,7 +15,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bbc70051-5828-4328-8f87-6d6e8d9c4e76", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "7baa3bfe-3efe-4df5-913f-25eaf6cc4c75", + "id": "2", "metadata": {}, "source": [ 
"# Thailand extract from Raghav" @@ -33,7 +33,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8d84fcef-32d2-45e9-a284-a3ef44792a0d", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -60,7 +60,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f58f9514-989a-4f65-a65b-2a01b3a343c2", + "id": "4", "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e5caeaf-f041-4a1f-b138-79ce8cdb6128", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ { "cell_type": "code", "execution_count": null, - "id": "27357b96-f231-44e1-b232-808366f43cdd", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -107,7 +107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "78c54962-458e-405d-8add-3a4676dd0a03", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -117,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ce97e698-833e-4d74-a506-c7e78119dd7d", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -130,7 +130,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43c816a7-ab8b-425d-b5f2-20ff60a9d296", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -150,7 +150,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b79154fd-d2e8-4e0c-9a6c-4ed1148a0fa5", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -160,7 +160,7 @@ { "cell_type": "code", "execution_count": null, - "id": "93904422-2f72-4b26-8ad0-7e5de727fb58", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -170,7 +170,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0bde8db5-bbc2-4185-8a5a-4b81b7a0315a", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -183,7 +183,7 @@ { "cell_type": "code", "execution_count": null, - "id": "819d515e-e8fe-4702-bae3-d149934c5d50", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -193,7 +193,7 @@ { "cell_type": "code", "execution_count": null, - "id": 
"a1e6f1a5-f317-4f0f-99d5-b28374794115", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +206,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4054bb8-c619-4706-90ce-1a575477587b", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c84cfe13-0c5b-4eb7-8d6e-87178f95806d", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -249,7 +249,7 @@ { "cell_type": "code", "execution_count": null, - "id": "09a9984b-1fc0-483e-a6ff-b973d615eb62", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -260,7 +260,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fab840c1-a0bf-45db-aafd-32aa28be39a2", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -283,7 +283,7 @@ { "cell_type": "code", "execution_count": null, - "id": "802d0c63-545f-4689-a8ad-46f946bef39e", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -294,7 +294,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a40a7e83-ed87-4d73-afba-a10c33642716", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -308,7 +308,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a6c6bb73-5bd0-4d38-9f0e-69f3ea2fa3a2", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -320,7 +320,7 @@ { "cell_type": "code", "execution_count": null, - "id": "03db483c-9a1b-467b-b3c8-c3fdde2b5cb4", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -330,7 +330,7 @@ { "cell_type": "code", "execution_count": null, - "id": "de0f4e85-357d-4dcb-a196-66993f007881", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -340,7 +340,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4c57d4b2-ab5a-4042-98d4-e90e90d689ed", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -350,7 +350,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bbdbb072-8455-4449-9f37-dcfeb05cef09", + "id": "25", "metadata": {}, "outputs": [], "source": 
[] @@ -358,7 +358,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1d18c1d4-d735-424f-8238-21931a7a9f7c", + "id": "26", "metadata": {}, "outputs": [], "source": [] @@ -366,7 +366,7 @@ { "cell_type": "code", "execution_count": null, - "id": "93cd370b-4304-4825-9cc5-d2e3c9136b3a", + "id": "27", "metadata": {}, "outputs": [], "source": [] @@ -374,7 +374,7 @@ { "cell_type": "code", "execution_count": null, - "id": "98509a43-1101-4c96-b627-cd7939115d75", + "id": "28", "metadata": {}, "outputs": [], "source": [] @@ -382,7 +382,7 @@ { "cell_type": "code", "execution_count": null, - "id": "511ab795-b254-44c1-8da1-dd20c89bb0bf", + "id": "29", "metadata": {}, "outputs": [], "source": [] @@ -390,7 +390,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7914a455-9a1b-4b44-a77b-b75618ccfb90", + "id": "30", "metadata": {}, "outputs": [], "source": [] @@ -398,7 +398,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5be4c1f2-e110-436b-85f2-042465cf8dec", + "id": "31", "metadata": {}, "outputs": [], "source": [] From f46e1d39d0e634bdebab0929596e82ba5fb040f0 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Wed, 12 Jun 2024 17:06:21 +0100 Subject: [PATCH 14/27] Explore, merge and group mode split trade data --- notebooks/explore-mode-split.ipynb | 439 +++++++++++++++++++++++++++++ 1 file changed, 439 insertions(+) create mode 100644 notebooks/explore-mode-split.ipynb diff --git a/notebooks/explore-mode-split.ipynb b/notebooks/explore-mode-split.ipynb new file mode 100644 index 00000000..cdc3105d --- /dev/null +++ b/notebooks/explore-mode-split.ipynb @@ -0,0 +1,439 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "import re\n", + "from pathlib import Path\n", + "\n", + "import pandas\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_path = 
Path(\"/data/incoming/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note on columns:\n", + "- `v` is value in thousand USD.\n", + "- `q` is quantity in metric tons.\n", + "- `hs` codes are strings (can start with zero, if they get mangled into integers they can be left-zero-padded to recover the n-digit string, and can extract shorter codes from the first n digits of the longer code). There are different editions every few years. Assuming these are HS2017." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pandas.read_csv(\n", + " base_path / \"verschuur-2024-baci-mode-prediction\" / \"baci_mode_prediction_2020_HS6.csv\",\n", + " usecols=[\n", + " 'iso3_O', 'iso3_D', 'v', 'q', 'hs6', 'hs4', 'hs2',\n", + " 'v_air_predict', 'v_sea_predict', 'v_land_predict',\n", + " 'q_air_predict', 'q_sea_predict', 'q_land_predict'\n", + " ],\n", + " dtype={\n", + " 'hs6': 'str',\n", + " 'hs4': 'str',\n", + " 'hs2': 'str',\n", + " }\n", + ")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.hs6 = df.hs6.str.zfill(6)\n", + "df.hs4 = df.hs4.str.zfill(4)\n", + "df.hs2 = df.hs2.str.zfill(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df = df.query(\"iso3_O == 'THA' | iso3_D == 'THA'\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! cd {base_path / \"BACI\"} && wget -q -nc https://unstats.un.org/unsd/classifications/Econ/tables/CPC/CPCv21_HS2017/CPC21-HS2017.csv\n", + "! cd {base_path / \"BACI\"} && wget -q -nc https://unstats.un.org/unsd/classifications/Econ/tables/ISIC/ISIC4_CPCv21/isic4-cpc21.txt\n", + "! 
cd {base_path / \"BACI\"} && wget -q -nc https://unstats.un.org/unsd/classifications/Econ/Download/In%20Text/ISIC_Rev_4_english_structure.Txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "codes_cpc21_hs17 = pandas.read_csv(base_path / \"BACI\"/ \"CPC21-HS2017.csv\", dtype=\"str\") \\\n", + " .rename(columns={\"HS 2017\": \"hs17_6\", \"CPC Ver. 2.1\": \"cpc21\"}) \\\n", + " [[\"hs17_6\", \"cpc21\"]]\n", + "codes_cpc21_hs17.hs17_6 = codes_cpc21_hs17.hs17_6.str.replace(\".\", \"\")\n", + "codes_isic4_cpc21 = pandas.read_csv(base_path / \"BACI\"/ \"isic4-cpc21.txt\", dtype=\"str\")\\\n", + " .rename(columns={\"ISIC4code\": \"isic_rev4_4\", \"CPC21code\": \"cpc21\"}) \\\n", + " [[\"isic_rev4_4\", \"cpc21\"]]\n", + "codes_hs17_isic = codes_cpc21_hs17.merge(codes_isic4_cpc21, on=\"cpc21\", how=\"left\")\n", + "codes_hs17_isic[\"isic_rev4_2\"] = codes_hs17_isic.isic_rev4_4.str.slice(stop=2)\n", + "codes_hs17_isic[\"hs17_4\"] = codes_hs17_isic.hs17_6.str.slice(stop=4)\n", + "codes_hs17_4_isic = codes_hs17_isic.groupby(\"hs17_4\").first().reset_index().drop(columns=[\"hs17_6\", \"cpc21\"])\n", + "codes_hs17_isic[\"hs17_2\"] = codes_hs17_isic.hs17_6.str.slice(stop=2)\n", + "codes_hs17_2_isic = codes_hs17_isic.groupby(\"hs17_2\").first().reset_index().drop(columns=[\"hs17_6\", \"hs17_4\", \"cpc21\", \"isic_rev4_4\"])\n", + "codes_hs17_2_isic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merge_via_cpc_hs4 = df \\\n", + " .merge(codes_hs17_2_isic, left_on='hs2', right_on='hs17_2', how='left')\n", + "merge_via_cpc_hs4_success = merge_via_cpc_hs4[~merge_via_cpc_hs4.isic_rev4_2.isna()].drop(columns=[\"hs17_2\"])\n", + "merge_via_cpc_hs4_fail = merge_via_cpc_hs4[merge_via_cpc_hs4.isic_rev4_2.isna()].drop(columns=[\"isic_rev4_2\", \"hs17_2\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "merged = merge_via_cpc_hs4_success\n", + "assert len(merged) == len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(base_path / \"BACI\" / \"ISIC_Rev_4_english_structure.Txt\") as fh:\n", + " r = csv.DictReader(fh)\n", + " sector = \"\"\n", + " letter = \"\"\n", + " two_digit = \"\"\n", + " three_digit = \"\"\n", + " sector = \"\"\n", + " data = []\n", + "\n", + " for row in r:\n", + " code = row[\"Code\"]\n", + " desc = row[\"Description\"]\n", + " is_numeric = re.match(\"^[0-9]+$\", code) is not None\n", + " num_digits = len(code)\n", + " if not is_numeric:\n", + " letter = code\n", + " if letter == \"A\":\n", + " sector = \"ag\"\n", + " elif letter in (\"B\", \"C\", \"D\", \"E\", \"F\"):\n", + " sector = \"man\" ## and other industry\n", + " else:\n", + " sector = \"serv\"\n", + " elif num_digits == 2:\n", + " two_digit = code\n", + " elif num_digits == 3:\n", + " three_digit = code\n", + " else:\n", + " assert num_digits == 4\n", + " data.append({\"broad_sector\": sector, \"isic_rev4_1\": letter,\"isic_rev4_2\": two_digit, \"isic_rev4_4\": code, \"isic_rev4_desc\": desc})\n", + "\n", + "isic4_letter = pandas.DataFrame(data)\n", + "isic4_letter.to_csv(base_path / \"BACI\" / \"ISIC_Rev_4_english_structure_clean.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "isic4_letter_2 = isic4_letter[[\"broad_sector\", \"isic_rev4_1\", \"isic_rev4_2\"]].groupby(\"isic_rev4_2\").first()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged_with_sector = merged.merge(isic4_letter_2, on=\"isic_rev4_2\", how=\"left\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged_with_sector.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "merged_with_sector.to_parquet(\"../results/input/giri/THA/trade/baci_mode_split_matched_codes.parquet\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grouped = merged_with_sector[[\n", + " 'iso3_O', 'iso3_D', 'broad_sector', 'v', 'q',\n", + " 'v_air_predict', 'v_sea_predict', 'v_land_predict',\n", + " 'q_air_predict', 'q_sea_predict', 'q_land_predict'\n", + "]] \\\n", + " .groupby(['iso3_O', 'iso3_D']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + "grouped_sector = merged_with_sector[[\n", + " 'iso3_O', 'iso3_D', 'broad_sector', 'v', 'q',\n", + " 'v_air_predict', 'v_sea_predict', 'v_land_predict',\n", + " 'q_air_predict', 'q_sea_predict', 'q_land_predict'\n", + "]] \\\n", + " .groupby(['iso3_O', 'iso3_D', 'broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tha_trade = grouped.query(\"iso3_O == 'THA' | iso3_D == 'THA'\")\n", + "tha_trade_sector = grouped_sector.query(\"iso3_O == 'THA' | iso3_D == 'THA'\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = tha_trade_sector.copy()\n", + "def get_partner(row):\n", + " if row.iso3_O == \"THA\":\n", + " return row.iso3_D\n", + " return row.iso3_O\n", + "tmp[\"iso3_partner\"] = tmp.apply(get_partner, axis=1)\n", + "tmp.groupby(['iso3_partner']).sum().query(\"(v > 1e9)\") \\\n", + " [['v_air_predict', 'v_sea_predict', 'v_land_predict']] \\\n", + " .plot(kind=\"bar\", title=\"THA trade mode split (value)\", ylabel=\"Value ('000 USD)\")\n", + "plt.savefig(\"../results/input/giri/THA/trade/mode_split_THA_v_partners.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = tha_trade_sector.copy()\n", + "def get_partner(row):\n", + " if 
row.iso3_O == \"THA\":\n", + " return row.iso3_D\n", + " return row.iso3_O\n", + "tmp[\"iso3_partner\"] = tmp.apply(get_partner, axis=1)\n", + "tmp.groupby(['iso3_partner']).sum().query(\"(q > 1e6)\") \\\n", + " [['q_air_predict', 'q_sea_predict', 'q_land_predict']] \\\n", + " .plot(kind=\"bar\", title=\"THA trade mode split (volume)\", ylabel=\"Volume (metric tons)\")\n", + "plt.savefig(\"../results/input/giri/THA/trade/mode_split_THA_q_partners.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for sector in (\"ag\", \"man\"):\n", + " tmp = tha_trade_sector.query(f\"iso3_D != 'THA' & broad_sector == '{sector}' & (v > 1e9)\")\n", + " tmp.set_index('iso3_D') \\\n", + " [['broad_sector', 'v_air_predict', 'v_sea_predict', 'v_land_predict']] \\\n", + " .plot(kind=\"bar\", title=f\"THA '{sector}' sector exports\", ylabel=\"Value ('000 USD)\", xlabel=\"Destination\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = tha_trade.query(\"iso3_O != 'THA' & (v > 1e9)\")\n", + "tmp.set_index('iso3_O') \\\n", + " [['v_air_predict', 'v_sea_predict', 'v_land_predict']] \\\n", + " .plot(kind=\"bar\", title=\"THA imports\", ylabel=\"Value ('000 USD)\", xlabel=\"Source\")\n", + "\n", + "tmp = tha_trade.query(\"iso3_D != 'THA' & (v > 1e9)\")\n", + "tmp.set_index('iso3_D') \\\n", + " [['v_air_predict', 'v_sea_predict', 'v_land_predict']] \\\n", + " .plot(kind=\"bar\", title=\"THA exports\", ylabel=\"Value ('000 USD)\", xlabel=\"Destination\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# each way, value\n", + "tha_import_split_v = merged_with_sector.query(\"iso3_D == 'THA'\")[[\n", + " 'broad_sector',\n", + " 'v_air_predict', 'v_sea_predict', 'v_land_predict',\n", + "]] \\\n", + " .groupby(['broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + 
"tha_export_split_v = merged_with_sector.query(\"iso3_O == 'THA'\")[[\n", + " 'broad_sector',\n", + " 'v_air_predict', 'v_sea_predict', 'v_land_predict',\n", + "]] \\\n", + " .groupby(['broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + "# each way, volume\n", + "tha_import_split_q = merged_with_sector.query(\"iso3_D == 'THA'\")[[\n", + " 'broad_sector',\n", + " 'q_air_predict', 'q_sea_predict', 'q_land_predict'\n", + "]] \\\n", + " .groupby(['broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + "tha_export_split_q = merged_with_sector.query(\"iso3_O == 'THA'\")[[\n", + " 'broad_sector',\n", + " 'q_air_predict', 'q_sea_predict', 'q_land_predict'\n", + "]] \\\n", + " .groupby(['broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + "# either way\n", + "tha_trade_split_q = merged_with_sector.query(\"iso3_O == 'THA' | iso3_D == 'THA'\")[[\n", + " 'broad_sector',\n", + " 'q_air_predict', 'q_sea_predict', 'q_land_predict'\n", + "]] \\\n", + " .groupby(['broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()\n", + "\n", + "tha_trade_split_v = merged_with_sector.query(\"iso3_O == 'THA' | iso3_D == 'THA'\")[[\n", + " 'broad_sector',\n", + " 'v_air_predict', 'v_sea_predict', 'v_land_predict',\n", + "]] \\\n", + " .groupby(['broad_sector']) \\\n", + " .sum() \\\n", + " .reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tha_import_split_v.set_index(\"broad_sector\").plot(kind=\"bar\", title=\"THA import mode split (value)\", ylabel=\"Value ('000 USD)\")\n", + "tha_export_split_v.set_index(\"broad_sector\").plot(kind=\"bar\", title=\"THA export mode split (value)\", ylabel=\"Value ('000 USD)\")\n", + "tha_import_split_q.set_index(\"broad_sector\").plot(kind=\"bar\", title=\"THA import mode split (volume)\", ylabel=\"Volume (metric tons)\")\n", + "tha_export_split_q.set_index(\"broad_sector\").plot(kind=\"bar\", title=\"THA export 
mode split (volume)\", ylabel=\"Volume (metric tons)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tha_trade_split_v.set_index(\"broad_sector\").plot(kind=\"bar\", title=\"THA trade mode split (value)\", ylabel=\"Value ('000 USD)\")\n", + "plt.savefig(\"../results/input/giri/THA/trade/mode_split_THA_v.png\")\n", + "tha_trade_split_q.set_index(\"broad_sector\").plot(kind=\"bar\", title=\"THA trade mode split (volume)\", ylabel=\"Volume (metric tons)\")\n", + "plt.savefig(\"../results/input/giri/THA/trade/mode_split_THA_q.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tha_trade_sector_to_save = tha_trade_sector \\\n", + " .rename(columns={\n", + " \"broad_sector\": \"sector\",\n", + " \"iso3_O\": \"export_country_code\",\n", + " \"iso3_D\": \"import_country_code\",\n", + " \"v\": \"trade_value_thousandUSD\",\n", + " \"q\": \"trade_quantity_tons\",\n", + " \"v_air_predict\": \"trade_value_thousandUSD__air\",\n", + " \"q_air_predict\": \"trade_quantity_tons__air\",\n", + " \"v_land_predict\": \"trade_value_thousandUSD__land\",\n", + " \"q_land_predict\": \"trade_quantity_tons__land\",\n", + " \"v_sea_predict\": \"trade_value_thousandUSD__sea\",\n", + " \"q_sea_predict\": \"trade_quantity_tons__sea\",\n", + " })\n", + "\n", + "tha_trade_sector_to_save.to_csv(\"../results/input/giri/THA/trade/baci_sector_trade_THA_from_mode_split.csv\", index=False)\n", + "tha_trade_sector_to_save.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 
ac0ffdf58a4ca3c6ad127aa13436d50621ab16ae Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 09:32:11 +0100 Subject: [PATCH 15/27] Update damage curves --- config/damage_curves/flood/rail_railway.csv | 246 +++++++++---------- config/damage_curves/flood/road_motorway.csv | 246 +++++++++---------- config/damage_curves/flood/road_paved.csv | 246 +++++++++---------- 3 files changed, 369 insertions(+), 369 deletions(-) diff --git a/config/damage_curves/flood/rail_railway.csv b/config/damage_curves/flood/rail_railway.csv index 4914e8f5..60c66ebd 100644 --- a/config/damage_curves/flood/rail_railway.csv +++ b/config/damage_curves/flood/rail_railway.csv @@ -1,125 +1,125 @@ # https://zenodo.org/records/10203846 # Table_D2_Multi-Hazard_Fragility_and_Vulnerability_Curves_V1.0.0.xlsx -# Nirandjan 2024, V1.0, F_Vuln_Depth, F8.6 -inundation_depth_(m),damage_fraction -0.00,0.000 -0.05,0.021 -0.10,0.043 -0.15,0.064 -0.20,0.086 -0.25,0.107 -0.30,0.129 -0.35,0.150 -0.40,0.172 -0.45,0.193 -0.50,0.214 -0.55,0.230 -0.60,0.246 -0.65,0.262 -0.70,0.278 -0.75,0.294 -0.80,0.309 -0.85,0.325 -0.90,0.341 -0.95,0.357 -1.00,0.373 -1.05,0.396 -1.10,0.419 -1.15,0.442 -1.20,0.465 -1.25,0.488 -1.30,0.511 -1.35,0.535 -1.40,0.558 -1.45,0.581 -1.50,0.604 -1.55,0.615 -1.60,0.625 -1.65,0.636 -1.70,0.646 -1.75,0.657 -1.80,0.667 -1.85,0.678 -1.90,0.689 -1.95,0.699 -2.00,0.710 -2.05,0.715 -2.10,0.720 -2.15,0.724 -2.20,0.729 -2.25,0.734 -2.30,0.739 -2.35,0.744 -2.40,0.749 -2.45,0.754 -2.50,0.759 -2.55,0.764 -2.60,0.769 -2.65,0.774 -2.70,0.779 -2.75,0.784 -2.80,0.789 -2.85,0.794 -2.90,0.799 -2.95,0.803 -3.00,0.808 -3.05,0.812 -3.10,0.816 -3.15,0.820 -3.20,0.824 -3.25,0.828 -3.30,0.832 -3.35,0.836 -3.40,0.840 -3.45,0.844 -3.50,0.848 -3.55,0.852 -3.60,0.856 -3.65,0.860 -3.70,0.864 -3.75,0.867 -3.80,0.871 -3.85,0.875 -3.90,0.879 -3.95,0.883 -4.00,0.887 -4.05,0.891 -4.10,0.895 -4.15,0.899 -4.20,0.903 -4.25,0.908 -4.30,0.912 -4.35,0.916 -4.40,0.920 -4.45,0.924 -4.50,0.928 -4.55,0.932 -4.60,0.936 
-4.65,0.940 -4.70,0.944 -4.75,0.948 -4.80,0.952 -4.85,0.957 -4.90,0.961 -4.95,0.965 -5.00,0.969 -5.05,0.970 -5.10,0.972 -5.15,0.973 -5.20,0.975 -5.25,0.977 -5.30,0.978 -5.35,0.980 -5.40,0.981 -5.45,0.983 -5.50,0.984 -5.55,0.986 -5.60,0.988 -5.65,0.989 -5.70,0.991 -5.75,0.992 -5.80,0.994 -5.85,0.995 -5.90,0.997 -5.95,0.998 -6.00,1.000 +# Nirandjan 2024, V1.0, F_Vuln_Depth, F8.6a,F8.6b +inundation_depth_(m),damage_fraction,high +0,0.000,0.000 +0.05,0.015,0.028 +0.1,0.029,0.057 +0.15,0.044,0.085 +0.2,0.058,0.114 +0.25,0.073,0.142 +0.3,0.087,0.170 +0.35,0.102,0.199 +0.4,0.116,0.227 +0.45,0.131,0.255 +0.5,0.145,0.284 +0.55,0.160,0.300 +0.6,0.176,0.316 +0.65,0.191,0.333 +0.7,0.207,0.349 +0.75,0.222,0.365 +0.8,0.237,0.382 +0.85,0.253,0.398 +0.9,0.268,0.414 +0.95,0.283,0.430 +1,0.299,0.447 +1.05,0.309,0.483 +1.1,0.319,0.519 +1.15,0.329,0.556 +1.2,0.339,0.592 +1.25,0.348,0.628 +1.3,0.358,0.665 +1.35,0.368,0.701 +1.4,0.378,0.737 +1.45,0.388,0.773 +1.5,0.398,0.810 +1.55,0.402,0.827 +1.6,0.406,0.844 +1.65,0.410,0.861 +1.7,0.415,0.878 +1.75,0.419,0.895 +1.8,0.423,0.912 +1.85,0.427,0.929 +1.9,0.431,0.946 +1.95,0.435,0.963 +2,0.439,0.980 +2.05,0.447,0.981 +2.1,0.454,0.982 +2.15,0.461,0.983 +2.2,0.469,0.984 +2.25,0.476,0.985 +2.3,0.484,0.986 +2.35,0.491,0.987 +2.4,0.498,0.988 +2.45,0.506,0.989 +2.5,0.513,0.990 +2.55,0.521,0.991 +2.6,0.528,0.992 +2.65,0.535,0.993 +2.7,0.543,0.994 +2.75,0.550,0.995 +2.8,0.558,0.996 +2.85,0.565,0.997 +2.9,0.572,0.998 +2.95,0.580,0.999 +3,0.587,1.000 +3.05,0.595,1.000 +3.1,0.604,1.000 +3.15,0.612,1.000 +3.2,0.620,1.000 +3.25,0.629,1.000 +3.3,0.637,1.000 +3.35,0.645,1.000 +3.4,0.654,1.000 +3.45,0.662,1.000 +3.5,0.670,1.000 +3.55,0.679,1.000 +3.6,0.687,1.000 +3.65,0.695,1.000 +3.7,0.704,1.000 +3.75,0.712,1.000 +3.8,0.720,1.000 +3.85,0.729,1.000 +3.9,0.737,1.000 +3.95,0.745,1.000 +4,0.754,1.000 +4.05,0.761,1.000 +4.1,0.769,1.000 +4.15,0.777,1.000 +4.2,0.784,1.000 +4.25,0.792,1.000 +4.3,0.799,1.000 +4.35,0.807,1.000 +4.4,0.815,1.000 +4.45,0.822,1.000 
+4.5,0.830,1.000 +4.55,0.838,1.000 +4.6,0.845,1.000 +4.65,0.853,1.000 +4.7,0.860,1.000 +4.75,0.868,1.000 +4.8,0.876,1.000 +4.85,0.883,1.000 +4.9,0.891,1.000 +4.95,0.899,1.000 +5,0.906,1.000 +5.05,0.911,1.000 +5.1,0.916,1.000 +5.15,0.920,1.000 +5.2,0.925,1.000 +5.25,0.930,1.000 +5.3,0.934,1.000 +5.35,0.939,1.000 +5.4,0.944,1.000 +5.45,0.948,1.000 +5.5,0.953,1.000 +5.55,0.958,1.000 +5.6,0.962,1.000 +5.65,0.967,1.000 +5.7,0.972,1.000 +5.75,0.977,1.000 +5.8,0.981,1.000 +5.85,0.986,1.000 +5.9,0.991,1.000 +5.95,0.995,1.000 +6,1.000,1.000 diff --git a/config/damage_curves/flood/road_motorway.csv b/config/damage_curves/flood/road_motorway.csv index f7709055..c22ec238 100644 --- a/config/damage_curves/flood/road_motorway.csv +++ b/config/damage_curves/flood/road_motorway.csv @@ -1,125 +1,125 @@ # https://zenodo.org/records/10203846 # Table_D2_Multi-Hazard_Fragility_and_Vulnerability_Curves_V1.0.0.xlsx -# Nirandjan 2024, V1.0, F_Vuln_Depth, F7.4 -inundation_depth_(m),damage_fraction -0.00,0.000 -0.05,0.001 -0.10,0.002 -0.15,0.003 -0.20,0.004 -0.25,0.005 -0.30,0.006 -0.35,0.007 -0.40,0.008 -0.45,0.009 -0.50,0.010 -0.55,0.012 -0.60,0.014 -0.65,0.016 -0.70,0.018 -0.75,0.020 -0.80,0.022 -0.85,0.024 -0.90,0.026 -0.95,0.028 -1.00,0.030 -1.05,0.035 -1.10,0.039 -1.15,0.044 -1.20,0.048 -1.25,0.053 -1.30,0.057 -1.35,0.062 -1.40,0.066 -1.45,0.071 -1.50,0.075 -1.55,0.078 -1.60,0.080 -1.65,0.083 -1.70,0.085 -1.75,0.088 -1.80,0.090 -1.85,0.093 -1.90,0.095 -1.95,0.098 -2.00,0.100 -2.05,0.101 -2.10,0.103 -2.15,0.104 -2.20,0.105 -2.25,0.106 -2.30,0.108 -2.35,0.109 -2.40,0.110 -2.45,0.111 -2.50,0.113 -2.55,0.114 -2.60,0.115 -2.65,0.116 -2.70,0.118 -2.75,0.119 -2.80,0.120 -2.85,0.121 -2.90,0.123 -2.95,0.124 -3.00,0.125 -3.05,0.126 -3.10,0.128 -3.15,0.129 -3.20,0.130 -3.25,0.131 -3.30,0.133 -3.35,0.134 -3.40,0.135 -3.45,0.136 -3.50,0.138 -3.55,0.139 -3.60,0.140 -3.65,0.141 -3.70,0.143 -3.75,0.144 -3.80,0.145 -3.85,0.146 -3.90,0.148 -3.95,0.149 -4.00,0.150 -4.05,0.151 -4.10,0.153 -4.15,0.154 
-4.20,0.155 -4.25,0.156 -4.30,0.158 -4.35,0.159 -4.40,0.160 -4.45,0.161 -4.50,0.163 -4.55,0.164 -4.60,0.165 -4.65,0.166 -4.70,0.168 -4.75,0.169 -4.80,0.170 -4.85,0.171 -4.90,0.173 -4.95,0.174 -5.00,0.175 -5.05,0.176 -5.10,0.178 -5.15,0.179 -5.20,0.180 -5.25,0.181 -5.30,0.183 -5.35,0.184 -5.40,0.185 -5.45,0.186 -5.50,0.188 -5.55,0.189 -5.60,0.190 -5.65,0.191 -5.70,0.193 -5.75,0.194 -5.80,0.195 -5.85,0.196 -5.90,0.198 -5.95,0.199 -6.00,0.200 +# Nirandjan 2024, V1.0, F_Vuln_Depth, F7.4,F7.5 +inundation_depth_(m),damage_fraction,high flow +0,0,0.000 +0.05,0.001,0.002 +0.1,0.002,0.004 +0.15,0.003,0.006 +0.2,0.004,0.008 +0.25,0.005,0.010 +0.3,0.006,0.012 +0.35,0.007,0.014 +0.4,0.008,0.016 +0.45,0.009,0.018 +0.5,0.01,0.020 +0.55,0.012,0.024 +0.6,0.014,0.028 +0.65,0.016,0.032 +0.7,0.018,0.036 +0.75,0.02,0.040 +0.8,0.022,0.044 +0.85,0.024,0.048 +0.9,0.026,0.052 +0.95,0.028,0.056 +1,0.03,0.060 +1.05,0.035,0.064 +1.1,0.039,0.068 +1.15,0.044,0.072 +1.2,0.048,0.076 +1.25,0.053,0.080 +1.3,0.057,0.084 +1.35,0.062,0.088 +1.4,0.066,0.092 +1.45,0.071,0.096 +1.5,0.075,0.100 +1.55,0.078,0.102 +1.6,0.08,0.104 +1.65,0.083,0.106 +1.7,0.085,0.108 +1.75,0.088,0.110 +1.8,0.09,0.112 +1.85,0.093,0.114 +1.9,0.095,0.116 +1.95,0.098,0.118 +2,0.1,0.120 +2.05,0.101,0.121 +2.1,0.103,0.123 +2.15,0.104,0.124 +2.2,0.105,0.125 +2.25,0.106,0.126 +2.3,0.108,0.128 +2.35,0.109,0.129 +2.4,0.11,0.130 +2.45,0.111,0.131 +2.5,0.113,0.133 +2.55,0.114,0.134 +2.6,0.115,0.135 +2.65,0.116,0.136 +2.7,0.118,0.138 +2.75,0.119,0.139 +2.8,0.12,0.140 +2.85,0.121,0.141 +2.9,0.123,0.143 +2.95,0.124,0.144 +3,0.125,0.145 +3.05,0.126,0.146 +3.1,0.128,0.148 +3.15,0.129,0.149 +3.2,0.13,0.150 +3.25,0.131,0.151 +3.3,0.133,0.153 +3.35,0.134,0.154 +3.4,0.135,0.155 +3.45,0.136,0.156 +3.5,0.138,0.158 +3.55,0.139,0.159 +3.6,0.14,0.160 +3.65,0.141,0.161 +3.7,0.143,0.163 +3.75,0.144,0.164 +3.8,0.145,0.165 +3.85,0.146,0.166 +3.9,0.148,0.168 +3.95,0.149,0.169 +4,0.15,0.170 +4.05,0.151,0.171 +4.1,0.153,0.173 +4.15,0.154,0.174 
+4.2,0.155,0.175 +4.25,0.156,0.176 +4.3,0.158,0.178 +4.35,0.159,0.179 +4.4,0.16,0.180 +4.45,0.161,0.181 +4.5,0.163,0.183 +4.55,0.164,0.184 +4.6,0.165,0.185 +4.65,0.166,0.186 +4.7,0.168,0.188 +4.75,0.169,0.189 +4.8,0.17,0.190 +4.85,0.171,0.191 +4.9,0.173,0.193 +4.95,0.174,0.194 +5,0.175,0.195 +5.05,0.176,0.196 +5.1,0.178,0.198 +5.15,0.179,0.199 +5.2,0.18,0.200 +5.25,0.181,0.201 +5.3,0.183,0.203 +5.35,0.184,0.204 +5.4,0.185,0.205 +5.45,0.186,0.206 +5.5,0.188,0.208 +5.55,0.189,0.209 +5.6,0.19,0.210 +5.65,0.191,0.211 +5.7,0.193,0.213 +5.75,0.194,0.214 +5.8,0.195,0.215 +5.85,0.196,0.216 +5.9,0.198,0.218 +5.95,0.199,0.219 +6,0.2,0.220 diff --git a/config/damage_curves/flood/road_paved.csv b/config/damage_curves/flood/road_paved.csv index eb549f3f..b8538bf0 100644 --- a/config/damage_curves/flood/road_paved.csv +++ b/config/damage_curves/flood/road_paved.csv @@ -1,125 +1,125 @@ # https://zenodo.org/records/10203846 # Table_D2_Multi-Hazard_Fragility_and_Vulnerability_Curves_V1.0.0.xlsx -# Nirandjan 2024, V1.0, F_Vuln_Depth, F7.2a -inundation_depth_(m),damage_fraction -0.00,0.000 -0.05,0.015 -0.10,0.029 -0.15,0.044 -0.20,0.058 -0.25,0.073 -0.30,0.087 -0.35,0.102 -0.40,0.116 -0.45,0.131 -0.50,0.145 -0.55,0.160 -0.60,0.176 -0.65,0.191 -0.70,0.207 -0.75,0.222 -0.80,0.237 -0.85,0.253 -0.90,0.268 -0.95,0.283 -1.00,0.299 -1.05,0.309 -1.10,0.319 -1.15,0.329 -1.20,0.339 -1.25,0.348 -1.30,0.358 -1.35,0.368 -1.40,0.378 -1.45,0.388 -1.50,0.398 -1.55,0.402 -1.60,0.406 -1.65,0.410 -1.70,0.415 -1.75,0.419 -1.80,0.423 -1.85,0.427 -1.90,0.431 -1.95,0.435 -2.00,0.439 -2.05,0.447 -2.10,0.454 -2.15,0.461 -2.20,0.469 -2.25,0.476 -2.30,0.484 -2.35,0.491 -2.40,0.498 -2.45,0.506 -2.50,0.513 -2.55,0.521 -2.60,0.528 -2.65,0.535 -2.70,0.543 -2.75,0.550 -2.80,0.558 -2.85,0.565 -2.90,0.572 -2.95,0.580 -3.00,0.587 -3.05,0.595 -3.10,0.604 -3.15,0.612 -3.20,0.620 -3.25,0.629 -3.30,0.637 -3.35,0.645 -3.40,0.654 -3.45,0.662 -3.50,0.670 -3.55,0.679 -3.60,0.687 -3.65,0.695 -3.70,0.704 -3.75,0.712 -3.80,0.720 
-3.85,0.729 -3.90,0.737 -3.95,0.745 -4.00,0.754 -4.05,0.761 -4.10,0.769 -4.15,0.777 -4.20,0.784 -4.25,0.792 -4.30,0.799 -4.35,0.807 -4.40,0.815 -4.45,0.822 -4.50,0.830 -4.55,0.838 -4.60,0.845 -4.65,0.853 -4.70,0.860 -4.75,0.868 -4.80,0.876 -4.85,0.883 -4.90,0.891 -4.95,0.899 -5.00,0.906 -5.05,0.911 -5.10,0.916 -5.15,0.920 -5.20,0.925 -5.25,0.930 -5.30,0.934 -5.35,0.939 -5.40,0.944 -5.45,0.948 -5.50,0.953 -5.55,0.958 -5.60,0.962 -5.65,0.967 -5.70,0.972 -5.75,0.977 -5.80,0.981 -5.85,0.986 -5.90,0.991 -5.95,0.995 -6.00,1.000 +# Nirandjan 2024, V1.0, F_Vuln_Depth, F7.2a,F7.2b,F7.2 +inundation_depth_(m),damage_fraction,upper,central +0,0,0,0.000 +0.05,0.015,0.028,0.021 +0.1,0.029,0.057,0.043 +0.15,0.044,0.085,0.064 +0.2,0.058,0.114,0.086 +0.25,0.073,0.142,0.107 +0.3,0.087,0.17,0.129 +0.35,0.102,0.199,0.150 +0.4,0.116,0.227,0.172 +0.45,0.131,0.255,0.193 +0.5,0.145,0.284,0.214 +0.55,0.16,0.3,0.230 +0.6,0.176,0.316,0.246 +0.65,0.191,0.333,0.262 +0.7,0.207,0.349,0.278 +0.75,0.222,0.365,0.294 +0.8,0.237,0.382,0.309 +0.85,0.253,0.398,0.325 +0.9,0.268,0.414,0.341 +0.95,0.283,0.43,0.357 +1,0.299,0.447,0.373 +1.05,0.309,0.483,0.396 +1.1,0.319,0.519,0.419 +1.15,0.329,0.556,0.442 +1.2,0.339,0.592,0.465 +1.25,0.348,0.628,0.488 +1.3,0.358,0.665,0.511 +1.35,0.368,0.701,0.535 +1.4,0.378,0.737,0.558 +1.45,0.388,0.773,0.581 +1.5,0.398,0.81,0.604 +1.55,0.402,0.827,0.615 +1.6,0.406,0.844,0.625 +1.65,0.41,0.861,0.636 +1.7,0.415,0.878,0.646 +1.75,0.419,0.895,0.657 +1.8,0.423,0.912,0.667 +1.85,0.427,0.929,0.678 +1.9,0.431,0.946,0.689 +1.95,0.435,0.963,0.699 +2,0.439,0.98,0.710 +2.05,0.447,0.981,0.715 +2.1,0.454,0.982,0.720 +2.15,0.461,0.983,0.724 +2.2,0.469,0.984,0.729 +2.25,0.476,0.985,0.734 +2.3,0.484,0.986,0.739 +2.35,0.491,0.987,0.744 +2.4,0.498,0.988,0.749 +2.45,0.506,0.989,0.754 +2.5,0.513,0.99,0.759 +2.55,0.521,0.991,0.764 +2.6,0.528,0.992,0.769 +2.65,0.535,0.993,0.774 +2.7,0.543,0.994,0.779 +2.75,0.55,0.995,0.784 +2.8,0.558,0.996,0.789 +2.85,0.565,0.997,0.794 +2.9,0.572,0.998,0.799 
+2.95,0.58,0.999,0.803 +3,0.587,1,0.808 +3.05,0.595,1,0.812 +3.1,0.604,1,0.816 +3.15,0.612,1,0.820 +3.2,0.62,1,0.824 +3.25,0.629,1,0.828 +3.3,0.637,1,0.832 +3.35,0.645,1,0.836 +3.4,0.654,1,0.840 +3.45,0.662,1,0.844 +3.5,0.67,1,0.848 +3.55,0.679,1,0.852 +3.6,0.687,1,0.856 +3.65,0.695,1,0.860 +3.7,0.704,1,0.864 +3.75,0.712,1,0.867 +3.8,0.72,1,0.871 +3.85,0.729,1,0.875 +3.9,0.737,1,0.879 +3.95,0.745,1,0.883 +4,0.754,1,0.887 +4.05,0.761,1,0.891 +4.1,0.769,1,0.895 +4.15,0.777,1,0.899 +4.2,0.784,1,0.903 +4.25,0.792,1,0.908 +4.3,0.799,1,0.912 +4.35,0.807,1,0.916 +4.4,0.815,1,0.920 +4.45,0.822,1,0.924 +4.5,0.83,1,0.928 +4.55,0.838,1,0.932 +4.6,0.845,1,0.936 +4.65,0.853,1,0.940 +4.7,0.86,1,0.944 +4.75,0.868,1,0.948 +4.8,0.876,1,0.952 +4.85,0.883,1,0.957 +4.9,0.891,1,0.961 +4.95,0.899,1,0.965 +5,0.906,1,0.969 +5.05,0.911,1,0.970 +5.1,0.916,1,0.972 +5.15,0.92,1,0.973 +5.2,0.925,1,0.975 +5.25,0.93,1,0.977 +5.3,0.934,1,0.978 +5.35,0.939,1,0.980 +5.4,0.944,1,0.981 +5.45,0.948,1,0.983 +5.5,0.953,1,0.984 +5.55,0.958,1,0.986 +5.6,0.962,1,0.988 +5.65,0.967,1,0.989 +5.7,0.972,1,0.991 +5.75,0.977,1,0.992 +5.8,0.981,1,0.994 +5.85,0.986,1,0.995 +5.9,0.991,1,0.997 +5.95,0.995,1,0.998 +6,1,1,1.000 From 40b28f861cf67e82924082ac6d84c735cc364db1 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 09:39:38 +0100 Subject: [PATCH 16/27] WIP capital stocks, activity and trade disaggregation --- notebooks/capital-stocks.ipynb | 18 ++- notebooks/disaggregate-trade-to-adm1.ipynb | 105 +++++++++++-- notebooks/plot-damage-curve.ipynb | 170 +++++++++++++++++++++ notebooks/summarise-total.ipynb | 110 +++++++++++++ notebooks/windowed-raster.ipynb | 96 ++++++++++-- 5 files changed, 472 insertions(+), 27 deletions(-) create mode 100644 notebooks/plot-damage-curve.ipynb create mode 100644 notebooks/summarise-total.ipynb diff --git a/notebooks/capital-stocks.ipynb b/notebooks/capital-stocks.ipynb index b9f3b183..5bdd3125 100644 --- a/notebooks/capital-stocks.ipynb +++ 
b/notebooks/capital-stocks.ipynb @@ -17,7 +17,7 @@ "metadata": {}, "outputs": [], "source": [ - "icsd = pandas.read_csv(\"input/capital-stocks/icsd.csv\")\n", + "icsd = pandas.read_csv(\"../results/input/capital-stocks/icsd.csv\")\n", "icsd.tail(2)" ] }, @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "pwt = pandas.read_csv(\"input/capital-stocks/pwt.csv\")\n", + "pwt = pandas.read_csv(\"../results/input/capital-stocks/pwt.csv\")\n", "pwt.tail(2)" ] }, @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "cwon = pandas.read_csv(\"input/capital-stocks/CWON2021.csv\")\n", + "cwon = pandas.read_csv(\"../results/input/capital-stocks/CWON2021.csv\")\n", "cwon.tail(2)" ] }, @@ -50,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "bem = pandas.read_csv(\"input/giri/bem_5x5_valfis_adm0.csv\")" + "bem = pandas.read_csv(\"../results/input/giri/bem_5x5_valfis_adm0.csv\")" ] }, { @@ -91,6 +91,16 @@ "\n", "df[[\"giri_cwon\", \"giri_icsd\", \"giri_pwt\", \"cwon_icsd\", \"cwon_pwt\"]].plot.box(ylabel=\"Ratio\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "df[[\"giri__valfis\", \"cwon__pk\", \"res_sum\", \"nres_sum\"]].loc[\"THA\"]" + ] } ], "metadata": { diff --git a/notebooks/disaggregate-trade-to-adm1.ipynb b/notebooks/disaggregate-trade-to-adm1.ipynb index 3fc4d86a..cd43eb1b 100644 --- a/notebooks/disaggregate-trade-to-adm1.ipynb +++ b/notebooks/disaggregate-trade-to-adm1.ipynb @@ -7,7 +7,8 @@ "metadata": {}, "outputs": [], "source": [ - "! exa -l" + "from pathlib import Path\n", + "base_dir = Path(\"../results/input/giri/THA/trade\")" ] }, { @@ -17,9 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas\n", - "import geopandas\n", - "import rasterio" + "! 
ls -lah {base_dir}" ] }, { @@ -29,8 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "trade_adm0 = pandas.read_csv(\"baci_sector_trade_THA.csv\")\n", - "trade_adm0[\"GID_0\"] = \"THA\"" + "import pandas" ] }, { @@ -40,7 +38,8 @@ "metadata": {}, "outputs": [], "source": [ - "trade_adm0.groupby(\"sector\").count()" + "trade_adm0 = pandas.read_csv(base_dir / \"baci_sector_trade_THA.csv\")\n", + "trade_adm0[\"GID_0\"] = \"THA\"" ] }, { @@ -50,17 +49,97 @@ "metadata": {}, "outputs": [], "source": [ - "gva_adm1_tha = pandas.read_csv(\"DOSE_V2.csv\").query('year == 2018 and GID_0 == \"THA\"')[[\n", + "trade_adm0.groupby(\"sector\").count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "adm0_codes = sorted(list(set(trade_adm0.export_country_code) | set(trade_adm0.import_country_code)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "gva_adm1_all = pandas.read_csv(base_dir / \"DOSE_V2.csv\")\n", + "gva_adm1 = gva_adm1_all.query(f'year == 2018 and GID_0 in {adm0_codes}')[[\n", " 'GID_0', 'GID_1', 'year', 'pop',\n", " 'ag_grp_pc_usd', 'man_grp_pc_usd', 'serv_grp_pc_usd'\n", "]].dropna()\n", "\n", "for sector in (\"ag\", \"man\", \"serv\"):\n", - " gva_adm1_tha[sector] = gva_adm1_tha[\"pop\"] * gva_adm1_tha[f\"{sector}_grp_pc_usd\"]\n", + " gva_adm1[sector] = gva_adm1[\"pop\"] * gva_adm1[f\"{sector}_grp_pc_usd\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "len(set(adm0_codes) - set(gva_adm1.GID_0.unique())), len(gva_adm1.GID_0.unique()), len(adm0_codes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "gva_adm1_all[['GID_0','country','year']].groupby(\"GID_0\").max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + 
"metadata": {}, + "outputs": [], + "source": [ + "wenz_codes = set(gva_adm1.GID_0.unique())\n", + "adm_codes_df = pandas.DataFrame({'GID_0': adm0_codes})\n", + "adm_codes_df['ADM1_available_in_wenz'] = adm_codes_df.GID_0.isin(wenz_codes)\n", + "adm_codes_df.to_csv(base_dir / \"gdp_adm1_availability_for_THA_trade_partners.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "gva_adm1.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ "\n", "gva_adm1_sector_tha = gva_adm1_tha.drop(columns=[\"GID_0\", \"year\", \"pop\", 'ag_grp_pc_usd', 'man_grp_pc_usd', 'serv_grp_pc_usd']) \\\n", " .set_index(\"GID_1\")\n", "\n", + "TODO\n", + "# population-weighted (even gdp pc) for gap-filling\n", + "# downscale partner O/Ds to ADM1\n", + "# find weighted centroid to use as representative node\n", + "# global ADM1 land routing grid\n", + "TODO\n", + "\n", "gva_proportion_adm1_sector = (gva_adm1_sector_tha / gva_adm1_sector_tha.sum()).rename(columns={\"serv\":\"ser\"}) \\\n", " .reset_index().melt(id_vars=\"GID_1\", var_name=\"sector\", value_name=\"proportion\")\n", "gva_proportion_adm1_sector[\"GID_0\"] = \"THA\"\n", @@ -70,7 +149,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -105,7 +184,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -116,7 +195,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -144,7 +223,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/plot-damage-curve.ipynb b/notebooks/plot-damage-curve.ipynb new file mode 100644 index 
00000000..c4cec582 --- /dev/null +++ b/notebooks/plot-damage-curve.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas\n", + "import matplotlib.pyplot as plt\n", + "from snail.damages import PiecewiseLinearDamageCurve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/residential_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nres = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/commercial_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nres.damage_fraction(0.15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "df = pandas.DataFrame({'depth': nres.intensity, 'Residential (3-2)': res.damage, 'Commercial (3-6)': nres.damage}).set_index('depth')\n", + "df.plot(ax=ax)\n", + "ax.set_ylabel('Damage Fraction')\n", + "ax.set_xlabel('Inundation Depth (m)')\n", + "ax.set_title(\"\"\"Average continental damage functions for flooding\n", + "of buildings in Asia, Huizinga et al. 
(2017)\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig.savefig('../bundled_data/damage_curves/asia-buildings.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "paved = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"central\")\n", + "lower = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\")\n", + "upper = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"upper\")\n", + "motorway = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/road_motorway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\")\n", + "motorway_high = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/road_motorway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"high flow\")\n", + "rail_a = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/rail_railway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\")\n", + "rail_b = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../bundled_data/damage_curves/flood/rail_railway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"high\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "df = pandas.DataFrame({\n", + " 'depth': paved.intensity, 'Roads (F7.2a)': lower.damage, 'Roads (F7.2b)': upper.damage,\n", + " 
'Trunk roads (F7.4)': motorway.damage,\n", + " 'Trunk roads (F7.5)': motorway_high.damage,\n", + "}).set_index('depth')\n", + "df.plot(ax=ax)\n", + "ax.set_ylabel('Damage Fraction')\n", + "ax.set_xlabel('Inundation Depth (m)')\n", + "ax.set_title(\"\"\"Continental damage functions for flooding\n", + "of roads in Asia, Huizinga et al. (2017)\"\"\")\n", + "fig.savefig('../bundled_data/damage_curves/asia-road.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "fig, ax = plt.subplots()\n", + "df = pandas.DataFrame({\n", + " 'depth': paved.intensity,\n", + " 'Rail (F8.6a)': rail_a.damage,\n", + " 'Rail (F8.6b)': rail_b.damage,\n", + "}).set_index('depth')\n", + "df.plot(ax=ax)\n", + "ax.set_ylabel('Damage Fraction')\n", + "ax.set_xlabel('Inundation Depth (m)')\n", + "ax.set_title(\"\"\"Continental damage functions for flooding\n", + "of rail in Asia, Huizinga et al. (2017)\"\"\")\n", + "fig.savefig('../bundled_data/damage_curves/asia-rail.png')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/summarise-total.ipynb b/notebooks/summarise-total.ipynb new file mode 100644 index 00000000..affc604e --- /dev/null +++ b/notebooks/summarise-total.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
ls '/data/sharepoint/SharePoint_Geography_-_Jim_Halls_group_-_World_Bank_project_(ext)/World Bank project (ext)/Data/Thai Floods 2011/results/buildings_flood'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_dir = Path('/data/sharepoint/SharePoint_Geography_-_Jim_Halls_group_-_World_Bank_project_(ext)/World Bank project (ext)/Data/Thai Floods 2011/results/buildings_flood')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_building_damage = pandas.read_csv(base_dir / 'adm1_building_damage.csv')\n", + "disruption_ag_1ss = pandas.read_csv(base_dir / 'disruption_ag_1ss.csv')\n", + "disruption_serv_1ss = pandas.read_csv(base_dir / 'disruption_serv_1ss.csv')\n", + "disruption_man_1ss = pandas.read_csv(base_dir / 'disruption_man_1ss.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_building_damage[[c for c in adm1_building_damage.columns if 'usd' in c]].sum() / 1e9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "{\n", + "\"ag\": disruption_ag_1ss['sum'].sum() / (1e6 * 365),\n", + "\"man\": disruption_man_1ss['sum'].sum() / (1e6 * 365),\n", + "\"serv\": disruption_serv_1ss['sum'].sum() / (1e6 * 365),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dose = pandas.read_csv('../results/input/giri/THA/trade/DOSE_V2.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dose_tha = dose.query('GID_0 == \"THA\" & year == 2018').copy()\n", + "\n", + "for sector in (\"ag\", \"man\", \"serv\"):\n", + " dose_tha[sector] = dose_tha[\"pop\"] * dose_tha[f\"{sector}_grp_pc_usd_2015\"]\n", + "\n", + "(dose_tha[[\"ag\", \"man\", \"serv\"]].sum() / 1e9)#.sum(), 
dose_tha.shape\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/windowed-raster.ipynb b/notebooks/windowed-raster.ipynb index 06c85d38..99fe0296 100644 --- a/notebooks/windowed-raster.ipynb +++ b/notebooks/windowed-raster.ipynb @@ -23,9 +23,9 @@ "metadata": {}, "outputs": [], "source": [ - "value_150ss_tif = \"input/giri/THA/bem_5x5_valfis_res__THA.tif\"\n", - "volume_3ss_tif = \"input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\"\n", - "flood_1ss_tif = \"input/footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif\"" + "value_150ss_tif = \"../results/input/giri/THA/bem_5x5_valfis_res__THA.tif\"\n", + "volume_3ss_tif = \"../results/input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\"\n", + "flood_1ss_tif = \"../results/input/footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif\"" ] }, { @@ -190,7 +190,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/vol_150ss.tif\", 'w',\n", + "with rasterio.open(\"../results/input/giri/THA/vol_150ss.tif\", 'w',\n", " driver='GTiff',\n", " height=volume_150ss.shape[0],\n", " width=volume_150ss.shape[1],\n", @@ -208,7 +208,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/vol_3ss.tif\", 'w',\n", + "with rasterio.open(\"../results/input/giri/THA/vol_3ss.tif\", 'w',\n", " driver='GTiff',\n", " height=volume_3ss.shape[0],\n", " width=volume_3ss.shape[1],\n", @@ -245,7 +245,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/val_vol_150ss.tif\", 'w',\n", + "with 
rasterio.open(\"../results/input/giri/THA/val_vol_150ss.tif\", 'w',\n", " driver='GTiff',\n", " height=value_per_volume_150ss.shape[0],\n", " width=value_per_volume_150ss.shape[1],\n", @@ -264,7 +264,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/val_vol_3ss.tif\", 'w',\n", + "with rasterio.open(\"../results/input/giri/THA/val_vol_3ss.tif\", 'w',\n", " driver='GTiff',\n", " height=value_per_volume_3ss.shape[0],\n", " width=value_per_volume_3ss.shape[1],\n", @@ -283,7 +283,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/val_3ss.tif\", 'w',\n", + "with rasterio.open(\"../results/input/giri/THA/val_3ss.tif\", 'w',\n", " driver='GTiff',\n", " height=value_3ss.shape[0],\n", " width=value_3ss.shape[1],\n", @@ -431,7 +431,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/dmg_frac_1ss.tif\", 'w',\n", + "with rasterio.open(\"../results/input/giri/THA/dmg_frac_1ss.tif\", 'w',\n", " driver='GTiff',\n", " height=damage_fraction_1ss.shape[0],\n", " width=damage_fraction_1ss.shape[1],\n", @@ -449,7 +449,7 @@ "metadata": {}, "outputs": [], "source": [ - "with rasterio.open(\"input/giri/THA/dmg_val_1ss.tif\", 'w',\n", + "with rasterio.open(\"../results/input/giri/THA/dmg_val_1ss.tif\", 'w',\n", " driver='GTiff',\n", " height=damage_value_1ss.shape[0],\n", " width=damage_value_1ss.shape[1],\n", @@ -461,6 +461,44 @@ " ds.write(damage_value_1ss, indexes=1)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "damage_value_1ss.sum() / 1e9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value_150ss.sum() / 1e9, value_3ss.sum() / 1e9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "footprint_value_3ss.sum() / 1e9, footprint_value_1ss.sum() / 1e9" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"../results/input/giri/THA/nres_dmg_val_1ss.tif\") as nres_dmg_val_1ss_ds:\n", + " nres_dmg_val_1ss = read_ds(nres_dmg_val_1ss_ds)\n", + "nres_dmg_val_1ss.sum() / 1e9" + ] + }, { "cell_type": "code", "execution_count": null, @@ -720,6 +758,44 @@ "end\n", "\"\"\"" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01_clipped.tif\n", + "for sector in serv ag man\n", + " gdal_calc.py \\\n", + " -A ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01_clipped.tif \\\n", + " -B gva_{$sector}_1ss_clipped.tif \\\n", + " --outfile=disruption_0.3m_{$sector}_1ss.tif \\\n", + " --calc=\"(A>0.3)*B\"\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r disruption_0.3m_{$sector}_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o disruption_0.3m_{$sector}_1ss.csv\n", + "end\n", + "\n", + "for sector in serv ag man\n", + " gdal_calc.py \\\n", + " -A nres_dmg_frac_1ss.tif \\\n", + " -B gva_{$sector}_1ss_clipped.tif \\\n", + " --outfile=disruption_dmg_{$sector}_1ss.tif \\\n", + " --calc=\"A*B\"\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r disruption_dmg_{$sector}_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o disruption_dmg_{$sector}_1ss.csv\n", + "end\n", + "\"\"\"" + ] } ], "metadata": { From 24e00511bd8c781e88c78c3818a65c4a0013a4ef Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 09:40:12 +0100 Subject: [PATCH 17/27] Drop overture download --- workflow/buildings/overture.smk | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 workflow/buildings/overture.smk diff --git a/workflow/buildings/overture.smk 
b/workflow/buildings/overture.smk deleted file mode 100644 index b2f746c5..00000000 --- a/workflow/buildings/overture.smk +++ /dev/null @@ -1,24 +0,0 @@ -""" -Download Overture Maps - -Source ------- - - -""" - -rule download_overture: - output: - release_dir=directory("{OUTPUT_DIR}/input/overture/{params.RELEASE}") - params: - RELEASE="2024-01-17-alpha.0" - shell: - """ - pushd {output.release_dir} - pwd - # aws s3 sync \ - # --region us-west-2 \ - # --no-sign-request \ - # s3://overturemaps-us-west-2/release/{params.RELEASE}/ . - popd - """ From 0f48c46a092d4fc86c533717a56c35b1a2b342ba Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 09:40:33 +0100 Subject: [PATCH 18/27] Update variable name comment --- workflow/buildings/imf-investment-capital-stock.smk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflow/buildings/imf-investment-capital-stock.smk b/workflow/buildings/imf-investment-capital-stock.smk index 2b42a6ef..53a479ed 100644 --- a/workflow/buildings/imf-investment-capital-stock.smk +++ b/workflow/buildings/imf-investment-capital-stock.smk @@ -75,6 +75,7 @@ rule extract_imf_icsd: input.xlsx, sheet_name='Dataset', ) - # cn is "Capital stock at current PPPs (in mil. 
2017US$)" + # k{sector}_rppp variables are capital stock in billions of constant + # 2017 international dollars for government, private and public-private df = df[['country','isocode','year','kgov_rppp', 'kpriv_rppp', 'kppp_rppp']] df.to_csv(output.csv, index=False) From dac5df56df36e72036ea2bdcf116372d885a1fef Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 09:40:51 +0100 Subject: [PATCH 19/27] Add nbstripout for notebooks --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 2f668239..5476aeb8 100644 --- a/environment.yml +++ b/environment.yml @@ -31,6 +31,7 @@ dependencies: - jq # JSON processing tool - matplotlib==3.7.1 # basic plotting - nb_conda_kernels # notebook support + - nbstripout # notebooks in git - netCDF4 # NetCDF file format - networkx # networks - numpy # data arrays From 6b5dea408b675bea27ce9ff4645cf4cc84b20f19 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 15:50:46 +0100 Subject: [PATCH 20/27] Allow multiple columns in damage curves, default to second column for damage fraction --- src/open_gira/io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/open_gira/io.py b/src/open_gira/io.py index 137d4d61..bda74fa8 100644 --- a/src/open_gira/io.py +++ b/src/open_gira/io.py @@ -255,7 +255,6 @@ def read_damage_curves(damage_curves_dir: str, hazard_type: str, asset_types: se } for asset_type, damage_curve in damage_curves.items(): - assert len(damage_curve.columns) == 2 # check hazard intensity and damage fraction values are 0 or positive real assert ((damage_curve >= 0).all()).all() # check damage fraction is less than or equal to 1 From 3da75a2647e8b5d6464982e151c7a0e25600aa69 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 15:51:35 +0100 Subject: [PATCH 21/27] Drop overture include --- workflow/Snakefile | 1 - 1 file changed, 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index bc29db71..3bee647d 100644 --- 
a/workflow/Snakefile +++ b/workflow/Snakefile @@ -120,7 +120,6 @@ include: "buildings/penn-world-tables.smk" include: "buildings/imf-investment-capital-stock.smk" include: "buildings/ghsl-built.smk" include: "buildings/giri-bem.smk" -include: "buildings/overture.smk" include: "buildings/disaggregate_bem.smk" include: "nature-ecosystems/land-cover.smk" From d48c51c8a9dc9201d69dba3e6ab443078f8d5b4f Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 1 Oct 2024 15:51:47 +0100 Subject: [PATCH 22/27] Format script --- src/open_gira/io.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/open_gira/io.py b/src/open_gira/io.py index bda74fa8..c9757e6a 100644 --- a/src/open_gira/io.py +++ b/src/open_gira/io.py @@ -78,22 +78,22 @@ def defaults() -> Tuple[int, int, int]: scale_factor, add_offset, fill_value = defaults() else: scale_factor, add_offset, fill_value = netcdf_packing_parameters( - da.min().item(), - da.max().item(), - n_bits + da.min().item(), da.max().item(), n_bits ) return { da.name: { - 'dtype': f'int{n_bits:d}', - 'scale_factor': scale_factor, - 'add_offset': add_offset, - '_FillValue': fill_value + "dtype": f"int{n_bits:d}", + "scale_factor": scale_factor, + "add_offset": add_offset, + "_FillValue": fill_value, } } -def netcdf_packing_parameters(minimum: float, maximum: float, n_bits: int) -> Tuple[float, float]: +def netcdf_packing_parameters( + minimum: float, maximum: float, n_bits: int +) -> Tuple[float, float]: """ Given (floating point) data within a certain range, find the best scale factor and offset to use to pack as signed integer values, using most of @@ -135,7 +135,7 @@ def netcdf_packing_parameters(minimum: float, maximum: float, n_bits: int) -> Tu # _FillValue used to representing NaN as serialised integer # we have kept room at the ends of the integer bit space to avoid a collision - fill_value = -2 ** (n_bits - 1) + fill_value = -(2 ** (n_bits - 1)) # if there is no variance in the 
data, return unscaled if minimum == maximum: @@ -216,7 +216,9 @@ def write_empty_frames(edges_path: str, nodes_path: Optional[str] = None) -> Non """ # write with a CRS, makes it easier to concatenate dataframes later - empty_gdf = gpd.GeoDataFrame({"geometry": []}, crs=pyproj.CRS.from_user_input(WGS84_EPSG)) + empty_gdf = gpd.GeoDataFrame( + {"geometry": []}, crs=pyproj.CRS.from_user_input(WGS84_EPSG) + ) empty_gdf.to_parquet(edges_path) # some parts of the workflow only consider edges, not nodes @@ -227,7 +229,9 @@ def write_empty_frames(edges_path: str, nodes_path: Optional[str] = None) -> Non return -def read_damage_curves(damage_curves_dir: str, hazard_type: str, asset_types: set) -> dict[str, pd.DataFrame]: +def read_damage_curves( + damage_curves_dir: str, hazard_type: str, asset_types: set +) -> dict[str, pd.DataFrame]: """ Load damage curves from CSVs on disk @@ -251,7 +255,8 @@ def read_damage_curves(damage_curves_dir: str, hazard_type: str, asset_types: se damage_curves: dict[str, pd.DataFrame] = { # curves expected to be named as a value of Asset class, e.g. 
RoadAssets.BRIDGE -> road_bridge.csv # dict is asset_type: dataframe with hazard intensity [0, inf] and damage fraction [0, 1] - splitext(basename(path))[0]: pd.read_csv(path, comment=COMMENT_PREFIX) for path in damage_curve_paths + splitext(basename(path))[0]: pd.read_csv(path, comment=COMMENT_PREFIX) + for path in damage_curve_paths } for asset_type, damage_curve in damage_curves.items(): @@ -261,7 +266,9 @@ def read_damage_curves(damage_curves_dir: str, hazard_type: str, asset_types: se assert (damage_curve.iloc[:, 1] <= 1).all() if not set(damage_curves.keys()).issuperset(asset_types): - raise RuntimeError(f"requested {asset_types=} not all found: {damage_curves.keys()=}") + raise RuntimeError( + f"requested {asset_types=} not all found: {damage_curves.keys()=}" + ) return damage_curves @@ -284,11 +291,11 @@ def read_rehab_costs(path: str) -> pd.DataFrame: assert len(costs.columns) == 2 # check asset_type - assert 'asset_type' == costs.columns[0] + assert "asset_type" == costs.columns[0] assert costs.asset_type.dtype == object # check costs assert costs.iloc[:, 1].dtype == float assert (costs.iloc[:, 1] >= 0).all() - return costs \ No newline at end of file + return costs From e9391867d72fcf91f3cb947e75e52baadbd87165 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Thu, 17 Oct 2024 12:45:24 +0100 Subject: [PATCH 23/27] Download JRC river flood rp maps --- workflow/Snakefile | 1 + workflow/flood/jrc.smk | 51 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 workflow/flood/jrc.smk diff --git a/workflow/Snakefile b/workflow/Snakefile index 3bee647d..d7222333 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -144,6 +144,7 @@ include: "transport/multi-modal/multi_modal.smk" include: "transport/flow_allocation/allocate.smk" include: "flood/aqueduct.smk" +include: "flood/jrc.smk" include: "flood/trim_hazard_data.smk" include: "tropical-cyclone/IBTrACS.smk" diff --git a/workflow/flood/jrc.smk 
b/workflow/flood/jrc.smk new file mode 100644 index 00000000..173c055b --- /dev/null +++ b/workflow/flood/jrc.smk @@ -0,0 +1,51 @@ +"""Download and extract JRC river flood return period maps + +https://data.jrc.ec.europa.eu/dataset/jrc-floods-floodmapgl_rp50y-tif + +The global river flood hazard maps are a gridded data set representing +inundation along the river network, for seven different flood return periods +(from 1-in-10-years to 1-in-500-years). The input river flow data for the new +maps are produced by means of the open-source hydrological model LISFLOOD, while +inundation simulations are performed with the hydrodynamic model LISFLOOD-FP. +The extent comprises the entire world with the exception of Greenland and +Antarctica and small islands with river basins smaller than 500km2. + +Cell values indicate water depth (in m). The maps can be used to assess the +exposure of population and economic assets to river floods, and to perform flood +risk assessments. The dataset is created as part of the Copernicus Emergency +Management Service. NOTE: this dataset is not an official flood hazard map (for +details and limitations please refer to related publications). + +Citation: + +Baugh, Calum; Colonese, Juan; D'Angelo, Claudia; Dottori, Francesco; Neal, +Jeffrey; Prudhomme, Christel; Salamon, Peter (2024): Global river flood hazard +maps. 
European Commission, Joint Research Centre (JRC) [Dataset] PID: +http://data.europa.eu/89h/jrc-floods-floodmapgl_rp50y-tif +""" + +rule download_jrc_flood: + output: + zip="{OUTPUT_DIR}/input/jrc_flood/floodMapGL_rp{RP}y.zip" + shell: + """ + output_dir=$(dirname {output.zip}) + + wget -q -nc \ + --directory-prefix=$output_dir \ + https://cidportal.jrc.ec.europa.eu/ftp/jrc-opendata/FLOODS/GlobalMaps/floodMapGL_rp{wildcards.RP}y.zip + """ +rule extract_jrc_flood: + input: + tiff="{OUTPUT_DIR}/input/jrc_flood/floodMapGL_rp{RP}y.zip" + output: + tiff="{OUTPUT_DIR}/input/jrc_flood/floodMapGL_rp{RP}y.tif" + shell: + """ + output_dir=$(dirname {output.tiff}) + unzip $output_dir/floodMapGL_rp{wildcards.RP}y.zip floodMapGL_rp{wildcards.RP}y.tif -d $output_dir + """ + +rule all_jrc_flood: + input: + tiffs=expand("results/input/jrc_flood/floodMapGL_rp{RP}y.tif", RP=[10, 20, 50, 100, 200, 500]) From 81e2905475a5e3da7a87a9d45a14ff9b6a0e2cc7 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 18 Oct 2024 17:28:31 +0100 Subject: [PATCH 24/27] Start RP demo --- notebooks/return-period-demo.ipynb | 299 +++++++++++++++++++++++++++++ 1 file changed, 299 insertions(+) create mode 100644 notebooks/return-period-demo.ipynb diff --git a/notebooks/return-period-demo.ipynb b/notebooks/return-period-demo.ipynb new file mode 100644 index 00000000..b8541433 --- /dev/null +++ b/notebooks/return-period-demo.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess\n", + "from pathlib import Path\n", + "\n", + "import geopandas\n", + "import pandas\n", + "from open_gira.geometry import buffer_via_reprojection\n", + "from open_gira.exposure import max_vector_rasters_intersection\n", + "from snail.damages import PiecewiseLinearDamageCurve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_dir = Path(\"../cluster\")" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm0 = geopandas.read_parquet(\n", + " base_dir / \"processed_data/admin-boundaries/admin-level-0.geoparquet\"\n", + ").query('GID_0 == \"THA\"')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tha_bounds = dict(adm0.bounds.iloc[0, :])\n", + "minx, miny, maxx, maxy = (\n", + " tha_bounds[\"minx\"],\n", + " tha_bounds[\"miny\"],\n", + " tha_bounds[\"maxx\"],\n", + " tha_bounds[\"maxy\"],\n", + ")\n", + "minx, miny, maxx, maxy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raster_paths = sorted(Path(\"../results/input/jrc_flood/\").glob(\"*.tif\"))\n", + "for global_tiff in raster_paths:\n", + " tha_tiff = base_dir / \"processed_data\" / \"hazard\" / global_tiff.name\n", + " if tha_tiff.exists:\n", + " continue\n", + " print(tha_tiff)\n", + " subprocess.check_output(\n", + " [\n", + " \"gdalwarp\",\n", + " \"-te\",\n", + " str(minx),\n", + " str(miny),\n", + " str(maxx),\n", + " str(maxy),\n", + " str(global_tiff),\n", + " str(tha_tiff),\n", + " ]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raster_paths = sorted(\n", + " str(p) for p in (base_dir / \"processed_data\" / \"hazard\").glob(\"*.tif\")\n", + ")\n", + "rasters = pandas.DataFrame(data={\"path\": raster_paths})\n", + "rasters[\"rp\"] = rasters.path.str.extract(\"(\\d+)\").astype(int)\n", + "rasters[\"key\"] = rasters.rp.apply(lambda rp: f\"jrc_flood_{rp:03d}\")\n", + "rasters.sort_values(by=\"rp\", inplace=True)\n", + "rasters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rail_edges = geopandas.read_parquet(\n", + " base_dir\n", + " / \"transport_disruption\"\n", + " / \"input\"\n", + " / \"networks\"\n", + " / 
\"rail\"\n", + " / \"project-thailand\"\n", + " / \"edges.gpq\"\n", + ").query('from_iso_a3 == \"THA\" | to_iso_a3 == \"THA\"')\n", + "rail_edges.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_edges = geopandas.read_parquet(\n", + " base_dir\n", + " / \"transport_disruption\"\n", + " / \"input\"\n", + " / \"networks\"\n", + " / \"road\"\n", + " / \"project-thailand\"\n", + " / \"edges.gpq\"\n", + ").query('from_iso_a3 == \"THA\" | to_iso_a3 == \"THA\"')\n", + "road_edges.head(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load powerplants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants = geopandas.read_parquet(\n", + " base_dir / \"power_flood\" / \"data\" / \"powerplants.geoparquet\"\n", + ").cx[minx:maxx, miny:maxy]\n", + "powerplants[\"geom_point\"] = powerplants.geometry\n", + "powerplants.geometry = buffer_via_reprojection(powerplants.geometry, 250)\n", + "powerplants.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# US power plant construction costs\n", + "# https://www.eia.gov/electricity/generatorcosts/xls/generator_costs_2021.xlsx\n", + "# e.g. for gas...\n", + "# 920 [USD 2021] / capacity [kW]\n", + "# 920000 [USD 2021] / capacity [MW]\n", + "# N.B. 
not deflated to 2011, and no accounting for learning effects between 2011 and 2021\n", + "\n", + "construction_cost_usd_per_mw = {\n", + " \"Biomass\": 2592000,\n", + " \"Gas\": 920000,\n", + " \"Solar\": 1561000,\n", + " \"Wind\": 1428000,\n", + "}\n", + "powerplants[\"construction_cost\"] = powerplants.apply(lambda row: construction_cost_usd_per_mw[row.primary_fuel] * row.power_mw, axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Powerplant RP damages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants = max_vector_rasters_intersection(powerplants, rasters).fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "damage_curves = {\n", + " \"paved\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"central\",\n", + " ),\n", + " \"lower\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + " ),\n", + " \"upper\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"upper\",\n", + " ),\n", + " \"motorway\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_motorway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + " ),\n", + " \"motorway_high\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_motorway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"high 
flow\",\n", + " ),\n", + " \"rail_a\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/rail_railway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + " ),\n", + " \"rail_b\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/rail_railway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"high\",\n", + " ),\n", + " \"powerplants\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/powerplants.csv\",\n", + " intensity_col=\"depth_m\",\n", + " damage_col=\"damage_fraction\"\n", + " ),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "exposure = powerplants.loc[:, rasters.key]\n", + "damage_fraction = pandas.DataFrame(\n", + " damage_curves[\"powerplants\"].damage_fraction(exposure),\n", + " index=exposure.index,\n", + " columns=exposure.columns\n", + ")\n", + "damage_value = damage_fraction \\\n", + " .multiply(powerplants[fields.REHAB_COST], axis=\"index\") \\\n", + " .multiply(powerplants[fields.SPLIT_LENGTH], axis=\"index\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9c146c57b3bed1c5fb93dde2e3fc88ee94ccf932 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 18 Oct 2024 17:28:45 +0100 Subject: [PATCH 25/27] Add helper functions --- src/open_gira/exposure.py | 44 +++++++++++++++++++++++++++++++++++++++ 
src/open_gira/geometry.py | 20 ++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/open_gira/exposure.py create mode 100644 src/open_gira/geometry.py diff --git a/src/open_gira/exposure.py b/src/open_gira/exposure.py new file mode 100644 index 00000000..5d6a1a2d --- /dev/null +++ b/src/open_gira/exposure.py @@ -0,0 +1,44 @@ +import geopandas +import numpy +import pandas +import rasterstats + + +def max_vector_rasters_intersection( + vector: geopandas.GeoDataFrame, rasters: pandas.DataFrame +) -> geopandas.GeoDataFrame: + """Intersect vector geometries with raster files, adding columns with + maximum values from the intersection + + Parameters + ---------- + vector: GeoDataFrame + vector geometries. Output columns will be added to this GeoDataFrame. + rasters: DataFrame + metadata table with "key" and "path" columns. "key" is used for output + column names. "path" is used to specify raster file paths. + """ + for raster in rasters.itertuples(): + vector[raster.key] = max_vector_raster_intersection( + vector.geometry, raster.path + ) + return vector + + +def max_vector_raster_intersection( + vector: geopandas.GeoSeries, raster: str +) -> numpy.array: + """ + Parameters + ---------- + vector: GeoSeries + vector (point/line/polygon) geometries + raster: str + path to raster file + """ + maxs = numpy.zeros(len(vector.geometry)) + for i, stats in enumerate( + rasterstats.gen_zonal_stats(vector, raster, stats="max", all_touched=True) + ): + maxs[i] = stats["max"] + return maxs diff --git a/src/open_gira/geometry.py b/src/open_gira/geometry.py new file mode 100644 index 00000000..95144fc8 --- /dev/null +++ b/src/open_gira/geometry.py @@ -0,0 +1,20 @@ +import geopandas + + +def buffer_via_reprojection( + geoms: geopandas.GeoSeries, buffer_radius_m +) -> geopandas.GeoSeries: + """Buffer geographical geometries + + First project into a UTM CRS, estimated based on the bounds of the dataset, + then buffer, then project back into original CRS. 
+ + Parameters + ---------- + geoms: GeoDataFrame + buffer_radius_m: float + """ + projected_crs = geoms.estimate_utm_crs() + return ( + geoms.geometry.to_crs(projected_crs).buffer(buffer_radius_m).to_crs(geoms.crs) + ) From 8a564b70452c37c79dfa207065f9139a8a5dacb2 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 18 Oct 2024 17:29:00 +0100 Subject: [PATCH 26/27] Add powerplant damage curve --- config/damage_curves/flood/powerplants.csv | 66 ++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 config/damage_curves/flood/powerplants.csv diff --git a/config/damage_curves/flood/powerplants.csv b/config/damage_curves/flood/powerplants.csv new file mode 100644 index 00000000..ee292749 --- /dev/null +++ b/config/damage_curves/flood/powerplants.csv @@ -0,0 +1,66 @@ +# https://zenodo.org/records/10203846 +# Table_D2_Multi-Hazard_Fragility_and_Vulnerability_Curves_V1.0.0.xlsx +# Nirandjan 2024, V1.0, F_Vuln_Depth, F1.1, F1.2, F1.3 (identical) +depth_m,damage_fraction +0.00,0.000 +0.05,0.004 +0.10,0.008 +0.15,0.012 +0.20,0.016 +0.25,0.021 +0.30,0.025 +0.35,0.029 +0.40,0.033 +0.45,0.037 +0.50,0.041 +0.55,0.045 +0.60,0.049 +0.65,0.053 +0.70,0.057 +0.75,0.062 +0.80,0.066 +0.85,0.070 +0.90,0.074 +0.95,0.078 +1.00,0.082 +1.05,0.086 +1.10,0.090 +1.15,0.094 +1.20,0.098 +1.25,0.103 +1.30,0.107 +1.35,0.111 +1.40,0.115 +1.45,0.119 +1.50,0.123 +1.55,0.127 +1.60,0.131 +1.65,0.135 +1.70,0.139 +1.75,0.144 +1.80,0.148 +1.85,0.152 +1.90,0.156 +1.95,0.160 +2.00,0.164 +2.05,0.168 +2.10,0.172 +2.15,0.176 +2.20,0.180 +2.25,0.185 +2.30,0.189 +2.35,0.193 +2.40,0.197 +2.45,0.201 +2.50,0.205 +2.55,0.209 +2.60,0.213 +2.65,0.217 +2.70,0.243 +2.75,0.251 +2.80,0.259 +2.85,0.268 +2.90,0.276 +2.95,0.284 +3.00,0.292 +3.05,0.300 From 7eae18d2b959ba08d35c006287ba73e7b263257d Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 1 Nov 2024 12:31:00 +0000 Subject: [PATCH 27/27] Return period JRC damage demo, factor out some helpers --- notebooks/built-capital-gva.ipynb | 514 
++++++++++ notebooks/return-period-demo.ipynb | 1473 ++++++++++++++++++++++++++-- notebooks/windowed-raster.ipynb | 822 ---------------- src/open_gira/exposure.py | 3 +- src/open_gira/geometry.py | 55 ++ src/open_gira/io.py | 36 +- 6 files changed, 2003 insertions(+), 900 deletions(-) create mode 100644 notebooks/built-capital-gva.ipynb delete mode 100644 notebooks/windowed-raster.ipynb diff --git a/notebooks/built-capital-gva.ipynb b/notebooks/built-capital-gva.ipynb new file mode 100644 index 00000000..922874b3 --- /dev/null +++ b/notebooks/built-capital-gva.ipynb @@ -0,0 +1,514 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import geopandas\n", + "import pandas\n", + "import rasterio\n", + "\n", + "from affine import Affine\n", + "from open_gira.io import write_raster_ds as write_ds, read_raster_ds as read_ds\n", + "from open_gira.geometry import resample_sum, clip_array, repeat_2d, floor_int, zero_divide, rasterize\n", + "from snail.damages import PiecewiseLinearDamageCurve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_dir = Path(\"../results/input\")\n", + "# see scripts/building_damage.py for alternative implementation\n", + "\n", + "# inputs\n", + "rr_val_150ss_tif = base_dir / \"giri/THA/bem_5x5_valfis_res__THA.tif\"\n", + "rr_vol_3ss_tif = base_dir / \"ghsl/THA/ghs_built_v_res_3ss__THA.tif\"\n", + "nr_val_150ss_tif = base_dir / \"giri/THA/bem_5x5_valfis_nres__THA.tif\"\n", + "nr_vol_3ss_tif = base_dir / \"ghsl/THA/ghs_built_v_nres_3ss__THA.tif\"\n", + "flood_1ss_tif = base_dir / \"footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif\"\n", + "\n", + "# outputs\n", + "rr_val_3ss_tif = base_dir / \"giri/THA/bem_3ss_valfis_res__THA.tif\"\n", + "nr_val_3ss_tif = base_dir / 
\"giri/THA/bem_3ss_valfis_nres__THA.tif\"\n", + "rr_dmg_val_1ss_tif = base_dir / \"giri/THA/dmg_val_res_1ss.tif\"\n", + "nr_dmg_val_1ss_tif = base_dir / \"giri/THA/dmg_val_nres_1ss.tif\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def process_val_3ss(val_150ss_tif, vol_3ss_tif, val_3ss_tif):\n", + " val_150ss, val_150ss_ds = read_ds(val_150ss_tif, replace_nodata=True)\n", + " vol_3ss, vol_3ss_ds = read_ds(vol_3ss_tif, replace_nodata=True)\n", + " volume_3ss, t_3ss = calculate_volume_3ss(val_150ss, val_150ss_ds, vol_3ss, vol_3ss_ds)\n", + " write_ds(val_3ss_tif, volume_3ss, t_3ss)\n", + "\n", + "def calculate_volume_3ss(val_150ss, val_150ss_ds, vol_3ss, vol_3ss_ds):\n", + " # lon, lat of volume_3ss top left\n", + " vol_3ss_ul_xy = vol_3ss_ds.transform * (0, 0)\n", + " # col, row in value_150ss_all, inset one extra\n", + " val_150ss_ul_cr = floor_int(~val_150ss_ds.transform * (vol_3ss_ul_xy)) + 1\n", + " # lon, lat of that val_150ss_all pixel - this is our new top left\n", + " ul_xy_150ss = val_150ss_ds.transform * val_150ss_ul_cr\n", + " # col, row in vol_3ss_all\n", + " vol_3ss_ul_cr = floor_int(~vol_3ss_ds.transform * ul_xy_150ss)\n", + " # lon, lat of that vol_3ss_all pixel - new top left for 3ss purposes (tiny bit offset)\n", + " ul_xy_3ss = vol_3ss_ds.transform * vol_3ss_ul_cr\n", + " ul_xy_150ss, ul_xy_3ss\n", + "\n", + " # Clip out vol array\n", + " col_idx, row_idx = vol_3ss_ul_cr\n", + " vol_3ss = vol_3ss[row_idx:, col_idx:]\n", + " vol_3ss = clip_array(vol_3ss, 50)\n", + " # Resample vol to coarse-scale, \"sum\"\n", + " vol_150ss = resample_sum(vol_3ss, 50)\n", + "\n", + " a, b, _, d, e, _ = vol_3ss_ds.transform[:6]\n", + " # t_150ss = Affine(a * 50, b, ul_xy_150ss[0], d, e * 50, ul_xy_150ss[1])\n", + " t_3ss = Affine(a, b, ul_xy_3ss[0], d, e, ul_xy_3ss[1])\n", + "\n", + " col_idx, row_idx = val_150ss_ul_cr\n", + " ncols, nrows = vol_150ss.shape\n", + " val_150ss = val_150ss[col_idx : 
col_idx + ncols, row_idx : row_idx + nrows]\n", + "\n", + " if val_150ss.shape != vol_150ss.shape:\n", + " print(\"CHKS\", val_150ss.shape, vol_150ss.shape)\n", + " assert False\n", + "\n", + " # Calculate val per unit vol\n", + " # val_per_vol_150ss = val_150ss / vol_150ss\n", + " val_per_vol_150ss = np.divide(\n", + " val_150ss, vol_150ss, out=np.zeros_like(val_150ss), where=vol_150ss != 0\n", + " )\n", + " # Resample to fine-scale val per vol, \"nearest\"\n", + " val_per_vol_3ss = repeat_2d(val_per_vol_150ss, 50)\n", + " # Calculate fine-scale val\n", + " val_3ss = val_per_vol_3ss * vol_3ss\n", + "\n", + " return val_3ss, t_3ss\n", + "\n", + "# ~15s\n", + "process_val_3ss(rr_val_150ss_tif, rr_vol_3ss_tif, rr_val_3ss_tif)\n", + "process_val_3ss(nr_val_150ss_tif, nr_vol_3ss_tif, nr_val_3ss_tif)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Flood intersection\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flood_1ss, flood_1ss_ds = read_ds(flood_1ss_tif, replace_nodata=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nr_val_3ss, nr_val_3ss_ds = read_ds(nr_val_3ss_tif)\n", + "rr_val_3ss, rr_val_3ss_ds = read_ds(rr_val_3ss_tif)\n", + "t_3ss = rr_val_3ss_ds.transform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr_damage_curve = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/residential_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + ")\n", + "nr_damage_curve = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/commercial_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "def clip_rescale_1ss_3ss(t_1ss, t_3ss, data_1ss, data_3ss):\n", + " # lon, lat of footprint top left\n", + " t_1ss_ul_xy = t_1ss * (0, 0)\n", + " # col, row in value_3ss\n", + " t_3ss_ul_cr = floor_int(~t_3ss * (t_1ss_ul_xy))\n", + " # lon, lat of that pixel - this is our new top left\n", + " footprint_ul_xy_3ss = t_3ss * t_3ss_ul_cr\n", + " # col, row in flood_1ss\n", + " t_1ss_ul_cr = floor_int(~t_1ss * footprint_ul_xy_3ss)\n", + "\n", + " # lon, lat of that 1ss pixel - new top left for 1ss purposes (tiny bit offset)\n", + " ul_xy_1ss = t_1ss * t_1ss_ul_cr\n", + "\n", + " # clip to match coarser array extent\n", + " data_1ss_clipped = clip_array(data_1ss, 3)\n", + " data_1ss_height, data_1ss_width = data_1ss_clipped.shape\n", + "\n", + " # lon, lat of footprint lower right\n", + " t_1ss_lr_xy = t_1ss * (data_1ss_width, data_1ss_height)\n", + " # col, row in value_3ss\n", + " t_3ss_lr_cr = floor_int(~t_3ss * (t_1ss_lr_xy))\n", + "\n", + " ulc, ulr = t_3ss_ul_cr\n", + " lrc, lrr = t_3ss_lr_cr\n", + "\n", + " data_3ss_clipped = data_3ss[ulr:lrr, ulc:lrc]\n", + " data_3ss_as_1ss = repeat_2d(data_3ss_clipped, 3) / 9\n", + "\n", + " # Adapt transform to new top-left and resolution\n", + " a, b, _, d, e, _ = t_1ss[:6]\n", + " t_1ss_clipped = Affine(a, b, ul_xy_1ss[0], d, e, ul_xy_1ss[1])\n", + "\n", + " return data_1ss_clipped, data_3ss_as_1ss, t_1ss_clipped\n", + "\n", + "def calculate_damage_val(flood_1ss, t_1ss, val_3ss, t_3ss, damage_curve):\n", + " flood_1ss_clipped, val_1ss, t_1ss_clipped = clip_rescale_1ss_3ss(t_1ss, t_3ss, flood_1ss, val_3ss)\n", + "\n", + " if val_1ss.shape != flood_1ss_clipped.shape:\n", + " print(\"CHKS\", val_1ss.shape, flood_1ss_clipped.shape)\n", + " assert False\n", + "\n", + " damage_fraction_1ss = damage_curve.damage_fraction(\n", + " flood_1ss_clipped\n", + " )\n", + " damage_value_1ss = val_1ss * damage_fraction_1ss\n", + " return damage_value_1ss, t_1ss_clipped\n", + "\n", + "rr_dmg_val_1ss, 
t_1ss_clipped = calculate_damage_val(flood_1ss, flood_1ss_ds.transform, rr_val_3ss, rr_val_3ss_ds.transform, rr_damage_curve)\n", + "nr_dmg_val_1ss, t_1ss_clipped = calculate_damage_val(flood_1ss, flood_1ss_ds.transform, nr_val_3ss, nr_val_3ss_ds.transform, nr_damage_curve)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "write_ds(rr_dmg_val_1ss_tif, rr_dmg_val_1ss, t_1ss_clipped)\n", + "write_ds(nr_dmg_val_1ss_tif, nr_dmg_val_1ss, t_1ss_clipped)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr_dmg_val_1ss.sum() / 1e9, nr_dmg_val_1ss.sum() / 1e9," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr_val_3ss.sum() / 1e9, nr_val_3ss.sum() / 1e9" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GVA downscaling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "ADM1 damage values:\n", + "\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r dmg_val_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o dmg_val_1ss.csv\n", + "\n", + "ADM1 total built volume:\n", + "\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r ../../ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o ghs_built_v_3ss.csv\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_vol = pandas.read_csv(\"input/giri/THA/ghs_built_v_3ss.csv\").rename(\n", + " columns={\"sum\": \"built_volume\"}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1 = 
geopandas.read_file(\"input/admin-boundaries/tha_adm1.shp\").merge(\n", + " adm1_vol, on=\"GID_1\"\n", + ")[[\"GID_1\", \"NAME_1\", \"built_volume\", \"geometry\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with rasterio.open(\"input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\") as vol_3ss_ds:\n", + " vol_3ss = vol_3ss_ds.read(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vol_adm1_3ss = rasterize(adm1, \"built_volume\", vol_3ss_ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(vol_adm1_3ss)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_gva = pandas.read_csv(\"/data/incoming/wenz-2023-dose-reported-subnational-output/DOSE_V2_THA.csv\")\n", + "adm1_gva[\"ag_grp\"] = adm1_gva[\"pop\"] * adm1_gva.ag_grp_pc_usd\n", + "adm1_gva[\"man_grp\"] = adm1_gva[\"pop\"] * adm1_gva.man_grp_pc_usd\n", + "adm1_gva[\"serv_grp\"] = adm1_gva[\"pop\"] * adm1_gva.serv_grp_pc_usd\n", + "\n", + "adm1_gva = geopandas.read_file(\"input/admin-boundaries/tha_adm1.shp\") \\\n", + " .merge(adm1_gva, on=\"GID_1\")[[\"GID_1\", \"NAME_1\", \"ag_grp\", \"man_grp\", \"serv_grp\", \"geometry\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_gva.drop(columns=\"geometry\").to_csv(\"input/giri/THA/DOSE_V2_THA_rgva.csv\")\n", + "adm1_gva.to_file(\"input/giri/THA/DOSE_V2_THA_rgva.gpkg\", driver=\"GPKG\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adm1_gva_ag_3ss = rasterize(adm1_gva, \"ag_grp\", vol_3ss_ds)\n", + "adm1_gva_man_3ss = rasterize(adm1_gva, \"man_grp\", vol_3ss_ds)\n", + "adm1_gva_serv_3ss = rasterize(adm1_gva, \"serv_grp\", 
vol_3ss_ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gva_ag_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_ag_3ss\n", + "gva_man_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_man_3ss\n", + "gva_serv_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_serv_3ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "write_ds(\"input/giri/THA/gva_ag_3ss.tif\", gva_ag_3ss, vol_3ss_ds.transform)\n", + "write_ds(\"input/giri/THA/gva_man_3ss.tif\", gva_man_3ss, vol_3ss_ds.transform)\n", + "write_ds(\"input/giri/THA/gva_serv_3ss.tif\", gva_serv_3ss, vol_3ss_ds.transform)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gva_ag_1ss = repeat_2d(gva_ag_3ss, 3) / 9\n", + "gva_man_1ss = repeat_2d(gva_man_3ss, 3) / 9\n", + "gva_serv_1ss = repeat_2d(gva_serv_3ss, 3) / 9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO figure out transform, check we're on the right grid, write out to files\n", + "# TODO compare with damage fraction, write out interruption\n", + "# TODO calculate per day, sum back to zonal stats\n", + "# TODO check totals (re-aggregate after disaggregation) maybe rescale???" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a, b, c, d, e, f = vol_3ss_ds.transform[:6]\n", + "gva_t_1ss = Affine(a / 3, b, c, d, e / 3, f)\n", + "gva_t_1ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "write_ds(\"input/giri/THA/gva_ag_1ss.tif\", gva_ag_1ss, gva_t_1ss)\n", + "write_ds(\"input/giri/THA/gva_man_1ss.tif\", gva_man_1ss, gva_t_1ss)\n", + "write_ds(\"input/giri/THA/gva_serv_1ss.tif\", gva_serv_1ss, gva_t_1ss)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_man_1ss.tif gva_man_1ss_clipped.tif\n", + "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_man_1ss_clipped.tif --outfile=disruption_man_1ss.tif --calc=\"(A>0.1)*B\"\n", + "\n", + "\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_ag_1ss.tif gva_ag_1ss_clipped.tif\n", + "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_ag_1ss_clipped.tif --outfile=disruption_ag_1ss.tif --calc=\"(A>0.1)*B\"\n", + "\n", + "\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_serv_1ss.tif gva_serv_1ss_clipped.tif\n", + "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_serv_1ss_clipped.tif --outfile=disruption_serv_1ss.tif --calc=\"(A>0.1)*B\"\n", + "\n", + "\n", + "for sector in serv ag man\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r disruption_{$sector}_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o disruption_{$sector}_1ss.csv\n", + "end\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif 
../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01_clipped.tif\n", + "for sector in serv ag man\n", + " gdal_calc.py \\\n", + " -A ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01_clipped.tif \\\n", + " -B gva_{$sector}_1ss_clipped.tif \\\n", + " --outfile=disruption_0.3m_{$sector}_1ss.tif \\\n", + " --calc=\"(A>0.3)*B\"\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r disruption_0.3m_{$sector}_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o disruption_0.3m_{$sector}_1ss.csv\n", + "end\n", + "\n", + "for sector in serv ag man\n", + " gdal_calc.py \\\n", + " -A nres_dmg_frac_1ss.tif \\\n", + " -B gva_{$sector}_1ss_clipped.tif \\\n", + " --outfile=disruption_dmg_{$sector}_1ss.tif \\\n", + " --calc=\"A*B\"\n", + " exactextract \\\n", + " -p ../../admin-boundaries/tha_adm1.shp \\\n", + " -r disruption_dmg_{$sector}_1ss.tif \\\n", + " -f GID_1 \\\n", + " -s sum \\\n", + " -o disruption_dmg_{$sector}_1ss.csv\n", + "end\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/return-period-demo.ipynb b/notebooks/return-period-demo.ipynb index b8541433..0a78b1d8 100644 --- a/notebooks/return-period-demo.ipynb +++ b/notebooks/return-period-demo.ipynb @@ -8,11 +8,21 @@ "source": [ "import subprocess\n", "from pathlib import Path\n", + "from typing import Optional\n", "\n", "import geopandas\n", + "import matplotlib.pyplot as plt\n", + "import numpy\n", "import pandas\n", - "from open_gira.geometry import buffer_via_reprojection\n", + "import rasterio\n", + "import scipy.integrate\n", + "import 
snail.intersection\n", + "from matplotlib.colors import LogNorm\n", + "from open_gira.direct_damages import ReturnPeriodMap\n", "from open_gira.exposure import max_vector_rasters_intersection\n", + "from open_gira.geometry import buffer_via_reprojection\n", + "from open_gira.io import read_raster_ds, write_raster_ds\n", + "from pyproj import Geod\n", "from snail.damages import PiecewiseLinearDamageCurve" ] }, @@ -58,10 +68,10 @@ "metadata": {}, "outputs": [], "source": [ - "raster_paths = sorted(Path(\"../results/input/jrc_flood/\").glob(\"*.tif\"))\n", - "for global_tiff in raster_paths:\n", + "global_raster_paths = sorted(Path(\"../results/input/jrc_flood/\").glob(\"floodMapGL*.tif\"))\n", + "for global_tiff in global_raster_paths:\n", " tha_tiff = base_dir / \"processed_data\" / \"hazard\" / global_tiff.name\n", - " if tha_tiff.exists:\n", + " if tha_tiff.exists():\n", " continue\n", " print(tha_tiff)\n", " subprocess.check_output(\n", @@ -85,7 +95,7 @@ "outputs": [], "source": [ "raster_paths = sorted(\n", - " str(p) for p in (base_dir / \"processed_data\" / \"hazard\").glob(\"*.tif\")\n", + " str(p) for p in (base_dir / \"processed_data\" / \"hazard\").glob(\"floodMapGL*.tif\")\n", ")\n", "rasters = pandas.DataFrame(data={\"path\": raster_paths})\n", "rasters[\"rp\"] = rasters.path.str.extract(\"(\\d+)\").astype(int)\n", @@ -94,6 +104,112 @@ "rasters" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Resample to built capital raster resolution\n", + "for tha_tiff in rasters.path:\n", + " resampled_tiff = base_dir / \"processed_data\" / \"hazard\" / \"3ss\" / Path(tha_tiff).name\n", + " print(resampled_tiff)\n", + " if resampled_tiff.exists():\n", + " resampled_tiff.unlink()\n", + " # continue\n", + " subprocess.check_output(\n", + " [\n", + " \"gdalwarp\",\n", + " \"-t_srs\", \"EPSG:4326\",\n", + " \"-te_srs\", \"EPSG:4326\",\n", + " \"-te\", \"97.3745830\", \"5.6170835\", \"105.6245829\", 
\"20.4504168\",\n", + " \"-ts\", \"9900\", \"17800\",\n", + " str(tha_tiff),\n", + " str(resampled_tiff),\n", + " ]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raster_paths_resampled = sorted(\n", + " str(p) for p in (base_dir / \"processed_data\" / \"hazard\" / \"3ss\").glob(\"floodMapGL*.tif\")\n", + ")\n", + "rasters_3ss = pandas.DataFrame(data={\"path\": raster_paths_resampled})\n", + "rasters_3ss[\"rp\"] = rasters_3ss.path.str.extract(\"_rp(\\d+)y\").astype(int)\n", + "rasters_3ss[\"key\"] = rasters_3ss.rp.apply(lambda rp: f\"jrc_flood_{rp:03d}\")\n", + "rasters_3ss.sort_values(by=\"rp\", inplace=True)\n", + "rasters_3ss" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grids = set(\n", + " snail.intersection.GridDefinition.from_raster(path) for path in rasters.path\n", + ")\n", + "assert len(grids) == 1\n", + "grid = grids.pop()\n", + "grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "damage_curves = {\n", + " \"paved\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"central\",\n", + " ),\n", + " \"paved_lower\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + " ),\n", + " \"paved_upper\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_paved.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"upper\",\n", + " ),\n", + " \"motorway_lower\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_motorway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + " 
),\n", + " \"motorway_upper\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/road_motorway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"high flow\",\n", + " ),\n", + " \"rail_a\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/rail_railway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + " ),\n", + " \"rail_b\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/rail_railway.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"high\",\n", + " ),\n", + " \"powerplants\": PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/powerplants.csv\",\n", + " intensity_col=\"depth_m\",\n", + " damage_col=\"damage_fraction\",\n", + " ),\n", + "}" + ] + }, { "cell_type": "code", "execution_count": null, @@ -134,7 +250,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load powerplants" + "## Load powerplants\n" ] }, { @@ -144,40 +260,194 @@ "outputs": [], "source": [ "powerplants = geopandas.read_parquet(\n", - " base_dir / \"power_flood\" / \"data\" / \"powerplants.geoparquet\"\n", - ").cx[minx:maxx, miny:maxy]\n", + " base_dir / \"power_flood\" / \"data\" / \"powerplants_tha.geoparquet\"\n", + ")\n", "powerplants[\"geom_point\"] = powerplants.geometry\n", "powerplants.geometry = buffer_via_reprojection(powerplants.geometry, 250)\n", "powerplants.head(1)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Powerplant RP damages\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants = max_vector_rasters_intersection(powerplants, rasters).fillna(0)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# US power plant construction costs\n", - "# 
https://www.eia.gov/electricity/generatorcosts/xls/generator_costs_2021.xlsx\n", - "# e.g. for gas...\n", - "# 920 [USD 2021] / capacity [kW]\n", - "# 920000 [USD 2021] / capacity [MW]\n", - "# N.B. not deflated to 2011, and no accounting for learning effects between 2011 and 2021\n", + "powerplants.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def damage_value(\n", + " assets_with_exposure, exposure_columns, damage_curve, cost_column, damage_cost_unit\n", + "):\n", + " exposure = assets_with_exposure.loc[:, exposure_columns]\n", + " damage_fraction = pandas.DataFrame(\n", + " damage_curve.damage_fraction(exposure),\n", + " index=exposure.index,\n", + " columns=exposure.columns,\n", + " )\n", + " damage_value = damage_fraction.multiply(\n", + " assets_with_exposure[cost_column], axis=\"index\"\n", + " )\n", + " damage_value.columns = [\n", + " f\"damage_{damage_cost_unit}__{c}\" for c in damage_value.columns\n", + " ]\n", + " return damage_value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants_damages = damage_value(\n", + " powerplants, rasters.key, damage_curves[\"powerplants\"], \"construction_cost\", \"usd\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class JRCFloodMap(ReturnPeriodMap):\n", + " def __init__(self, tuple):\n", + " # identifying string containing other, inferred attributes, should be\n", + " # unique among any collection of maps\n", + " self.name = tuple.key\n", + " # name of scenario, e.g. 
rcp4p5, historical\n", + " self.scenario = \"historical\"\n", + " # year for which hazard map is valid (may be past, present or future)\n", + " self.year = 2020\n", + " # expect hazard to recur on average every return_period years\n", + " self.return_period_years = tuple.rp\n", "\n", - "construction_cost_usd_per_mw = {\n", - " \"Biomass\": 2592000,\n", - " \"Gas\": 920000,\n", - " \"Solar\": 1561000,\n", - " \"Wind\": 1428000,\n", - "}\n", - "powerplants[\"construction_cost\"] = powerplants.apply(lambda row: construction_cost_usd_per_mw[row.primary_fuel] * row.power_mw, axis=1)" + " @property\n", + " def without_RP(self) -> str:\n", + " return \"jrc_flood\"\n", + "\n", + " @property\n", + " def without_model(self) -> str:\n", + " return \"jrc_flood\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def ead(assets_with_damages, damage_cost_unit, rp_maps):\n", + " # sort by least to most probable\n", + " sorted_rp_maps: list[ReturnPeriodMap] = sorted(rp_maps)\n", + "\n", + " # [0, 1] valued decimal probabilities\n", + " probabilities: list[float] = [\n", + " rp_map.annual_probability for rp_map in sorted_rp_maps\n", + " ]\n", + " # family subset of grouped_direct_damages\n", + " family_column_names: list[str] = [\n", + " f\"damage_{damage_cost_unit}__{rp_map.name}\" for rp_map in sorted_rp_maps\n", + " ]\n", + " family_direct_damages: pandas.DataFrame = assets_with_damages[family_column_names]\n", + "\n", + " # integrate the damage as a function of probability curve using Simpson's rule\n", + " # Simpson's rule as the function to be integrated is non-linear\n", + " return scipy.integrate.simpson(family_direct_damages, x=probabilities, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rp_maps = [JRCFloodMap(r) for r in rasters.itertuples()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "powerplants_damages[\"ead_usd__jrc_flood\"] = ead(powerplants_damages, \"usd\", rp_maps)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants_damages.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants_with_damages = powerplants.join(powerplants_damages)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for depth_col in rasters.key:\n", + " powerplants_with_damages[f\"exposure__{depth_col}\"] = (\n", + " powerplants_with_damages[depth_col] > 0\n", + " )\n", + " powerplants_with_damages[f\"exposure_usd__{depth_col}\"] = (\n", + " powerplants_with_damages.construction_cost\n", + " * (powerplants_with_damages[depth_col] > 0)\n", + " )\n", + "powerplants_with_damages.filter(like=\"exposure__\").sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powerplants_with_damages.to_parquet(\n", + " base_dir / \"power_flood\" / \"powerplant_damages_jrc.gpq\"\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Powerplant RP damages" + "# Rail damages\n" ] }, { @@ -186,7 +456,10 @@ "metadata": {}, "outputs": [], "source": [ - "powerplants = max_vector_rasters_intersection(powerplants, rasters).fillna(0)" + "rail_costs = pandas.read_csv(\"../config/rehab_costs/rail.csv\", comment=\"#\").set_index(\n", + " \"asset_type\"\n", + ")\n", + "rail_costs # .loc[\"rail_railway\", \"rehab_cost_USD_per_km\"]" ] }, { @@ -195,7 +468,7 @@ "metadata": {}, "outputs": [], "source": [ - "powerplants.head(2)" + "rail_edges.asset_type.unique()" ] }, { @@ -204,48 +477,92 @@ "metadata": {}, "outputs": [], "source": [ - "damage_curves = {\n", - " \"paved\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/road_paved.csv\",\n", - " 
intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"central\",\n", - " ),\n", - " \"lower\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/road_paved.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"damage_fraction\",\n", - " ),\n", - " \"upper\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/road_paved.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"upper\",\n", - " ),\n", - " \"motorway\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/road_motorway.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"damage_fraction\",\n", - " ),\n", - " \"motorway_high\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/road_motorway.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"high flow\",\n", - " ),\n", - " \"rail_a\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/rail_railway.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"damage_fraction\",\n", - " ),\n", - " \"rail_b\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/rail_railway.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"high\",\n", - " ),\n", - " \"powerplants\": PiecewiseLinearDamageCurve.from_csv(\n", - " \"../config/damage_curves/flood/powerplants.csv\",\n", - " intensity_col=\"depth_m\",\n", - " damage_col=\"damage_fraction\"\n", - " ),\n", - "}" + "def line_split_exposure(edges, grid, rasters):\n", + " exposure = snail.intersection.split_linestrings(edges.reset_index(drop=True), grid)\n", + " exposure = snail.intersection.apply_indices(exposure, grid)\n", + " for r in rasters.itertuples():\n", + " with rasterio.open(r.path) as src:\n", + " data = src.read(1)\n", + " exposure[r.key] = snail.intersection.get_raster_values_for_splits(\n", + " exposure, data\n", + " 
)\n", + " return exposure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rail_exposure = line_split_exposure(rail_edges, grid, rasters)\n", + "rail_exposure.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def line_split_damage(\n", + " exposure,\n", + " exposure_columns,\n", + " costs,\n", + " cost_col,\n", + " damage_curves_by_asset_type,\n", + " cost_unit=\"usd\",\n", + "):\n", + " geod = Geod(ellps=\"WGS84\")\n", + " damage_dfs = []\n", + " for asset_type, splits_df in exposure.groupby(\"asset_type\"):\n", + " cost_per_km = costs.loc[asset_type, cost_col]\n", + " splits_df[\"split_length_km\"] = (\n", + " splits_df.geometry.apply(geod.geometry_length) / 1e3\n", + " )\n", + " splits_df[\"rehab_cost\"] = splits_df.split_length_km * cost_per_km\n", + " damage = damage_value(\n", + " splits_df,\n", + " exposure_columns,\n", + " damage_curves_by_asset_type[asset_type],\n", + " \"rehab_cost\",\n", + " cost_unit,\n", + " )\n", + " damage_dfs.append(splits_df.join(damage))\n", + " return pandas.concat(damage_dfs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rail_damage_curves_a = {\n", + " \"rail_railway\": damage_curves[\"rail_a\"],\n", + " \"rail_bridge\": damage_curves[\"rail_a\"],\n", + "}\n", + "rail_damage_a = line_split_damage(\n", + " rail_exposure,\n", + " rasters.key,\n", + " rail_costs,\n", + " \"rehab_cost_USD_per_km\",\n", + " rail_damage_curves_a,\n", + ")\n", + "rail_damage_a[\"ead_usd__jrc_flood\"] = ead(rail_damage_a, \"usd\", rp_maps)\n", + "\n", + "rail_damage_curves_b = {\n", + " \"rail_railway\": damage_curves[\"rail_b\"],\n", + " \"rail_bridge\": damage_curves[\"rail_b\"],\n", + "}\n", + "rail_damage_b = line_split_damage(\n", + " rail_exposure,\n", + " rasters.key,\n", + " rail_costs,\n", + " \"rehab_cost_USD_per_km\",\n", + " 
rail_damage_curves_b,\n", + ")\n", + "rail_damage_b[\"ead_usd__jrc_flood\"] = ead(rail_damage_b, \"usd\", rp_maps)" ] }, { @@ -254,7 +571,18 @@ "metadata": {}, "outputs": [], "source": [ - "powerplants" + "for depth_col in rasters.key:\n", + " rail_damage_a[f\"exposure_km__{depth_col}\"] = rail_damage_a.split_length_km * (\n", + " rail_damage_a[depth_col] > 0\n", + " )\n", + "rail_damage_a.filter(like=\"exposure_km__jrc_flood_\").sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Road damages\n" ] }, { @@ -263,15 +591,1010 @@ "metadata": {}, "outputs": [], "source": [ - "exposure = powerplants.loc[:, rasters.key]\n", - "damage_fraction = pandas.DataFrame(\n", - " damage_curves[\"powerplants\"].damage_fraction(exposure),\n", - " index=exposure.index,\n", - " columns=exposure.columns\n", + "road_costs = pandas.read_csv(\"../config/rehab_costs/road.csv\", comment=\"#\").set_index(\n", + " \"asset_type\"\n", + ")\n", + "road_costs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_edges.asset_type.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_damage_curves_lower = {\n", + " \"road_trunk\": damage_curves[\"motorway_lower\"],\n", + " \"road_motorway\": damage_curves[\"motorway_lower\"],\n", + " \"road_primary\": damage_curves[\"paved_lower\"],\n", + " \"road_secondary\": damage_curves[\"paved_lower\"],\n", + " \"road_bridge\": damage_curves[\"paved_lower\"],\n", + "}\n", + "road_damage_curves_upper = {\n", + " \"road_trunk\": damage_curves[\"motorway_upper\"],\n", + " \"road_motorway\": damage_curves[\"motorway_upper\"],\n", + " \"road_primary\": damage_curves[\"paved_upper\"],\n", + " \"road_secondary\": damage_curves[\"paved_upper\"],\n", + " \"road_bridge\": damage_curves[\"paved_upper\"],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "road_exposure = line_split_exposure(road_edges, grid, rasters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_damage_lower = line_split_damage(\n", + " road_exposure,\n", + " rasters.key,\n", + " road_costs,\n", + " \"rehab_cost_USD_per_km_per_lane\",\n", + " road_damage_curves_lower,\n", + ")\n", + "road_damage_upper = line_split_damage(\n", + " road_exposure,\n", + " rasters.key,\n", + " road_costs,\n", + " \"rehab_cost_USD_per_km_per_lane\",\n", + " road_damage_curves_upper,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_damage_upper[\"ead_usd__jrc_flood\"] = ead(road_damage_upper, \"usd\", rp_maps)\n", + "road_damage_lower[\"ead_usd__jrc_flood\"] = ead(road_damage_lower, \"usd\", rp_maps)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for depth_col in rasters.key:\n", + " road_damage_upper[f\"exposure_km__{depth_col}\"] = (\n", + " road_damage_upper.split_length_km * (road_damage_upper[depth_col] > 0)\n", + " )\n", + "road_damage_upper.filter(like=\"exposure_km__jrc_flood_\").sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting\n", + "\n", + "- exposure of infrastructure assets (numbers of power plants, km of roads) in Thailand in floods of varying return periods\n", + "- total direct damages to infrastructure assets (power plants, roads) in Thailand in floods of varying return periods\n", + "- total direct damages to buildings in Thailand in floods of varying return periods\n", + "- transport infrastructure exposure in 1: and 1: return period floods, illustrating where most trade flows are exposed\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Explore styles\n", + "for s in 
plt.style.available:\n", + " print(s)\n", + "plt.style.use(\"seaborn-v0_8-muted\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate exposure for each return period\n", + "total_powerplants_exposure = (\n", + " powerplants_with_damages.filter(like=\"exposure__jrc_flood_\").sum().astype(int)\n", + ")\n", + "\n", + "# Extract return periods from column names\n", + "return_periods = [int(col.split(\"_\")[-1]) for col in total_powerplants_exposure.index]\n", + "probabilities = [1 / rp for rp in return_periods]\n", + "\n", + "# Plot the exposure\n", + "fig = plt.figure(figsize=(10, 6), facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "plt.plot(probabilities, total_powerplants_exposure, marker=\".\")\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Total Powerplant Exposure (sites)\")\n", + "plt.title(\n", + " \"\"\"Total exposure of powerplants in Thailand\n", + "to river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "plt.xticks(probabilities, return_periods, rotation=90)\n", + "plt.yticks(range(50, 55))\n", + "plt.savefig(base_dir / \"figures\" / \"rp_exposure_jrc-flood_power-sites.png\")\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate exposure for each return period\n", + "total_power_damage = (\n", + " powerplants_with_damages.filter(like=\"damage_usd__jrc_flood_\").sum() * 1e-9\n", + ")\n", + "\n", + "# Extract return periods from column names\n", + "return_periods = [int(col.split(\"_\")[-1]) for col in total_power_damage.index]\n", + "probabilities = [1 / rp for rp in return_periods]\n", + "\n", + "# Plot the exposure\n", + "fig = plt.figure(figsize=(10, 6), facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "plt.plot(probabilities, total_power_damage, marker=\".\")\n", + 
"plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Total Powerplant Damage (billion USD)\")\n", + "plt.title(\n", + " \"\"\"Total direct damages to powerplants in Thailand\n", + "for river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "plt.xticks(probabilities, return_periods, rotation=90)\n", + "plt.savefig(base_dir / \"figures\" / \"rp_damage_jrc-flood_power.png\")\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate exposure for each return period\n", + "total_rail_exposure = rail_damage_a.filter(like=\"exposure_km__jrc_flood_\").sum() * 1e-3\n", + "\n", + "# Extract return periods from column names\n", + "return_periods = [int(col.split(\"_\")[-1]) for col in total_rail_exposure.index]\n", + "probabilities = [1 / rp for rp in return_periods]\n", + "\n", + "# Plot the exposure\n", + "fig = plt.figure(figsize=(10, 6), facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "plt.plot(probabilities, total_rail_exposure, marker=\".\")\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Total Railway Exposure ('000 km)\")\n", + "plt.title(\n", + " \"\"\"Total exposure of railways in Thailand\n", + "to river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "plt.xticks(probabilities, return_periods, rotation=90)\n", + "plt.savefig(base_dir / \"figures\" / \"rp_exposure_jrc-flood_rail.png\")\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate total damages for each return period\n", + "total_rail_damages_a = rail_damage_a.filter(like=\"damage_usd__jrc_flood_\").sum() * 10e-9\n", + "total_rail_damages_b = rail_damage_b.filter(like=\"damage_usd__jrc_flood_\").sum() * 10e-9\n", + "total_rail_damages_center = (total_rail_damages_a + total_rail_damages_b) / 2\n", + "\n", + "# Extract return periods from column 
names\n", + "return_periods = [int(col.split(\"_\")[-1]) for col in total_rail_damages_a.index]\n", + "probabilities = [1 / rp for rp in return_periods]\n", + "\n", + "# Plot the damages\n", + "fig = plt.figure(figsize=(10, 6), facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "plt.plot(probabilities, total_rail_damages_center, marker=\".\")\n", + "plt.fill_between(probabilities, total_rail_damages_a, total_rail_damages_b, alpha=0.5)\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Total Rail Damages (billion USD)\")\n", + "plt.title(\n", + " \"\"\"Total direct damages to railways in Thailand\n", + "for river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "plt.xticks(probabilities, return_periods, rotation=90)\n", + "plt.savefig(base_dir / \"figures\" / \"rp_damage_jrc-flood_rail.png\")\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate exposure for each return period\n", + "total_road_exposure = (\n", + " road_damage_upper.filter(like=\"exposure_km__jrc_flood_\").sum() * 1e-3\n", + ")\n", + "\n", + "# Extract return periods from column names\n", + "return_periods = [int(col.split(\"_\")[-1]) for col in total_road_exposure.index]\n", + "probabilities = [1 / rp for rp in return_periods]\n", + "\n", + "# Plot the exposure\n", + "fig = plt.figure(figsize=(10, 6), facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "plt.plot(probabilities, total_road_exposure, marker=\".\")\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Total road Exposure ('000 km)\")\n", + "plt.title(\n", + " \"\"\"Total exposure of roads in Thailand\n", + "to river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "plt.xticks(probabilities, return_periods, rotation=90)\n", + "plt.savefig(base_dir / \"figures\" / \"rp_exposure_jrc-flood_road.png\")\n", + "None" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate total damages for each return period\n", + "dmg_road_a = road_damage_lower.filter(like=\"damage_usd__jrc_flood_\").sum() * 10e-9\n", + "dmg_road_b = road_damage_upper.filter(like=\"damage_usd__jrc_flood_\").sum() * 10e-9\n", + "dmg_road_c = (dmg_road_a + dmg_road_b) / 2\n", + "\n", + "# Extract return periods from column names\n", + "return_periods = [int(col.split(\"_\")[-1]) for col in dmg_road_a.index]\n", + "probabilities = [1 / rp for rp in return_periods]\n", + "\n", + "# Plot the damages\n", + "fig = plt.figure(figsize=(10, 6), facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "plt.plot(probabilities, dmg_road_c, marker=\".\")\n", + "plt.fill_between(probabilities, dmg_road_a, dmg_road_b, alpha=0.5)\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Total Rail Damages (billion USD)\")\n", + "plt.title(\n", + " \"\"\"Total direct damages to roads in Thailand\n", + "for river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "plt.xticks(probabilities, return_periods, rotation=90)\n", + "plt.savefig(base_dir / \"figures\" / \"rp_damage_jrc-flood_road.png\")\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate exposure for each return period\n", + "exposure_by_asset_type = {}\n", + "for asset_type, asset_exposure in road_damage_upper.groupby(\"asset_type\"):\n", + " exposure_by_asset_type[asset_type] = (\n", + " asset_exposure.filter(like=\"exposure_km__jrc_flood_\").sum() * 10e-3\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "\n", + "# Stacked area\n", + "# ax.stackplot(probabilities, 
damage_by_asset_type.values(),\n", + "# labels=damage_by_asset_type.keys(), alpha=0.8)\n", + "\n", + "# Grouped bar\n", + "x = numpy.arange(len(return_periods))\n", + "width = 1 / (len(exposure_by_asset_type) + 1) # the width of the bars\n", + "multiplier = 0\n", + "for attribute, measurement in exposure_by_asset_type.items():\n", + " offset = width * multiplier\n", + " rects = ax.bar(\n", + " x + offset, measurement, width, label=attribute.replace(\"road_\", \"\").title()\n", + " )\n", + " multiplier += 1\n", + "ax.set_ylim(0, 800)\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Sector Road Exposure ('000 km)\")\n", + "plt.title(\n", + " \"\"\"Direct Exposure of roads in Thailand\n", + "to river floods of varying return periods\"\"\"\n", + ")\n", + "plt.legend(facecolor=\"white\", framealpha=1)\n", + "\n", + "# Stacked area\n", + "# plt.xticks(probabilities, return_periods, rotation=90)\n", + "\n", + "# Grouped bar\n", + "plt.xticks(x + width * 2, return_periods)\n", + "\n", + "plt.savefig(base_dir / \"figures\" / \"rp_exposure_jrc-flood_road_sector.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "damage_by_asset_type = {}\n", + "for asset_type, asset_damage in road_damage_upper.groupby(\"asset_type\"):\n", + " damage_by_asset_type[asset_type] = (\n", + " asset_damage.filter(like=\"damage_usd__jrc_flood_\").sum() * 10e-9\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "\n", + "# Stacked area\n", + "# ax.stackplot(probabilities, damage_by_asset_type.values(),\n", + "# labels=damage_by_asset_type.keys(), alpha=0.8)\n", + "\n", + "# Grouped bar\n", + "x = numpy.arange(len(return_periods))\n", + "width = 1 / (len(damage_by_asset_type) + 1) # the width of the bars\n", + "multiplier = 0\n", 
+ "for attribute, measurement in damage_by_asset_type.items():\n", + " offset = width * multiplier\n", + " rects = ax.bar(\n", + " x + offset, measurement, width, label=attribute.replace(\"road_\", \"\").title()\n", + " )\n", + " multiplier += 1\n", + "ax.set_ylim(0, 1400)\n", + "plt.xlabel(\"Return Period (years)\")\n", + "plt.ylabel(\"Sector Road Damages (billion USD)\")\n", + "plt.title(\n", + " \"\"\"Direct damages to roads in Thailand\n", + "for river floods of varying return periods\"\"\"\n", + ")\n", + "plt.legend(facecolor=\"white\", framealpha=1, loc=\"upper left\")\n", + "\n", + "# Stacked area\n", + "# plt.xticks(probabilities, return_periods, rotation=90)\n", + "\n", + "# Grouped bar\n", + "plt.xticks(x + width * 2, return_periods)\n", + "\n", + "plt.savefig(base_dir / \"figures\" / \"rp_damage_jrc-flood_road_sector.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax1 = plt.subplots(facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "\n", + "ax2 = ax1.twinx()\n", + "\n", + "ax1.set_title(\n", + " \"\"\"Total exposure of infrastructure in Thailand\n", + "to river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "ax1.set_xticks(probabilities, return_periods, rotation=90)\n", + "ax1.set_xlabel(\"Return Period (years)\")\n", + "\n", + "ax1.set_ylabel(\"Total Road and Rail Exposure ('000 km)\")\n", + "ax1.set_ylim(0, 120)\n", + "\n", + "line_power = ax2.plot(\n", + " probabilities,\n", + " total_powerplants_exposure,\n", + " marker=\".\",\n", + " color=\"red\",\n", + " zorder=2.5,\n", + " label=\"Power\",\n", + ")\n", + "line_rail = ax1.plot(\n", + " probabilities, total_rail_exposure, marker=\".\", zorder=2.5, label=\"Rail\"\n", + ")\n", + "line_road = ax1.plot(\n", + " probabilities, total_road_exposure, marker=\".\", zorder=2.5, label=\"Road\"\n", + ")\n", + "\n", + "\n", + "ax2.set_ylabel(\"Total Powerplant Exposure 
(sites)\")\n", + "ax2.set_ylim(30, 60)\n", + "\n", + "ax2.grid(False)\n", + "\n", + "fig.legend(\n", + " handles=[line_road[0], line_power[0], line_rail[0]],\n", + " loc=\"center right\",\n", + " bbox_to_anchor=(0.9, 0.4),\n", + ")\n", + "# fig.tight_layout()\n", + "plt.savefig(base_dir / \"figures\" / \"rp_exposure_jrc-flood_all.png\")\n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "fig.patch.set_facecolor(\"white\")\n", + "\n", + "ax.set_title(\n", + " \"\"\"Total direct damages to infrastructure in Thailand\n", + "for river floods of varying return periods\"\"\"\n", + ")\n", + "\n", + "ax.set_xticks(probabilities, return_periods, rotation=90)\n", + "ax.set_xlabel(\"Return Period (years)\")\n", + "\n", + "ax.set_ylabel(\"Total Damage (billion USD)\")\n", + "# ax.set_yscale('log')\n", + "\n", + "line_power = ax.plot(\n", + " probabilities,\n", + " total_power_damage,\n", + " marker=\".\",\n", + " color=\"red\",\n", + " zorder=2.5,\n", + " label=\"Power\",\n", + ")\n", + "line_rail = ax.plot(\n", + " probabilities, total_rail_damages_center, marker=\".\", zorder=2.5, label=\"Rail\"\n", + ")\n", + "line_road = ax.plot(probabilities, dmg_road_c, marker=\".\", zorder=2.5, label=\"Road\")\n", + "\n", + "\n", + "fig.legend(\n", + " handles=[line_road[0], line_rail[0], line_power[0]],\n", + " loc=\"center right\",\n", + " bbox_to_anchor=(0.9, 0.2),\n", + ")\n", + "fig.tight_layout()\n", + "plt.savefig(base_dir / \"figures\" / \"rp_damage_jrc-flood_all.png\")\n", + "None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Trade flow measures\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(\n", + " (base_dir / \"transport_disruption\" / \"flow_allocation\" / \"project-thailand\").glob(\n", + " \"*pq\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flow_edges = geopandas.read_parquet(\n", + " base_dir\n", + " / \"transport_disruption\"\n", + " / \"flow_allocation\"\n", + " / \"project-thailand\"\n", + " / \"edges.gpq\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def join_id(row):\n", + " ids = sorted([row.to_id, row.from_id])\n", + " return f\"{ids[0]}--{ids[1]}\"\n", + "\n", + "\n", + "road_flow_edges = flow_edges.query(\"mode == 'road'\").copy()\n", + "road_flow_edges[\"join_id\"] = road_flow_edges.apply(join_id, axis=1)\n", + "road_flow_edges.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "delta = geopandas.read_parquet(\n", + " base_dir / \"transport_disruption\" / \"thailand_floods_trade_flow_delta.gpq\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "delta[\"join_id\"] = delta.apply(join_id, axis=1)\n", + "delta.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_damage_upper.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "road_damage_to_group = road_damage_upper[\n", + " [\n", + " \"from_id\",\n", + " \"to_id\",\n", + " \"jrc_flood_010\",\n", + " \"jrc_flood_020\",\n", + " \"jrc_flood_050\",\n", + " \"jrc_flood_100\",\n", + " \"jrc_flood_200\",\n", + " \"jrc_flood_500\",\n", + " \"damage_usd__jrc_flood_010\",\n", + " \"damage_usd__jrc_flood_020\",\n", + " \"damage_usd__jrc_flood_050\",\n", + " \"damage_usd__jrc_flood_100\",\n", + " \"damage_usd__jrc_flood_200\",\n", + " \"damage_usd__jrc_flood_500\",\n", + " ]\n", + "].copy()\n", + "road_damage_to_group.from_id = road_damage_to_group.from_id.apply(\n", + " lambda id: f\"road_{id}\"\n", + ")\n", + 
"road_damage_to_group.to_id = road_damage_to_group.to_id.apply(lambda id: f\"road_{id}\")\n", + "road_damage_to_group[\"join_id\"] = road_damage_to_group.apply(join_id, axis=1)\n", + "\n", + "road_damage_grouped = (\n", + " road_damage_to_group.drop(columns=[\"from_id\", \"to_id\"])\n", + " .groupby(\n", + " [\n", + " \"join_id\",\n", + " ]\n", + " )\n", + " .sum()\n", + " .reset_index()\n", + " .set_index(\"join_id\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flows_and_damages = delta.set_index(\"join_id\").join(road_damage_grouped)\n", + "flows_and_damages.ref.fillna(\"-\", inplace=True)\n", + "flows_and_damages.fillna(0, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flows_and_damages.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for key in rasters.key:\n", + " # damage_mask = (flows_and_damages[f\"damage_usd__{key}\"] > 0) # alternative assume disrupted if nonzero damage\n", + " depth_mask = flows_and_damages[key] > 0.5 # assume disrupted if flood depth > 0.5m\n", + " flows_and_damages[f\"flow_kusd__{key}\"] = numpy.maximum(\n", + " flows_and_damages.nominal_value_kusd * depth_mask,\n", + " 0,\n", + " )\n", + " flows_and_damages[f\"flow_td-1__{key}\"] = numpy.maximum(\n", + " flows_and_damages.nominal_volume_tons * depth_mask,\n", + " 0,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_in_country_trade_flows(\n", + " ax,\n", + " edges: geopandas.GeoDataFrame,\n", + " boundaries: geopandas.GeoDataFrame,\n", + " title: str,\n", + " column: str,\n", + " column_label: str,\n", + " vmin: float,\n", + " vmax: float,\n", + " legend: bool,\n", + ") -> None:\n", + "\n", + " to_plot = edges[edges[column] != 0]\n", + " to_plot_zeros = 
edges[edges[column] <= 0]\n", + " norm = LogNorm(10 ** numpy.log10(vmin), 10 ** numpy.log10(vmax))\n", + " to_plot_zeros.plot(\n", + " color=\"#eeeeee\",\n", + " ax=ax,\n", + " linewidth=0.2,\n", + " )\n", + " to_plot.sort_values(column).plot(\n", + " column,\n", + " ax=ax,\n", + " legend=legend,\n", + " norm=norm,\n", + " alpha=0.7,\n", + " legend_kwds={\"shrink\": 0.5, \"label\": column_label},\n", + " cmap=\"magma_r\",\n", + " )\n", + " xmin, xmax = ax.get_xlim()\n", + " ymin, ymax = ax.get_ylim()\n", + " ax.set_yticks(range(5, 22))\n", + " xticks = range(98, 106)\n", + " ax.set_xticks(xticks, labels=[str(t) for t in xticks])\n", + " boundaries.plot(ax=ax, lw=0.5, alpha=0.2)\n", + " ax.set_xlim(xmin, xmax)\n", + " ax.set_ylim(ymin, ymax)\n", + " ax.grid(alpha=0.3)\n", + " ax.set_xlabel(\"Longitude [deg]\")\n", + " ax.set_ylabel(\"Latitude [deg]\")\n", + " ax.set_title(title)\n", + " return ax\n", + "\n", + "\n", + "f, axs = plt.subplots(1, 2, figsize=(15, 12))\n", + "\n", + "vmin = flows_and_damages.volume_tons[flows_and_damages.volume_tons > 0].quantile(0.1)\n", + "vmax = flows_and_damages.volume_tons.max()\n", + "plot_in_country_trade_flows(\n", + " axs[0],\n", + " flows_and_damages,\n", + " adm0,\n", + " \"10y return period\",\n", + " \"flow_td-1__jrc_flood_010\",\n", + " \"Flow volume [t/d]\",\n", + " vmin,\n", + " vmax,\n", + " False,\n", + ")\n", + "plot_in_country_trade_flows(\n", + " axs[1],\n", + " flows_and_damages,\n", + " adm0,\n", + " \"100y return period\",\n", + " \"flow_td-1__jrc_flood_100\",\n", + " \"Flow volume [t/d]\",\n", + " vmin,\n", + " vmax,\n", + " True,\n", + ")\n", + "f.tight_layout()\n", + "f.savefig(base_dir / \"figures\" / \"rp_transport_disruption_010-100.png\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flows_and_damages_with_ref = flows_and_damages # .join(delta.set_index(\"join_id\")[[\"name_en\", \"ref\", \"osm_way_id\"]])\n", + 
"flows_and_damages_with_ref.osm_way_id = flows_and_damages_with_ref.osm_way_id.fillna(\n", + " -1\n", + ").astype(\"int\")\n", + "flows_and_damages_with_ref.ref = flows_and_damages_with_ref.ref.fillna(\"-\").astype(\n", + " \"str\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vol_disruption = (\n", + " flows_and_damages_with_ref[\n", + " [\n", + " \"name_en\",\n", + " \"ref\",\n", + " \"tag_highway\",\n", + " \"osm_way_id\",\n", + " \"flow_td-1__jrc_flood_010\",\n", + " \"flow_td-1__jrc_flood_100\",\n", + " \"damage_usd__jrc_flood_010\",\n", + " \"damage_usd__jrc_flood_100\",\n", + " ]\n", + " ].sort_values(\"flow_td-1__jrc_flood_100\", ascending=False)\n", + " # .head(200)\n", + ")\n", + "vol_disruption.set_index(\"osm_way_id\").loc[1287965816, :]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flows_and_damages_with_ref[\n", + " flows_and_damages_with_ref[\"flow_td-1__jrc_flood_010\"]\n", + " < flows_and_damages_with_ref[\"flow_td-1__jrc_flood_500\"]\n", + "].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vol_disruption = (\n", + " flows_and_damages_with_ref[\n", + " [\n", + " \"name_en\",\n", + " \"ref\",\n", + " \"tag_highway\",\n", + " \"osm_way_id\",\n", + " \"flow_td-1__jrc_flood_010\",\n", + " \"flow_td-1__jrc_flood_100\",\n", + " \"damage_usd__jrc_flood_010\",\n", + " \"damage_usd__jrc_flood_100\",\n", + " ]\n", + " ]\n", + " .groupby([\"name_en\"])\n", + " .agg(\n", + " {\n", + " \"ref\": \"first\",\n", + " \"osm_way_id\": \"first\",\n", + " \"tag_highway\": \"first\",\n", + " \"flow_td-1__jrc_flood_010\": \"max\",\n", + " \"flow_td-1__jrc_flood_100\": \"max\",\n", + " \"damage_usd__jrc_flood_010\": \"sum\",\n", + " \"damage_usd__jrc_flood_100\": \"sum\",\n", + " }\n", + " )\n", + " .sort_values(\"flow_td-1__jrc_flood_100\", 
ascending=False)\n", + " .reset_index()\n", + " # .drop_duplicates(subset=\"name_en\")\n", + " .head(20)[\n", + " [\n", + " \"osm_way_id\",\n", + " \"flow_td-1__jrc_flood_010\",\n", + " \"damage_usd__jrc_flood_010\",\n", + " \"flow_td-1__jrc_flood_100\",\n", + " \"damage_usd__jrc_flood_100\",\n", + " \"name_en\",\n", + " \"ref\",\n", + " \"tag_highway\",\n", + " ]\n", + " ]\n", + ")\n", + "\n", + "for col in [\"flow_td-1__jrc_flood_010\", \"flow_td-1__jrc_flood_100\"]:\n", + " vol_disruption[col] = vol_disruption[col].round(-1).astype(int)\n", + "for col in [\"damage_usd__jrc_flood_010\", \"damage_usd__jrc_flood_100\"]:\n", + " vol_disruption[col] = (vol_disruption[col] * 1e-3).round(-1).astype(int)\n", + "\n", + "\n", + "vol_disruption.to_csv(base_dir / \"figures\" / \"rp_transport_disruption_010-100.csv\")\n", + "vol_disruption" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Buildings exposure and damage calculations\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Input value in terms of built capital from GIRI BEM\n", + "# rr_val_5x5_tif = base_dir / \"processed_data/giri-bem/bem_5x5_valfis_res__THA.tif\"\n", + "# nr_val_5x5_tif = base_dir / \"processed_data/giri-bem/bem_5x5_valfis_nres__THA.tif\"\n", + "\n", + "# Input volume in terms of building volume from JRC GHSL\n", + "# rr_vol_3ss_tif = base_dir / \"processed_data/ghsl/ghs_built_v_res_3ss__THA.tif\"\n", + "# nr_vol_3ss_tif = base_dir / \"processed_data/ghsl/ghs_built_v_nres_3ss__THA.tif\"\n", + "\n", + "# Processed into value in terms of built capital, downscaled on volume\n", + "# see notebooks/built-capital-gva.ipynb and scripts/building_damages.py\n", + "rr_val_3ss_tif = base_dir / \"processed_data/giri-bem/bem_3ss_valfis_nres__THA.tif\"\n", + "nr_val_3ss_tif = base_dir / \"processed_data/giri-bem/bem_3ss_valfis_nres__THA.tif\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "rr_val, rr_ds = read_raster_ds(rr_val_3ss_tif)\n", + "nr_val, nr_ds = read_raster_ds(nr_val_3ss_tif)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr_damage_curve = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/residential_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + ")\n", + "nr_damage_curve = PiecewiseLinearDamageCurve.from_csv(\n", + " \"../config/damage_curves/flood/commercial_asia.csv\",\n", + " intensity_col=\"inundation_depth_(m)\",\n", + " damage_col=\"damage_fraction\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(rasters_3ss.path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_damage_val(depth, value, damage_curve):\n", + " damage_fraction = damage_curve.damage_fraction(depth)\n", + " damage_value = value * damage_fraction\n", + " return damage_value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rr_rp = {}\n", + "nr_rp = {}\n", + "for row in rasters_3ss.itertuples():\n", + " print(row)\n", + " rp_depth, rp_depth_ds = read_raster_ds(row.path)\n", + " transform = rp_depth_ds.transform\n", + " rr_dmg = calculate_damage_val(rp_depth, rr_val, rr_damage_curve)\n", + " nr_dmg = calculate_damage_val(rp_depth, nr_val, nr_damage_curve)\n", + " write_raster_ds(base_dir / \"buildings_flood\" / f\"dmg_val_res__{row.key}.tif\", rr_dmg, transform)\n", + " write_raster_ds(base_dir / \"buildings_flood\" / f\"dmg_val_nres__{row.key}.tif\", nr_dmg, transform)\n", + " rr_rp[row.rp] = rr_dmg\n", + " nr_rp[row.rp] = nr_dmg\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ 
+ "rps = []\n", + "rr = []\n", + "nr = []\n", + "for rp, rr_dmg in rr_rp.items():\n", + " rps.append(rp)\n", + " rr.append(rr_dmg.sum()* 1e-9)\n", + " nr.append(nr_rp[rp].sum()* 1e-9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(facecolor=\"white\", layout=\"constrained\")\n", + "fig.patch.set_facecolor(\"white\")\n", + "\n", + "ax.set_title(\n", + " \"\"\"Total direct damages to built capital in Thailand\n", + "for river floods of varying return periods\"\"\"\n", + ")\n", + "probabilities = [1/rp for rp in rps]\n", + "ax.set_xticks(probabilities, rps, rotation=90)\n", + "ax.set_xlabel(\"Return Period (years)\")\n", + "\n", + "ax.set_ylabel(\"Total Damage (billion USD)\")\n", + "# ax.set_yscale('log')\n", + "\n", + "line_res = ax.plot(\n", + " probabilities, rr, marker=\".\", zorder=2.5, label=\"Residential\"\n", + ")\n", + "line_nres = ax.plot(probabilities, nr, marker=\".\", zorder=2.5, label=\"Non-residential\")\n", + "\n", + "\n", + "fig.legend(\n", + " handles=[line_nres[0], line_res[0]],\n", + " loc=\"center right\",\n", + " bbox_to_anchor=(0.9, 0.75),\n", ")\n", - "damage_value = damage_fraction \\\n", - " .multiply(powerplants[fields.REHAB_COST], axis=\"index\") \\\n", - " .multiply(powerplants[fields.SPLIT_LENGTH], axis=\"index\")" + "plt.savefig(base_dir / \"figures\" / \"rp_damage_jrc-flood_buildings.png\")\n", + "None" ] } ], diff --git a/notebooks/windowed-raster.ipynb b/notebooks/windowed-raster.ipynb deleted file mode 100644 index 99fe0296..00000000 --- a/notebooks/windowed-raster.ipynb +++ /dev/null @@ -1,822 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import geopandas\n", - "import pandas\n", - "import rasterio\n", - "\n", - "from affine import Affine\n", - "from rasterio import features\n", - "from 
snail.damages import PiecewiseLinearDamageCurve" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "value_150ss_tif = \"../results/input/giri/THA/bem_5x5_valfis_res__THA.tif\"\n", - "volume_3ss_tif = \"../results/input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\"\n", - "flood_1ss_tif = \"../results/input/footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def clip_array(arr, block_size):\n", - " clip_rows = (arr.shape[0] - (arr.shape[0] % block_size))\n", - " clip_cols = (arr.shape[1] - (arr.shape[1] % block_size))\n", - "\n", - " clipped = arr[0:clip_rows, 0:clip_cols]\n", - " return clipped\n", - "\n", - "def resample_sum(arr, block_size):\n", - " nblocks_0 = arr.shape[0] // block_size\n", - " nblocks_1 = arr.shape[1] // block_size\n", - "\n", - " blocks = arr.reshape(nblocks_0, block_size, nblocks_1, block_size)\n", - "\n", - " return np.sum(blocks, axis=(1, 3))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# d = np.arange(12).reshape((3,4))\n", - "# d, resample_sum(d, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def repeat_2d(arr, block_size):\n", - " \"\"\"Repeat each element from a 2d array, so each value fills a (block_size x block_size) area\n", - " \"\"\"\n", - " return np.repeat(np.repeat(arr, block_size, axis=0), block_size, axis=1)\n", - "\n", - "# repeat_2d(d, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def read_ds(ds, band=1, replace_nodata=False, nodata_fill=0):\n", - " data = ds.read(band)\n", - " if replace_nodata:\n", - " data = np.where(data == ds.nodata, nodata_fill, data)\n", - " return data" - ] - }, - { - "cell_type": "code", 
- "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(value_150ss_tif) as value_150ss_ds:\n", - " value_150ss_all = read_ds(value_150ss_ds, replace_nodata=True)\n", - "\n", - "with rasterio.open(volume_3ss_tif) as volume_3ss_ds:\n", - " volume_3ss_all = read_ds(volume_3ss_ds, replace_nodata=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def to_int(a):\n", - " return np.floor(a).astype(int)\n", - "\n", - "# lon, lat of volume_3ss top left\n", - "volume_3ss_all_ul_xy = volume_3ss_ds.transform * (0,0)\n", - "# col, row in value_150ss_all, inset one extra\n", - "value_150ss_ul_cr = to_int(~value_150ss_ds.transform * (volume_3ss_all_ul_xy)) + 1\n", - "# lon, lat of that value_150ss_all pixel - this is our new top left\n", - "ul_xy_150ss = value_150ss_ds.transform * value_150ss_ul_cr\n", - "# col, row in volume_3ss_all\n", - "volume_3ss_ul_cr = to_int(~volume_3ss_ds.transform * ul_xy_150ss)\n", - "# lon, lat of that volume_3ss_all pixel - new top left for 3ss purposes (tiny bit offset)\n", - "ul_xy_3ss = volume_3ss_ds.transform * volume_3ss_ul_cr\n", - "ul_xy_150ss, ul_xy_3ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Clip out volume array\n", - "col_idx, row_idx = volume_3ss_ul_cr\n", - "volume_3ss = volume_3ss_all[row_idx:, col_idx:]\n", - "volume_3ss = clip_array(volume_3ss, 50)\n", - "# Resample volume to coarse-scale, \"sum\"\n", - "volume_150ss = resample_sum(volume_3ss, 50)\n", - "volume_150ss.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Adapt transform to new top-left and resolution\n", - "a,b,c,d,e,f = volume_3ss_ds.transform[:6]\n", - "t_150ss = Affine(\n", - " a * 50,\n", - " b,\n", - " ul_xy_150ss[0],\n", - " d,\n", - " e * 50,\n", - " ul_xy_150ss[1]\n", - ")\n", - "t_3ss = Affine(\n", - " 
a,\n", - " b,\n", - " ul_xy_3ss[0],\n", - " d,\n", - " e,\n", - " ul_xy_3ss[1]\n", - ")\n", - "t_150ss, t_3ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "col_idx, row_idx = value_150ss_ul_cr\n", - "ncols, nrows = volume_150ss.shape\n", - "value_150ss = value_150ss_all[col_idx:col_idx+ncols, row_idx:row_idx+nrows]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "value_150ss.shape, volume_150ss.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/vol_150ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=volume_150ss.shape[0],\n", - " width=volume_150ss.shape[1],\n", - " count=1,\n", - " dtype='float64',\n", - " crs='+proj=latlong',\n", - " transform=t_150ss\n", - ") as ds:\n", - " ds.write(volume_150ss, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/vol_3ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=volume_3ss.shape[0],\n", - " width=volume_3ss.shape[1],\n", - " count=1,\n", - " dtype=volume_3ss.dtype,\n", - " crs='+proj=latlong',\n", - " transform=t_3ss\n", - ") as ds:\n", - " ds.write(volume_3ss, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if value_150ss.shape != volume_150ss.shape:\n", - " print(\"CHKS\", value_150ss.shape, volume_150ss.shape)\n", - " assert False\n", - "\n", - "# Calculate value per unit volume\n", - "# value_per_volume_150ss = value_150ss / volume_150ss\n", - "value_per_volume_150ss = np.divide(value_150ss, volume_150ss, out=np.zeros_like(value_150ss), where=volume_150ss!=0)\n", - "# Resample to fine-scale value per volume, \"nearest\"\n", - "value_per_volume_3ss = 
repeat_2d(value_per_volume_150ss, 50)\n", - "# Calculate fine-scale value\n", - "value_3ss = value_per_volume_3ss * volume_3ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/val_vol_150ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=value_per_volume_150ss.shape[0],\n", - " width=value_per_volume_150ss.shape[1],\n", - " count=1,\n", - " dtype=value_per_volume_150ss.dtype,\n", - " crs='+proj=latlong',\n", - " transform=t_150ss\n", - ") as ds:\n", - " # Write to window\n", - " ds.write(value_per_volume_150ss, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/val_vol_3ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=value_per_volume_3ss.shape[0],\n", - " width=value_per_volume_3ss.shape[1],\n", - " count=1,\n", - " dtype=value_per_volume_3ss.dtype,\n", - " crs='+proj=latlong',\n", - " transform=t_3ss\n", - ") as ds:\n", - " # Write to window\n", - " ds.write(value_per_volume_3ss, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/val_3ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=value_3ss.shape[0],\n", - " width=value_3ss.shape[1],\n", - " count=1,\n", - " dtype=value_3ss.dtype,\n", - " crs='+proj=latlong',\n", - " transform=t_3ss\n", - ") as ds:\n", - " # Write to window\n", - " ds.write(value_3ss, indexes=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Flood intersection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(flood_1ss_tif, 'r') as flood_1ss_ds:\n", - " flood_1ss = read_ds(flood_1ss_ds, replace_nodata=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [ - "flood_1ss_ds.transform" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# lon, lat of footprint top left\n", - "flood_1ss_ul_xy = flood_1ss_ds.transform * (0,0)\n", - "# col, row in value_3ss\n", - "t_3ss_ul_cr = to_int(~t_3ss * (flood_1ss_ul_xy))\n", - "# lon, lat of that pixel - this is our new top left\n", - "footprint_ul_xy_3ss = t_3ss * t_3ss_ul_cr\n", - "# col, row in flood_1ss\n", - "flood_1ss_ul_cr = to_int(~flood_1ss_ds.transform * footprint_ul_xy_3ss)\n", - "# lon, lat of that flood_1ss pixel - new top left for 1ss purposes (tiny bit offset)\n", - "ul_xy_1ss = flood_1ss_ds.transform * flood_1ss_ul_cr\n", - "flood_1ss_ul_xy, footprint_ul_xy_3ss, ul_xy_1ss\n", - "\n", - "# TODO should new top left be greater, not less, in both x and y values?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# clip to match coarser array extent\n", - "flood_1ss_clipped = clip_array(flood_1ss, 3)\n", - "flood_1ss_height, flood_1ss_width = flood_1ss_clipped.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# lon, lat of footprint lower right\n", - "flood_1ss_lr_xy = flood_1ss_ds.transform * (flood_1ss_width, flood_1ss_height)\n", - "# col, row in value_3ss\n", - "t_3ss_lr_cr = to_int(~t_3ss * (flood_1ss_lr_xy))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ulc, ulr = t_3ss_ul_cr\n", - "lrc, lrr = t_3ss_lr_cr\n", - "footprint_value_3ss = value_3ss[ulr:lrr, ulc:lrc]\n", - "\n", - "footprint_value_1ss = repeat_2d(footprint_value_3ss, 3) / 9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "building_flood_depth_damage_curve = PiecewiseLinearDamageCurve.from_csv(\n", - " 
\"../bundled_data/damage_curves/flood/residential_asia.csv\",\n", - " intensity_col=\"inundation_depth_(m)\",\n", - " damage_col=\"damage_fraction\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if footprint_value_1ss.shape != flood_1ss_clipped.shape:\n", - " print(\"CHKS\", footprint_value_1ss.shape, flood_1ss_clipped.shape)\n", - " assert False\n", - "\n", - "damage_fraction_1ss = building_flood_depth_damage_curve.damage_fraction(flood_1ss_clipped)\n", - "\n", - "damage_value_1ss = footprint_value_1ss * damage_fraction_1ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Adapt transform to new top-left and resolution\n", - "a,b,c,d,e,f = flood_1ss_ds.transform[:6]\n", - "t_1ss = Affine(\n", - " a,\n", - " b,\n", - " ul_xy_1ss[0],\n", - " d,\n", - " e,\n", - " ul_xy_1ss[1]\n", - ")\n", - "t_1ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/dmg_frac_1ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=damage_fraction_1ss.shape[0],\n", - " width=damage_fraction_1ss.shape[1],\n", - " count=1,\n", - " dtype=damage_fraction_1ss.dtype,\n", - " crs='+proj=latlong',\n", - " transform=t_1ss\n", - ") as ds:\n", - " ds.write(damage_fraction_1ss, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/dmg_val_1ss.tif\", 'w',\n", - " driver='GTiff',\n", - " height=damage_value_1ss.shape[0],\n", - " width=damage_value_1ss.shape[1],\n", - " count=1,\n", - " dtype=damage_value_1ss.dtype,\n", - " crs='+proj=latlong',\n", - " transform=t_1ss\n", - ") as ds:\n", - " ds.write(damage_value_1ss, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": 
[ - "damage_value_1ss.sum() / 1e9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "value_150ss.sum() / 1e9, value_3ss.sum() / 1e9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "footprint_value_3ss.sum() / 1e9, footprint_value_1ss.sum() / 1e9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"../results/input/giri/THA/nres_dmg_val_1ss.tif\") as nres_dmg_val_1ss_ds:\n", - " nres_dmg_val_1ss = read_ds(nres_dmg_val_1ss_ds)\n", - "nres_dmg_val_1ss.sum() / 1e9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "ADM1 damage values:\n", - "\n", - " exactextract \\\n", - " -p ../../admin-boundaries/tha_adm1.shp \\\n", - " -r dmg_val_1ss.tif \\\n", - " -f GID_1 \\\n", - " -s sum \\\n", - " -o dmg_val_1ss.csv\n", - "\n", - "ADM1 total built volume:\n", - "\n", - " exactextract \\\n", - " -p ../../admin-boundaries/tha_adm1.shp \\\n", - " -r ../../ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif \\\n", - " -f GID_1 \\\n", - " -s sum \\\n", - " -o ghs_built_v_3ss.csv\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adm1_vol = pandas.read_csv(\"input/giri/THA/ghs_built_v_3ss.csv\") \\\n", - " .rename(columns={\"sum\": \"built_volume\"})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adm1 = geopandas.read_file(\"input/admin-boundaries/tha_adm1.shp\") \\\n", - " .merge(adm1_vol, on=\"GID_1\")[[\"GID_1\", \"NAME_1\", \"built_volume\", \"geometry\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adm1" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [ - "with rasterio.open(\"input/ghsl/THA/GHS_BUILT_V_E2020_GLOBE_R2023A_4326_3ss_V1_0__THA.tif\") as vol_3ss_ds:\n", - " vol_3ss = vol_3ss_ds.read(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def rasterize(gdf, column, template_ds):\n", - " return features.rasterize(\n", - " ((f['geometry'], f['properties'][column]) for f in gdf.__geo_interface__['features']),\n", - " out_shape=template_ds.shape,\n", - " transform=template_ds.transform\n", - " )\n", - "\n", - "vol_adm1_3ss = rasterize(adm1, 'built_volume', vol_3ss_ds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.imshow(vol_adm1_3ss)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adm1_gva = pandas.read_csv(\"/data/incoming/wenz-2023-dose-reported-subnational-output/DOSE_V2_THA.csv\")\n", - "adm1_gva[\"ag_grp\"] = adm1_gva[\"pop\"] * adm1_gva.ag_grp_pc_usd\n", - "adm1_gva[\"man_grp\"] = adm1_gva[\"pop\"] * adm1_gva.man_grp_pc_usd\n", - "adm1_gva[\"serv_grp\"] = adm1_gva[\"pop\"] * adm1_gva.serv_grp_pc_usd\n", - "\n", - "adm1_gva = geopandas.read_file(\"input/admin-boundaries/tha_adm1.shp\") \\\n", - " .merge(adm1_gva, on=\"GID_1\")[[\"GID_1\", \"NAME_1\", \"ag_grp\", \"man_grp\", \"serv_grp\", \"geometry\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adm1_gva.drop(columns=\"geometry\").to_csv(\"input/giri/THA/DOSE_V2_THA_rgva.csv\")\n", - "adm1_gva.to_file(\"input/giri/THA/DOSE_V2_THA_rgva.gpkg\", driver=\"GPKG\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "adm1_gva_ag_3ss = rasterize(adm1_gva, \"ag_grp\", vol_3ss_ds)\n", - "adm1_gva_man_3ss = rasterize(adm1_gva, \"man_grp\", vol_3ss_ds)\n", - 
"adm1_gva_serv_3ss = rasterize(adm1_gva, \"serv_grp\", vol_3ss_ds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def zero_divide(a, b):\n", - " return np.divide(a, b, out=np.zeros_like(a, dtype='float64'), where=(b!=0))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gva_ag_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_ag_3ss\n", - "gva_man_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_man_3ss\n", - "gva_serv_3ss = zero_divide(vol_3ss, vol_adm1_3ss) * adm1_gva_serv_3ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def write_ds(fname, data, transform):\n", - " with rasterio.open(fname, 'w',\n", - " driver='GTiff',\n", - " height=data.shape[0],\n", - " width=data.shape[1],\n", - " count=1,\n", - " dtype=data.dtype,\n", - " crs='+proj=latlong',\n", - " transform=transform\n", - " ) as ds:\n", - " ds.write(data, indexes=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "write_ds(\"input/giri/THA/gva_ag_3ss.tif\", gva_ag_3ss, vol_3ss_ds.transform)\n", - "write_ds(\"input/giri/THA/gva_man_3ss.tif\", gva_man_3ss, vol_3ss_ds.transform)\n", - "write_ds(\"input/giri/THA/gva_serv_3ss.tif\", gva_serv_3ss, vol_3ss_ds.transform)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gva_ag_1ss = repeat_2d(gva_ag_3ss, 3) / 9\n", - "gva_man_1ss = repeat_2d(gva_man_3ss, 3) / 9\n", - "gva_serv_1ss = repeat_2d(gva_serv_3ss, 3) / 9" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO figure out transform, check we're on the right grid, write out to files\n", - "# TODO compare with damage fraction, write out interruption\n", - "# TODO calculate per day, sum back to zonal 
stats\n", - "# TODO check totals (re-aggregate after disaggregation) maybe rescale???" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a,b,c,d,e,f = vol_3ss_ds.transform[:6]\n", - "gva_t_1ss = Affine(\n", - " a / 3,\n", - " b,\n", - " c,\n", - " d,\n", - " e / 3,\n", - " f\n", - ")\n", - "gva_t_1ss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "write_ds(\"input/giri/THA/gva_ag_1ss.tif\", gva_ag_1ss, gva_t_1ss)\n", - "write_ds(\"input/giri/THA/gva_man_1ss.tif\", gva_man_1ss, gva_t_1ss)\n", - "write_ds(\"input/giri/THA/gva_serv_1ss.tif\", gva_serv_1ss, gva_t_1ss)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_man_1ss.tif gva_man_1ss_clipped.tif\n", - "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_man_1ss_clipped.tif --outfile=disruption_man_1ss.tif --calc=\"(A>0.1)*B\"\n", - "\n", - "\n", - "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_ag_1ss.tif gva_ag_1ss_clipped.tif\n", - "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_ag_1ss_clipped.tif --outfile=disruption_ag_1ss.tif --calc=\"(A>0.1)*B\"\n", - "\n", - "\n", - "gdalwarp -te 99.2393056 13.2781945 101.5259723 17.6765279 gva_serv_1ss.tif gva_serv_1ss_clipped.tif\n", - "gdal_calc.py -A nres_dmg_frac_1ss.tif -B gva_serv_1ss_clipped.tif --outfile=disruption_serv_1ss.tif --calc=\"(A>0.1)*B\"\n", - "\n", - "\n", - "for sector in serv ag man\n", - " exactextract \\\n", - " -p ../../admin-boundaries/tha_adm1.shp \\\n", - " -r disruption_{$sector}_1ss.tif \\\n", - " -f GID_1 \\\n", - " -s sum \\\n", - " -o disruption_{$sector}_1ss.csv\n", - "end\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "gdalwarp -te 99.2393056 13.2781945 101.5259723 
17.6765279 ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01.tif ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01_clipped.tif\n", - "for sector in serv ag man\n", - " gdal_calc.py \\\n", - " -A ../../footprints/JBA/Raster/TH_FLRF_ChaoPhraya2011_RD_01_clipped.tif \\\n", - " -B gva_{$sector}_1ss_clipped.tif \\\n", - " --outfile=disruption_0.3m_{$sector}_1ss.tif \\\n", - " --calc=\"(A>0.3)*B\"\n", - " exactextract \\\n", - " -p ../../admin-boundaries/tha_adm1.shp \\\n", - " -r disruption_0.3m_{$sector}_1ss.tif \\\n", - " -f GID_1 \\\n", - " -s sum \\\n", - " -o disruption_0.3m_{$sector}_1ss.csv\n", - "end\n", - "\n", - "for sector in serv ag man\n", - " gdal_calc.py \\\n", - " -A nres_dmg_frac_1ss.tif \\\n", - " -B gva_{$sector}_1ss_clipped.tif \\\n", - " --outfile=disruption_dmg_{$sector}_1ss.tif \\\n", - " --calc=\"A*B\"\n", - " exactextract \\\n", - " -p ../../admin-boundaries/tha_adm1.shp \\\n", - " -r disruption_dmg_{$sector}_1ss.tif \\\n", - " -f GID_1 \\\n", - " -s sum \\\n", - " -o disruption_dmg_{$sector}_1ss.csv\n", - "end\n", - "\"\"\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/open_gira/exposure.py b/src/open_gira/exposure.py index 5d6a1a2d..491a838a 100644 --- a/src/open_gira/exposure.py +++ b/src/open_gira/exposure.py @@ -28,7 +28,8 @@ def max_vector_rasters_intersection( def max_vector_raster_intersection( vector: geopandas.GeoSeries, raster: str ) -> numpy.array: - """ + """Intersect vector geometries with raster, return array of max raster values + Parameters ---------- vector: GeoSeries diff --git a/src/open_gira/geometry.py 
b/src/open_gira/geometry.py index 95144fc8..a2829c47 100644 --- a/src/open_gira/geometry.py +++ b/src/open_gira/geometry.py @@ -1,4 +1,6 @@ import geopandas +import numpy as np +import rasterio.features def buffer_via_reprojection( @@ -18,3 +20,56 @@ def buffer_via_reprojection( return ( geoms.geometry.to_crs(projected_crs).buffer(buffer_radius_m).to_crs(geoms.crs) ) + + +# +# Raster manipulation helpers (working with 2d numpy arrays) +# +def clip_array(arr, block_size): + """Clip a 2d array to an integer multiple of block_size in each + dimension""" + clip_rows = arr.shape[0] - (arr.shape[0] % block_size) + clip_cols = arr.shape[1] - (arr.shape[1] % block_size) + + clipped = arr[0:clip_rows, 0:clip_cols] + return clipped + + +def resample_sum(arr, block_size): + """Resample a 2d array, summing each block of (block_size x + block_size) to give each cell in the output array""" + nblocks_0 = arr.shape[0] // block_size + nblocks_1 = arr.shape[1] // block_size + + blocks = arr.reshape(nblocks_0, block_size, nblocks_1, block_size) + + return np.sum(blocks, axis=(1, 3)) + + +def repeat_2d(arr, block_size): + """Repeat each element in a 2d array, so each value fills a (block_size x + block_size) area""" + return np.repeat(np.repeat(arr, block_size, axis=0), block_size, axis=1) + + +def floor_int(a): + """Floor and convert to integer""" + return np.floor(a).astype(int) + + +def zero_divide(a, b): + """Divide (a / b) but return zero where (b == 0)""" + return np.divide(a, b, out=np.zeros_like(a, dtype="float64"), where=(b != 0)) + + +def rasterize(gdf: geopandas.GeoDataFrame, column: str, template_ds): + """Burn values from a GeoDataFrame column into a raster of shape and transform + specified by template_ds""" + return rasterio.features.rasterize( + ( + (f["geometry"], f["properties"][column]) + for f in gdf.__geo_interface__["features"] + ), + out_shape=template_ds.shape, + transform=template_ds.transform, + ) diff --git a/src/open_gira/io.py b/src/open_gira/io.py index 
def write_raster_ds(
    fname,
    data,
    transform,
    driver="GTiff",
    crs="+proj=latlong",
):
    """Write a single-band 2d array to a georeferenced raster file.

    Parameters
    ----------
    fname : str
        Output path.
    data : numpy.ndarray
        2d array; its dtype and shape determine the raster's.
    transform : affine.Affine
        Geotransform mapping pixel to world coordinates.
    driver : str
        GDAL driver name, default GeoTIFF.
    crs : str
        Coordinate reference system, default unprojected lat/long.
    """
    with rasterio.open(
        fname,
        "w",
        driver=driver,
        height=data.shape[0],
        width=data.shape[1],
        count=1,
        dtype=data.dtype,
        crs=crs,
        transform=transform,
    ) as ds:
        ds.write(data, indexes=1)


def read_raster_ds(fname, band=1, replace_nodata=False, nodata_fill=0):
    """Read one band of a raster, optionally replacing nodata values.

    Parameters
    ----------
    fname : str
        Path to the raster file.
    band : int
        1-indexed band to read.
    replace_nodata : bool
        If True, substitute the dataset's nodata value with nodata_fill.
    nodata_fill : scalar
        Replacement value for nodata cells.

    Returns
    -------
    (numpy.ndarray, rasterio.DatasetReader)
        The band data and the (closed) dataset handle — metadata such as
        .transform and .shape remain accessible after close, but no
        further reads are possible.
    """
    with rasterio.open(fname) as ds:
        data = ds.read(band)
        if replace_nodata and ds.nodata is not None:
            # NaN != NaN, so an equality test would silently miss
            # NaN-coded nodata (common in float rasters such as flood
            # depth grids); test with isnan in that case.
            if np.isnan(ds.nodata):
                data = np.where(np.isnan(data), nodata_fill, data)
            else:
                data = np.where(data == ds.nodata, nodata_fill, data)
    return data, ds