From 57edd747113d42d7d7dfa39051f563b668975c6d Mon Sep 17 00:00:00 2001
From: Simon Kamuk Christiansen
Date: Wed, 22 May 2024 15:36:40 +0200
Subject: [PATCH 01/26] added testing of loading data, creating graphs, and
 training model. Also allowed calling train_model.main with arguments (will
 still use sys.argv when no arguments are supplied in the function call)

---
 tests/__init__.py           |   0
 tests/test_mllam_dataset.py | 134 ++++++++++++++++++++++++++++++++++++
 train_model.py              |   4 +-
 3 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_mllam_dataset.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py
new file mode 100644
index 00000000..0e55b7da
--- /dev/null
+++ b/tests/test_mllam_dataset.py
@@ -0,0 +1,134 @@
+# Standard library
+from pathlib import Path
+
+# First-party
+from neural_lam.weather_dataset import WeatherDataset
+from neural_lam.models.graph_lam import GraphLAM
+from neural_lam.utils import load_graph, load_static_data
+from neural_lam.config import Config
+from train_model import main
+
+# Third-party
+import numpy as np
+import weather_model_graphs as wmg
+
+
+def load_reduced_meps_dataset():
+    data_config_file = 'data/meps_example_reduced/data_config.yaml'
+    dataset_name = 'meps_example_reduced'
+
+    dataset = WeatherDataset(dataset_name="meps_example_reduced")
+    config = Config.from_file(data_config_file)
+
+    var_names = config.values['dataset']['var_names']
+    var_units = config.values['dataset']['var_units']
+    var_longnames = config.values['dataset']['var_longnames']
+
+    assert len(var_names) == len(var_longnames)
+    assert len(var_names) == len(var_units)
+
+    # TODO: can these two variables be loaded from elsewhere?
+ n_grid_static_features = 4 + n_input_steps = 2 + + n_forcing_features = config.values['dataset']['num_forcing_features'] + n_state_features = len(var_names) + n_prediction_timesteps = dataset.sample_length - n_input_steps + + nx, ny = config.values['grid_shape_state'] + n_grid = nx * ny + + # check that the dataset is not empty + assert len(dataset) > 0 + + # get the first item + init_states, target_states, forcing = dataset[0] + + # check that the shapes of the tensors are correct + assert init_states.shape == ( + n_input_steps, + n_grid, + n_state_features + ) + assert target_states.shape == ( + n_prediction_timesteps, + n_grid, + n_state_features, + ) + assert forcing.shape == ( + n_prediction_timesteps, + n_grid, + n_forcing_features, + ) + + static_data = load_static_data(dataset_name=dataset_name) + + required_props = {'border_mask', 'grid_static_features', 'step_diff_mean', 'step_diff_std', 'data_mean', 'data_std', 'param_weights'} + + # check the sizes of the props + assert static_data["border_mask"].shape == (n_grid, 1) + assert static_data["grid_static_features"].shape == (n_grid, n_grid_static_features) + assert static_data["step_diff_mean"].shape == (n_state_features,) + assert static_data["step_diff_std"].shape == (n_state_features,) + assert static_data["data_mean"].shape == (n_state_features,) + assert static_data["data_std"].shape == (n_state_features,) + assert static_data["param_weights"].shape == (n_state_features,) + + assert set(static_data.keys()) == required_props + + +def test_create_graph_reduced_meps_dataset(): + dataset_name = "meps_example_reduced" + static_dir_path = Path("data", dataset_name, "static") + graph_dir_path = Path("graphs", "hierarchial") + + # -- Static grid node features -- + xy_grid = np.load(static_dir_path / "nwp_xy.npy") + + # create the full graph + graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(xy_grid=xy_grid) + + # split the graph by component + graph_components = wmg.split_graph_by_edge_attribute( + graph=graph, attr="component" + # argument attribute seens to have been changed to attr, change also in weather-model-graphs/src/weather_model_graphs/save.py::to_pyg + ) + + m2m_graph = graph_components.pop("m2m") + m2m_graph_components = wmg.split_graph_by_edge_attribute( + graph=m2m_graph, attr="direction" + ) + m2m_graph_components = { + f"m2m_{name}": graph for name, graph in m2m_graph_components.items() + } + graph_components.update(m2m_graph_components) + + # save the graph components to disk in pytorch-geometric format + for component_name, graph_component in graph_components.items(): + kwargs = {} + wmg.save.to_pyg( + graph=graph_component, + name=component_name, + output_directory=graph_dir_path, + **kwargs, + ) + + +def test_train_model_reduced_meps_dataset(): + args = [ + '--model=hi_lam', + '--data_config=data/meps_example_reduced/data_config.yaml', + '--n_workers=1', + '--epochs=1', + '--graph=hierarchical', + '--hidden_dim=16', + '--hidden_layers=1', + '--processor_layers=1', + '--ar_steps=1', + '--eval=val', + '--wandb_project=None', + ] + main(args) + + + \ No newline at end of file diff --git a/train_model.py b/train_model.py index 390da6d4..0482c075 100644 --- a/train_model.py +++ b/train_model.py @@ -22,7 +22,7 @@ } -def main(): +def main(input_args=None): """ Main function for training and evaluating models """ @@ -206,7 +206,7 @@ def main(): default={}, help="Dict with variables and lead times to log watched metrics for", ) - args = parser.parse_args() + args = parser.parse_args(input_args) 
config_loader = config.Config.from_file(args.data_config) From 4e17efbf5a0717135413d795d0ae7a976ad0ef32 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Wed, 22 May 2024 15:48:48 +0200 Subject: [PATCH 02/26] added test to test name --- tests/test_mllam_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index 0e55b7da..8cb5b7ec 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -13,7 +13,7 @@ import weather_model_graphs as wmg -def load_reduced_meps_dataset(): +def test_load_reduced_meps_dataset(): data_config_file = 'data/meps_example_reduced/data_config.yaml' dataset_name = 'meps_example_reduced' From 7fa7cdd430af60bc56c86076ec5cbc1aa16bb384 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Wed, 22 May 2024 16:01:37 +0200 Subject: [PATCH 03/26] linting --- tests/test_mllam_dataset.py | 90 +++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index 8cb5b7ec..0dd454bd 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -1,41 +1,40 @@ # Standard library from pathlib import Path -# First-party -from neural_lam.weather_dataset import WeatherDataset -from neural_lam.models.graph_lam import GraphLAM -from neural_lam.utils import load_graph, load_static_data -from neural_lam.config import Config -from train_model import main - # Third-party import numpy as np import weather_model_graphs as wmg +# First-party +from neural_lam.config import Config +from neural_lam.utils import load_static_data +from neural_lam.weather_dataset import WeatherDataset +from train_model import main + def test_load_reduced_meps_dataset(): - data_config_file = 'data/meps_example_reduced/data_config.yaml' - dataset_name = 'meps_example_reduced' + data_config_file = "data/meps_example_reduced/data_config.yaml" + dataset_name = "meps_example_reduced" dataset = WeatherDataset(dataset_name="meps_example_reduced") config = Config.from_file(data_config_file) - var_names = config.values['dataset']['var_names'] - var_units = config.values['dataset']['var_units'] - var_longnames = config.values['dataset']['var_longnames'] + var_names = config.values["dataset"]["var_names"] + var_units = config.values["dataset"]["var_units"] + var_longnames = config.values["dataset"]["var_longnames"] assert len(var_names) == len(var_longnames) assert len(var_names) == len(var_units) - + # TODO: can these two variables be loaded from elsewhere? 
n_grid_static_features = 4 n_input_steps = 2 - n_forcing_features = config.values['dataset']['num_forcing_features'] + n_forcing_features = config.values["dataset"]["num_forcing_features"] n_state_features = len(var_names) n_prediction_timesteps = dataset.sample_length - n_input_steps - - nx, ny = config.values['grid_shape_state'] + + nx, ny = config.values["grid_shape_state"] n_grid = nx * ny # check that the dataset is not empty @@ -43,13 +42,9 @@ def test_load_reduced_meps_dataset(): # get the first item init_states, target_states, forcing = dataset[0] - + # check that the shapes of the tensors are correct - assert init_states.shape == ( - n_input_steps, - n_grid, - n_state_features - ) + assert init_states.shape == (n_input_steps, n_grid, n_state_features) assert target_states.shape == ( n_prediction_timesteps, n_grid, @@ -62,12 +57,23 @@ def test_load_reduced_meps_dataset(): ) static_data = load_static_data(dataset_name=dataset_name) - - required_props = {'border_mask', 'grid_static_features', 'step_diff_mean', 'step_diff_std', 'data_mean', 'data_std', 'param_weights'} - + + required_props = { + "border_mask", + "grid_static_features", + "step_diff_mean", + "step_diff_std", + "data_mean", + "data_std", + "param_weights", + } + # check the sizes of the props assert static_data["border_mask"].shape == (n_grid, 1) - assert static_data["grid_static_features"].shape == (n_grid, n_grid_static_features) + assert static_data["grid_static_features"].shape == ( + n_grid, + n_grid_static_features, + ) assert static_data["step_diff_mean"].shape == (n_state_features,) assert static_data["step_diff_std"].shape == (n_state_features,) assert static_data["data_mean"].shape == (n_state_features,) @@ -75,7 +81,7 @@ def test_load_reduced_meps_dataset(): assert static_data["param_weights"].shape == (n_state_features,) assert set(static_data.keys()) == required_props - + def test_create_graph_reduced_meps_dataset(): dataset_name = "meps_example_reduced" @@ -86,12 +92,13 @@ def test_create_graph_reduced_meps_dataset(): xy_grid = np.load(static_dir_path / "nwp_xy.npy") # create the full graph - graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(xy_grid=xy_grid) + graph = wmg.create.archetype.create_oscarsson_hierarchical_graph( + xy_grid=xy_grid + ) # split the graph by component graph_components = wmg.split_graph_by_edge_attribute( graph=graph, attr="component" - # argument attribute seens to have been changed to attr, change also in weather-model-graphs/src/weather_model_graphs/save.py::to_pyg ) m2m_graph = graph_components.pop("m2m") @@ -116,19 +123,16 @@ def test_create_graph_reduced_meps_dataset(): def test_train_model_reduced_meps_dataset(): args = [ - '--model=hi_lam', - '--data_config=data/meps_example_reduced/data_config.yaml', - '--n_workers=1', - '--epochs=1', - '--graph=hierarchical', - '--hidden_dim=16', - '--hidden_layers=1', - '--processor_layers=1', - '--ar_steps=1', - '--eval=val', - '--wandb_project=None', + "--model=hi_lam", + "--data_config=data/meps_example_reduced/data_config.yaml", + "--n_workers=1", + "--epochs=1", + "--graph=hierarchical", + "--hidden_dim=16", + "--hidden_layers=1", + "--processor_layers=1", + "--ar_steps=1", + "--eval=val", + "--wandb_project=None", ] main(args) - - - \ No newline at end of file From 569d061029fd6d670d974c808190977def8c17bd Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 10:39:35 +0200 Subject: [PATCH 04/26] made create_mesh callable as python function with arguments. 

Fixed error in plotting where non-callable cartopy projection from Config
was called.

Used current mesh generation from neural-lam instead of weather-model-graphs.

Finished test of training call.
---
 create_mesh.py              |   4 +-
 neural_lam/vis.py           |   4 +-
 tests/test_mllam_dataset.py | 103 +++++++++++++++++++++---------------
 3 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/create_mesh.py b/create_mesh.py
index f04b4d4b..41557a97 100644
--- a/create_mesh.py
+++ b/create_mesh.py
@@ -153,7 +153,7 @@ def prepend_node_index(graph, new_index):
     return networkx.relabel_nodes(graph, to_mapping, copy=True)
 
 
-def main():
+def main(input_args=None):
     parser = ArgumentParser(description="Graph generation arguments")
     parser.add_argument(
         "--data_config",
@@ -186,7 +186,7 @@ def main():
         default=0,
         help="Generate hierarchical mesh graph (default: 0, no)",
     )
-    args = parser.parse_args()
+    args = parser.parse_args(input_args)
 
     # Load grid positions
     config_loader = config.Config.from_file(args.data_config)
diff --git a/neural_lam/vis.py b/neural_lam/vis.py
index 2b6abf15..8c9ca77c 100644
--- a/neural_lam/vis.py
+++ b/neural_lam/vis.py
@@ -87,7 +87,7 @@ def plot_prediction(
         1,
         2,
         figsize=(13, 7),
-        subplot_kw={"projection": data_config.coords_projection()},
+        subplot_kw={"projection": data_config.coords_projection},
     )
 
     # Plot pred and target
@@ -136,7 +136,7 @@ def plot_spatial_error(error, obs_mask, data_config, title=None, vrange=None):
 
     fig, ax = plt.subplots(
         figsize=(5, 4.8),
-        subplot_kw={"projection": data_config.coords_projection()},
+        subplot_kw={"projection": data_config.coords_projection},
     )
 
     ax.coastlines()  # Add coastline outlines
diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py
index 0dd454bd..bd638c78 100644
--- a/tests/test_mllam_dataset.py
+++ b/tests/test_mllam_dataset.py
@@ -1,15 +1,19 @@
 # Standard library
-from pathlib import Path
-
-# Third-party
-import numpy as np
-import weather_model_graphs as wmg
+import os
 
 # First-party
+from create_mesh import main as create_mesh
 from neural_lam.config import Config
 from neural_lam.utils import load_static_data
 from neural_lam.weather_dataset import WeatherDataset
-from train_model import main
+from train_model import main as train_model
+
+# from pathlib import Path
+# import numpy as np
+# import weather_model_graphs as wmg
+
+
+os.environ["WANDB_DISABLED"] = "true"
 
 
 def test_load_reduced_meps_dataset():
@@ -83,49 +87,59 @@ def test_load_reduced_meps_dataset():
     assert set(static_data.keys()) == required_props
 
 
-def test_create_graph_reduced_meps_dataset():
-    dataset_name = "meps_example_reduced"
-    static_dir_path = Path("data", dataset_name, "static")
-    graph_dir_path = Path("graphs", "hierarchial")
-
-    # -- Static grid node features --
-    xy_grid = np.load(static_dir_path / "nwp_xy.npy")
-
-    # create the full graph
-    graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(
-        xy_grid=xy_grid
-    )
-
-    # split the graph by component
-    graph_components = wmg.split_graph_by_edge_attribute(
-        graph=graph, attr="component"
-    )
+# def test_create_wmg_graph_reduced_meps_dataset():
+#     dataset_name = "meps_example_reduced"
+#     static_dir_path = Path("data", dataset_name, "static")
+#     graph_dir_path = Path("graphs", "hierarchial")
+
+#     # -- Static grid node features --
+#     xy_grid = np.load(static_dir_path / "nwp_xy.npy")
+
+#     # create the full graph
+#     graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(
+#         xy_grid=xy_grid
+#     )
+
+#     # split the graph by component
+#     graph_components = wmg.split_graph_by_edge_attribute(
+# graph=graph, attr="component" +# ) + +# m2m_graph = graph_components.pop("m2m") +# m2m_graph_components = wmg.split_graph_by_edge_attribute( +# graph=m2m_graph, attr="direction" +# ) +# m2m_graph_components = { +# f"m2m_{name}": graph for name, graph in m2m_graph_components.items() +# } +# graph_components.update(m2m_graph_components) + +# # save the graph components to disk in pytorch-geometric format +# for component_name, graph_component in graph_components.items(): +# kwargs = {} +# wmg.save.to_pyg( +# graph=graph_component, +# name=component_name, +# output_directory=graph_dir_path, +# **kwargs, +# ) - m2m_graph = graph_components.pop("m2m") - m2m_graph_components = wmg.split_graph_by_edge_attribute( - graph=m2m_graph, attr="direction" - ) - m2m_graph_components = { - f"m2m_{name}": graph for name, graph in m2m_graph_components.items() - } - graph_components.update(m2m_graph_components) - # save the graph components to disk in pytorch-geometric format - for component_name, graph_component in graph_components.items(): - kwargs = {} - wmg.save.to_pyg( - graph=graph_component, - name=component_name, - output_directory=graph_dir_path, - **kwargs, - ) +def test_create_graph_reduced_meps_dataset(): + args = [ + "--graph=hierarchical", + "--hierarchical=1", + "--data_config=data/meps_example_reduced/data_config.yaml", + "--levels=2", + ] + create_mesh(args) def test_train_model_reduced_meps_dataset(): args = [ "--model=hi_lam", "--data_config=data/meps_example_reduced/data_config.yaml", - "--n_workers=1", + "--n_workers=4", "--epochs=1", "--graph=hierarchical", "--hidden_dim=16", @@ -133,6 +147,9 @@ def test_train_model_reduced_meps_dataset(): "--processor_layers=1", "--ar_steps=1", "--eval=val", - "--wandb_project=None", + "--n_example_pred=0", ] - main(args) + train_model(args) + + +test_train_model_reduced_meps_dataset() From 1ebe9006737ce40603038ddedec5fb9fa9389d0d Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 11:15:05 +0200 Subject: [PATCH 05/26] added github ci/cd for running tests with pytest --- .github/workflows/run_tests.yml | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/run_tests.yml diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 00000000..aac167a7 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,35 @@ +name: Run Unit Test via Pytest + +on: + # trigger on pushes to any branch, but not main + push: + branches-ignore: + - main + # and also on PRs to main + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with pytest + run: | + coverage run -m pytest -v -s + - name: Generate Coverage Report + run: | + coverage report -m From 0e96e8809653f132188cb1742c0adb0f19c5b72b Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 11:27:41 +0200 Subject: [PATCH 06/26] removed coverage from test ci/cd --- .github/workflows/run_tests.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/run_tests.yml 
b/.github/workflows/run_tests.yml index aac167a7..1629c6f1 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -29,7 +29,4 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | - coverage run -m pytest -v -s - - name: Generate Coverage Report - run: | - coverage report -m + run -m pytest -v -s From 2339ed03e02ddd9f29632e2696e711f9a48352c3 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 11:28:31 +0200 Subject: [PATCH 07/26] fixed error in cicd --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 1629c6f1..f1dad637 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -29,4 +29,4 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | - run -m pytest -v -s + pytest -v -s From 5d3f83458029d524bafe091144a1c7978c6e1dd4 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 12:14:49 +0200 Subject: [PATCH 08/26] removed astroid from requirements, causes codespell error, assuming it will be installed anyway as a dependency during the other pip installs --- requirements.txt | 161 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index f381d54f..19cb5aae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,146 @@ -# for all -numpy>=1.24.2 -wandb>=0.13.10 -matplotlib>=3.7.0 -scipy>=1.10.0 -pytorch-lightning>=2.0.3 -shapely>=2.0.1 -networkx>=3.0 -Cartopy>=0.22.0 -pyproj>=3.4.1 -tueplots>=0.0.8 -plotly>=5.15.0 - -# for dev -pre-commit>=2.15.0 +aiohttp==3.9.5 +aiosignal==1.3.1 +appnope==0.1.4; platform_system == "Darwin" +asciitree==0.3.3 +asttokens==2.4.1 +async-timeout==4.0.3; python_version < "3.11" +attrs==23.2.0 +black==24.4.2 +cartopy==0.23.0 +certifi==2024.2.2 +cffi==1.16.0; implementation_name == "pypy" +cfgv==3.4.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +codespell==2.2.6 +colorama==0.4.6; sys_platform == "win32" or platform_system == "Windows" +comm==0.2.2 +contourpy==1.2.1 +cycler==0.12.1 +dask==2024.5.0 +debugpy==1.8.1 +decorator==5.1.1 +dill==0.3.8 +distlib==0.3.8 +docker-pycreds==0.4.0 +exceptiongroup==1.2.1; python_version < "3.11" +executing==2.0.1 +fasteners==0.19; sys_platform != "emscripten" +filelock==3.14.0 +flake8==7.0.0 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.5.0 +gitdb==4.0.11 +gitpython==3.1.43 +identify==2.5.36 +idna==3.7 +importlib-metadata==7.1.0; python_version < "3.12" +iniconfig==2.0.0 +intel-openmp==2021.4.0; platform_system == "Windows" +ipdb==0.13.13 +ipykernel==6.29.4 +ipython==8.24.0 +isort==5.13.2 +jedi==0.19.1 +jinja2==3.1.4 +joblib==1.4.2 +jupyter-client==8.6.1 +jupyter-core==5.7.2 +kiwisolver==1.4.5 +lightning-utilities==0.11.2 +locket==1.0.0 +loguru==0.7.2 +markupsafe==2.1.5 +matplotlib==3.9.0 +matplotlib-inline==0.1.7 +mccabe==0.7.0 +mkl==2021.4.0; platform_system == "Windows" +mpmath==1.3.0 +multidict==6.0.5 +mypy-extensions==1.0.0 +nest-asyncio==1.6.0 +networkx==3.3 +nodeenv==1.8.0 +numcodecs==0.12.1 +numpy==1.26.4 +nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == "Linux" and 
platform_machine == "x86_64" +nvidia-cuda-runtime-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cudnn-cu12==8.9.2.26; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cufft-cu12==11.0.2.54; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-curand-cu12==10.3.2.106; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cusolver-cu12==11.4.5.107; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cusparse-cu12==12.1.0.106; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-nccl-cu12==2.20.5; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-nvjitlink-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-nvtx-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" +packaging==24.0 +pandas==2.2.2 +parso==0.8.4 +partd==1.4.2 +pathspec==0.12.1 +pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten" +pillow==10.3.0 +platformdirs==4.2.2 +plotly==5.22.0 +pluggy==1.5.0 +pre-commit==3.7.1 +prompt-toolkit==3.0.43 +protobuf==4.25.3; python_version > "3.9" or sys_platform != "linux" +psutil==5.9.8 +ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten" +pure-eval==0.2.2 +pycodestyle==2.11.1 +pycparser==2.22; implementation_name == "pypy" +pyflakes==3.2.0 +pygments==2.18.0 +pylint==3.2.0 +pyparsing==3.1.2 +pyproj==3.6.1 +pyshp==2.3.1 +pytest==8.2.0 +python-dateutil==2.9.0.post0 +pytorch-lightning==2.2.4 +pytz==2024.1 +pywin32==306; sys_platform == "win32" and platform_python_implementation != "PyPy" +pyyaml==6.0.1 +pyzmq==26.0.3 +requests==2.31.0 +scikit-learn==1.4.2 +scipy==1.13.0 +sentry-sdk==2.2.0 +setproctitle==1.3.3 +setuptools==69.5.1 +shapely==2.0.4 +six==1.16.0 +smmap==5.0.1 +stack-data==0.6.3 +sympy==1.12 +tbb==2021.12.0; platform_system == "Windows" +tenacity==8.3.0 +threadpoolctl==3.5.0 +tomli==2.0.1; python_version < "3.11" +tomlkit==0.12.5 +toolz==0.12.1 +torch==2.3.0 +torch-geometric==2.5.3 +torchmetrics==1.4.0.post0 +tornado==6.4 +tqdm==4.66.4 +traitlets==5.14.3 +triton==2.3.0; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12" +tueplots==0.0.15 +typing-extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +virtualenv==20.26.2 +wandb==0.17.0 +wcwidth==0.2.13 +win32-setctime==1.1.0; sys_platform == "win32" +xarray==2024.5.0 +yarl==1.9.4 +zarr==2.18.0 +zipp==3.18.2; python_version < "3.12" From 8d733b7f56ef2245e1eb1950f850cf61b131c78e Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 12:29:37 +0200 Subject: [PATCH 09/26] simplified requirements --- requirements.txt | 172 +++++++---------------------------------------- 1 file changed, 26 insertions(+), 146 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19cb5aae..964483c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,146 +1,26 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -appnope==0.1.4; platform_system == "Darwin" -asciitree==0.3.3 -asttokens==2.4.1 -async-timeout==4.0.3; python_version < "3.11" -attrs==23.2.0 -black==24.4.2 -cartopy==0.23.0 -certifi==2024.2.2 -cffi==1.16.0; implementation_name == "pypy" -cfgv==3.4.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -codespell==2.2.6 -colorama==0.4.6; sys_platform == "win32" or platform_system == "Windows" -comm==0.2.2 -contourpy==1.2.1 -cycler==0.12.1 -dask==2024.5.0 -debugpy==1.8.1 -decorator==5.1.1 -dill==0.3.8 -distlib==0.3.8 -docker-pycreds==0.4.0 
-exceptiongroup==1.2.1; python_version < "3.11" -executing==2.0.1 -fasteners==0.19; sys_platform != "emscripten" -filelock==3.14.0 -flake8==7.0.0 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.5.0 -gitdb==4.0.11 -gitpython==3.1.43 -identify==2.5.36 -idna==3.7 -importlib-metadata==7.1.0; python_version < "3.12" -iniconfig==2.0.0 -intel-openmp==2021.4.0; platform_system == "Windows" -ipdb==0.13.13 -ipykernel==6.29.4 -ipython==8.24.0 -isort==5.13.2 -jedi==0.19.1 -jinja2==3.1.4 -joblib==1.4.2 -jupyter-client==8.6.1 -jupyter-core==5.7.2 -kiwisolver==1.4.5 -lightning-utilities==0.11.2 -locket==1.0.0 -loguru==0.7.2 -markupsafe==2.1.5 -matplotlib==3.9.0 -matplotlib-inline==0.1.7 -mccabe==0.7.0 -mkl==2021.4.0; platform_system == "Windows" -mpmath==1.3.0 -multidict==6.0.5 -mypy-extensions==1.0.0 -nest-asyncio==1.6.0 -networkx==3.3 -nodeenv==1.8.0 -numcodecs==0.12.1 -numpy==1.26.4 -nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-runtime-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cudnn-cu12==8.9.2.26; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cufft-cu12==11.0.2.54; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-curand-cu12==10.3.2.106; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cusolver-cu12==11.4.5.107; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cusparse-cu12==12.1.0.106; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nccl-cu12==2.20.5; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nvjitlink-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nvtx-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -packaging==24.0 -pandas==2.2.2 -parso==0.8.4 -partd==1.4.2 -pathspec==0.12.1 -pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten" -pillow==10.3.0 -platformdirs==4.2.2 -plotly==5.22.0 -pluggy==1.5.0 -pre-commit==3.7.1 -prompt-toolkit==3.0.43 -protobuf==4.25.3; python_version > "3.9" or sys_platform != "linux" -psutil==5.9.8 -ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten" -pure-eval==0.2.2 -pycodestyle==2.11.1 -pycparser==2.22; implementation_name == "pypy" -pyflakes==3.2.0 -pygments==2.18.0 -pylint==3.2.0 -pyparsing==3.1.2 -pyproj==3.6.1 -pyshp==2.3.1 -pytest==8.2.0 -python-dateutil==2.9.0.post0 -pytorch-lightning==2.2.4 -pytz==2024.1 -pywin32==306; sys_platform == "win32" and platform_python_implementation != "PyPy" -pyyaml==6.0.1 -pyzmq==26.0.3 -requests==2.31.0 -scikit-learn==1.4.2 -scipy==1.13.0 -sentry-sdk==2.2.0 -setproctitle==1.3.3 -setuptools==69.5.1 -shapely==2.0.4 -six==1.16.0 -smmap==5.0.1 -stack-data==0.6.3 -sympy==1.12 -tbb==2021.12.0; platform_system == "Windows" -tenacity==8.3.0 -threadpoolctl==3.5.0 -tomli==2.0.1; python_version < "3.11" -tomlkit==0.12.5 -toolz==0.12.1 -torch==2.3.0 -torch-geometric==2.5.3 -torchmetrics==1.4.0.post0 -tornado==6.4 -tqdm==4.66.4 -traitlets==5.14.3 -triton==2.3.0; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12" -tueplots==0.0.15 -typing-extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -virtualenv==20.26.2 -wandb==0.17.0 -wcwidth==0.2.13 -win32-setctime==1.1.0; sys_platform == "win32" 
-xarray==2024.5.0 -yarl==1.9.4 -zarr==2.18.0 -zipp==3.18.2; python_version < "3.12" +# for all +numpy>=1.24.2 +wandb>=0.13.10 +matplotlib>=3.7.0 +scipy>=1.10.0 +pytorch-lightning>=2.0.3 +shapely>=2.0.1 +networkx>=3.0 +Cartopy>=0.22.0 +pyproj>=3.4.1 +tueplots>=0.0.8 +plotly>=5.15.0 +torch-geometric>=2.5.2, +loguru>=0.7.2, +xarray>=2024.3.0, +zarr>=2.17.2, +dask>=2024.4.2, + +# for dev +pre-commit>=2.15.0 +codespell>=2.0.0, +black>=21.9b0, +isort>=5.9.3, +flake8>=4.0.1, +pylint>=3.0.3, +pytest>=8.1.1, From 7ee8821d07854af130946d1b8cfe87e4ca650d96 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 12:32:59 +0200 Subject: [PATCH 10/26] removed commas in requirements --- requirements.txt | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 964483c9..5e735032 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,17 +10,17 @@ Cartopy>=0.22.0 pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 -torch-geometric>=2.5.2, -loguru>=0.7.2, -xarray>=2024.3.0, -zarr>=2.17.2, -dask>=2024.4.2, +torch-geometric>=2.5.2 +loguru>=0.7.2 +xarray>=2024.3.0 +zarr>=2.17.2 +dask>=2024.4.2 # for dev pre-commit>=2.15.0 -codespell>=2.0.0, -black>=21.9b0, -isort>=5.9.3, -flake8>=4.0.1, -pylint>=3.0.3, -pytest>=8.1.1, +codespell>=2.0.0 +black>=21.9b0 +isort>=5.9.3 +flake8>=4.0.1 +pylint>=3.0.3 +pytest>=8.1.1 From 9a5f83cdde14c24d824cec9cecb354d63ff6db5b Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 24 May 2024 11:24:59 +0200 Subject: [PATCH 11/26] added downloading of test data from EWC using pooch --- tests/test_mllam_dataset.py | 66 ++++++++++++------------------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index bd638c78..d3934fc6 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -1,6 +1,9 @@ # Standard library import os +# Third-party +import pooch + # First-party from create_mesh import main as create_mesh from neural_lam.config import Config @@ -8,12 +11,26 @@ from neural_lam.weather_dataset import WeatherDataset from train_model import main as train_model -# from pathlib import Path -# import numpy as np -# import weather_model_graphs as wmg +os.environ["WANDB_DISABLED"] = "true" -os.environ["WANDB_DISABLED"] = "true" +def test_retrieve_data_ewc(): + # Initializing variables for the client + S3_BUCKET_NAME = "mllam-testdata" + S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" + S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" + S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) + known_hash = ( + "80903c4012018797eaa9f2818b6a205d1d2d3122297a15359f6343f54eddcb18" + ) + + pooch.retrieve( + url=S3_FULL_PATH, + known_hash=known_hash, + processor=pooch.Unzip(extract_dir=""), + path="data", + fname="meps_example_reduced.zip", + ) def test_load_reduced_meps_dataset(): @@ -87,44 +104,6 @@ def test_load_reduced_meps_dataset(): assert set(static_data.keys()) == required_props -# def test_create_wmg_graph_reduced_meps_dataset(): -# dataset_name = "meps_example_reduced" -# static_dir_path = Path("data", dataset_name, "static") -# graph_dir_path = Path("graphs", "hierarchial") - -# # -- Static grid node features -- -# xy_grid = np.load(static_dir_path / "nwp_xy.npy") - -# # create the full graph -# graph = wmg.create.archetype.create_oscarsson_hierarchical_graph( -# xy_grid=xy_grid -# ) - -# # split the graph by component -# graph_components = 
wmg.split_graph_by_edge_attribute( -# graph=graph, attr="component" -# ) - -# m2m_graph = graph_components.pop("m2m") -# m2m_graph_components = wmg.split_graph_by_edge_attribute( -# graph=m2m_graph, attr="direction" -# ) -# m2m_graph_components = { -# f"m2m_{name}": graph for name, graph in m2m_graph_components.items() -# } -# graph_components.update(m2m_graph_components) - -# # save the graph components to disk in pytorch-geometric format -# for component_name, graph_component in graph_components.items(): -# kwargs = {} -# wmg.save.to_pyg( -# graph=graph_component, -# name=component_name, -# output_directory=graph_dir_path, -# **kwargs, -# ) - - def test_create_graph_reduced_meps_dataset(): args = [ "--graph=hierarchical", @@ -150,6 +129,3 @@ def test_train_model_reduced_meps_dataset(): "--n_example_pred=0", ] train_model(args) - - -test_train_model_reduced_meps_dataset() From c7d1d08053c3fd529270b6990a1cc3156dead0cf Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 24 May 2024 11:30:02 +0200 Subject: [PATCH 12/26] added pooch to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 5e735032..ef3c39f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ isort>=5.9.3 flake8>=4.0.1 pylint>=3.0.3 pytest>=8.1.1 +pooch>=1.8.1 From 2667b6cdb852c122f2c1672e407cfa6e24bb23a8 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 24 May 2024 12:00:05 +0200 Subject: [PATCH 13/26] updated test dataset --- tests/test_mllam_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index d3934fc6..bf0123e4 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -21,7 +21,7 @@ def test_retrieve_data_ewc(): S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) known_hash = ( - "80903c4012018797eaa9f2818b6a205d1d2d3122297a15359f6343f54eddcb18" + "7d80f0d8c3022aa8c0331f26a17566b44b4b33a5d9a60f6d2e60bf65ed857d86" ) pooch.retrieve( From 0c7edd4daac3365f70afcb5c07bc77e5aadadfec Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 27 May 2024 13:25:43 +0200 Subject: [PATCH 14/26] Disabled latex to enable running on github without having to install latex --- tests/test_mllam_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index bf0123e4..c43ac6aa 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -3,6 +3,7 @@ # Third-party import pooch +from matplotlib import rcParams # First-party from create_mesh import main as create_mesh @@ -12,6 +13,7 @@ from train_model import main as train_model os.environ["WANDB_DISABLED"] = "true" +rcParams["text.usetex"] = False def test_retrieve_data_ewc(): From 9352949a631ccc22fd7eb6fb1957bcbb80fc4be4 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 27 May 2024 14:08:05 +0200 Subject: [PATCH 15/26] only use latex if available --- neural_lam/utils.py | 4 +++- tests/test_mllam_dataset.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 836b04ed..5cd2f9ad 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -1,5 +1,6 @@ # Standard library import os +import shutil # Third-party import numpy as np @@ -250,7 +251,8 @@ def fractional_plot_bundle(fraction): Get the tueplots bundle, but 
with figure width as a fraction of the page width.
     """
-    bundle = bundles.neurips2023(usetex=True, family="serif")
+    usetex = True if shutil.which("latex") else False
+    bundle = bundles.neurips2023(usetex=usetex, family="serif")
     bundle.update(figsizes.neurips2023())
     original_figsize = bundle["figure.figsize"]
     bundle["figure.figsize"] = (
diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py
index c43ac6aa..06a75a93 100644
--- a/tests/test_mllam_dataset.py
+++ b/tests/test_mllam_dataset.py
@@ -3,7 +3,6 @@
 
 # Third-party
 import pooch
-from matplotlib import rcParams
 
 # First-party
 from create_mesh import main as create_mesh
@@ -13,7 +12,6 @@ from train_model import main as train_model
 
 os.environ["WANDB_DISABLED"] = "true"
-rcParams["text.usetex"] = False
 
 
 def test_retrieve_data_ewc():
@@ -23,7 +21,7 @@ def test_retrieve_data_ewc():
     S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH])
     known_hash = (
-        "7d80f0d8c3022aa8c0331f26a17566b44b4b33a5d9a60f6d2e60bf65ed857d86"
+        "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7"
     )
 
     pooch.retrieve(

From 4995de03e9542b6ea792a297f6264d26c5de514e Mon Sep 17 00:00:00 2001
From: Simon Kamuk Christiansen
Date: Wed, 29 May 2024 00:00:31 +0200
Subject: [PATCH 16/26] included change requests from leifdenby:

- removed linting dependencies
- minor changes to test file
- added notebook outlining generation of meps_example_reduced from
  meps_example

---
 DEVELOPING.ipynb            | 237 ++++++++++++++++++++++++++++++++++++
 requirements.txt            |   5 -
 tests/test_mllam_dataset.py |  25 ++--
 3 files changed, 252 insertions(+), 15 deletions(-)
 create mode 100644 DEVELOPING.ipynb

diff --git a/DEVELOPING.ipynb b/DEVELOPING.ipynb
new file mode 100644
index 00000000..eef1268f
--- /dev/null
+++ b/DEVELOPING.ipynb
@@ -0,0 +1,237 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Creating meps_example_reduced\n",
+    "This notebook outlines how the small-size test dataset meps_example_reduced was created based on the slightly larger dataset meps_example. The zipped up datasets are 263 MB and 2.6 GB, respectively.\n",
+    "\n",
+    "The dataset was reduced in size by reducing the number of grid points and variables.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standard library\n",
+    "import os\n",
+    "\n",
+    "# Third-party\n",
+    "import numpy as np\n",
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "The number of grid points was reduced to 1/4 by halving the number of coordinates in both the x and y direction.
This was done by removing a quarter of the grid points along each outer edge, so the center grid points would stay centered in the new set.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load existing grid\n", + "grid_xy = np.load('data/meps_example/static/nwp_xy.npy')\n", + "# Get slices in each dimension by cutting off a quarter along each edge\n", + "num_x, num_y = grid_xy.shape[1:]\n", + "x_slice = slice(num_x//4, 3*num_x//4)\n", + "y_slice = slice(num_y//4, 3*num_y//4)\n", + "# Index and save reduced grid\n", + "grid_xy_reduced = grid_xy[:, x_slice, y_slice]\n", + "np.save('data/meps_example_reduced/static/nwp_xy.npy', grid_xy_reduced)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "This cut out the border, so a new perimeter of 10 grid points was established as border (10 was also the border size in the original \"meps_example\").\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Outer 10 grid points are border\n", + "old_border_mask = np.load('data/meps_example/static/border_mask.npy')\n", + "assert np.all(old_border_mask[10:-10, 10:-10] == False)\n", + "assert np.all(old_border_mask[:10, :] == True)\n", + "assert np.all(old_border_mask[:, :10] == True)\n", + "assert np.all(old_border_mask[-10:,:] == True)\n", + "assert np.all(old_border_mask[:,-10:] == True)\n", + "\n", + "# Create new array with False everywhere but the outer 10 grid points\n", + "border_mask = np.zeros_like(grid_xy_reduced[0,:,:], dtype=bool)\n", + "border_mask[:10] = True\n", + "border_mask[:,:10] = True\n", + "border_mask[-10:] = True\n", + "border_mask[:,-10:] = True\n", + "np.save('data/meps_example_reduced/static/border_mask.npy', border_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A few other files also needed to be copied using only the new reduced grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load surface_geopotential.npy, index only values from the reduced grid, and save to new file\n", + "surface_geopotential = np.load('data/meps_example/static/surface_geopotential.npy')\n", + "surface_geopotential_reduced = surface_geopotential[x_slice, y_slice]\n", + "np.save('data/meps_example_reduced/static/surface_geopotential.npy', surface_geopotential_reduced)\n", + "\n", + "# Load pytorch file grid_features.pt\n", + "grid_features = torch.load('data/meps_example/static/grid_features.pt')\n", + "# Index only values from the reduced grid. \n", + "# First reshape from (num_grid_points_total, 4) to (num_grid_points_x, num_grid_points_y, 4), \n", + "# then index, then reshape back to new total number of grid points\n", + "print(grid_features.shape)\n", + "grid_features_new = grid_features.reshape(num_x, num_y, 4)[x_slice,y_slice,:].reshape((-1, 4))\n", + "# Save to new file\n", + "torch.save(grid_features_new, 'data/meps_example_reduced/static/grid_features.pt')\n", + "\n", + "# flux_stats.pt is just a vector of length 2, so the grid shape and variable changes does not change this file\n", + "torch.save(torch.load('data/meps_example/static/flux_stats.pt'), 'data/meps_example_reduced/static/flux_stats.pt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "The number of variables was reduced by truncating the variable list to the first 8." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_vars = 8\n",
+    "\n",
+    "# Load parameter_weights.npy, truncate to first 8 variables, and save to new file\n",
+    "parameter_weights = np.load('data/meps_example/static/parameter_weights.npy')\n",
+    "parameter_weights_reduced = parameter_weights[:num_vars]\n",
+    "np.save('data/meps_example_reduced/static/parameter_weights.npy', parameter_weights_reduced)\n",
+    "\n",
+    "# Do the same for following 4 pytorch files\n",
+    "for file in ['diff_mean', 'diff_std', 'parameter_mean', 'parameter_std']:\n",
+    "    old_file = torch.load(f'data/meps_example/static/{file}.pt')\n",
+    "    new_file = old_file[:num_vars]\n",
+    "    torch.save(new_file, f'data/meps_example_reduced/static/{file}.pt')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lastly, the files in each of the directories train, test, and val have to be reduced. The folders all have the same structure with files of the following types:\n",
+    "```\n",
+    "nwp_YYYYMMDDHH_mbrXXX.npy\n",
+    "wtr_YYYYMMDDHH.npy\n",
+    "nwp_toa_downwelling_shortwave_flux_YYYYMMDDHH.npy\n",
+    "```\n",
+    "with ```YYYYMMDDHH``` being some date with hours, and ```XXX``` being some 3-digit integer.\n",
+    "\n",
+    "The first type of file has x and y in dimensions 1 and 2, and variable index in dimension 3. Dimension 0 is unchanged.\n",
+    "The second type has x and y in dimensions 1 and 2. Dimension 0 is unchanged.\n",
+    "The last type has just x and y as the only 2 dimensions.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(65, 268, 238, 18)\n",
+      "(65, 268, 238)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(np.load('data/meps_example/samples/train/nwp_2022040100_mbr000.npy').shape)\n",
+    "print(np.load('data/meps_example/samples/train/nwp_toa_downwelling_shortwave_flux_2022040112.npy').shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following loop goes through each file in each sample folder and indexes them according to the dimensions given by the file name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for sample in ['train', 'test', 'val']:\n",
+    "    files = os.listdir(f'data/meps_example/samples/{sample}')\n",
+    "\n",
+    "    for f in files:\n",
+    "        data = np.load(f'data/meps_example/samples/{sample}/{f}')\n",
+    "        if 'mbr' in f:\n",
+    "            data = data[:,x_slice,y_slice,:num_vars]\n",
+    "        elif 'wtr' in f:\n",
+    "            data = data[x_slice, y_slice]\n",
+    "        else:\n",
+    "            data = data[:,x_slice,y_slice]\n",
+    "        np.save(f'data/meps_example_reduced/samples/{sample}/{f}', data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape.
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index ef3c39f7..cc19cfc6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,10 +18,5 @@ dask>=2024.4.2 # for dev pre-commit>=2.15.0 -codespell>=2.0.0 -black>=21.9b0 -isort>=5.9.3 -flake8>=4.0.1 -pylint>=3.0.3 pytest>=8.1.1 pooch>=1.8.1 diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index 06a75a93..d660ac20 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -11,22 +11,25 @@ from neural_lam.weather_dataset import WeatherDataset from train_model import main as train_model +# Disable weights and biases to avoid unnecessary logging +# and to avoid having to deal with authentication os.environ["WANDB_DISABLED"] = "true" +# Initializing variables for the s3 client +S3_BUCKET_NAME = "mllam-testdata" +S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" +S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" +S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) +TEST_DATA_KNOWN_HASH = ( + "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7" +) -def test_retrieve_data_ewc(): - # Initializing variables for the client - S3_BUCKET_NAME = "mllam-testdata" - S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" - S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" - S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) - known_hash = ( - "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7" - ) +def test_retrieve_data_ewc(): + # Download and unzip test data into data/meps_example_reduced pooch.retrieve( url=S3_FULL_PATH, - known_hash=known_hash, + known_hash=TEST_DATA_KNOWN_HASH, processor=pooch.Unzip(extract_dir=""), path="data", fname="meps_example_reduced.zip", @@ -34,6 +37,8 @@ def test_retrieve_data_ewc(): def test_load_reduced_meps_dataset(): + # The data_config.yaml file is downloaded and extracted in + # test_retrieve_data_ewc together with the dataset itself data_config_file = "data/meps_example_reduced/data_config.yaml" dataset_name = "meps_example_reduced" From d33180fa2a2358e8a108ff923e8168da7b4f2b4f Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Wed, 29 May 2024 00:06:17 +0200 Subject: [PATCH 17/26] added comment --- DEVELOPING.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DEVELOPING.ipynb b/DEVELOPING.ipynb index eef1268f..484f56a1 100644 --- a/DEVELOPING.ipynb +++ b/DEVELOPING.ipynb @@ -209,7 +209,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape. Also the unit descriptions containing ```^``` was automatically parsed using latex, and to avoid having to install latex in the GitHub CI/CD pipeline, this was changed to ```**```." + "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape. 
Also the unit descriptions containing ```^``` was automatically parsed using latex, and to avoid having to install latex in the GitHub CI/CD pipeline, this was changed to ```**```. \n", + "\n", + "This new config file was placed in ```data/meps_example_reduced```, and that directory was then zipped and placed in a European Weather Cloud S3 bucket." ] } ], From e6c2c36c7d1ee5bbc9a88d05ec5e0371cb9e7070 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 30 May 2024 19:19:53 +0200 Subject: [PATCH 18/26] minor requested changes --- README.md | 2 ++ .../notebooks/create_reduced_meps_dataset.ipynb | 0 neural_lam/utils.py | 3 +++ requirements.txt | 4 ---- tests/test_mllam_dataset.py | 4 +++- 5 files changed, 8 insertions(+), 5 deletions(-) rename DEVELOPING.ipynb => docs/notebooks/create_reduced_meps_dataset.ipynb (100%) diff --git a/README.md b/README.md index ba0bb3fe..f16c940a 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,8 @@ pre-commit run --all-files ``` from the root directory of the repository. +Furthermore, all tests in the ```tests``` directory will be run upon pushing changes by a github action. Failure in any of the tests will also reject the push/PR. + # Contact If you are interested in machine learning models for LAM, have questions about our implementation or ideas for extending it, feel free to get in touch. You can open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). diff --git a/DEVELOPING.ipynb b/docs/notebooks/create_reduced_meps_dataset.ipynb similarity index 100% rename from DEVELOPING.ipynb rename to docs/notebooks/create_reduced_meps_dataset.ipynb diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 5cd2f9ad..59a529eb 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -251,6 +251,9 @@ def fractional_plot_bundle(fraction): Get the tueplots bundle, but with figure width as a fraction of the page width. """ + # If latex is not available, some visualizations might not render correctly, + # but will at least not raise an error. + # Alternatively, use unicode raised numbers. usetex = True if shutil.which("latex") else False bundle = bundles.neurips2023(usetex=usetex, family="serif") bundle.update(figsizes.neurips2023()) diff --git a/requirements.txt b/requirements.txt index cc19cfc6..16a29a18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,10 +11,6 @@ pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 torch-geometric>=2.5.2 -loguru>=0.7.2 -xarray>=2024.3.0 -zarr>=2.17.2 -dask>=2024.4.2 # for dev pre-commit>=2.15.0 diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index d660ac20..f91170c9 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -52,8 +52,10 @@ def test_load_reduced_meps_dataset(): assert len(var_names) == len(var_longnames) assert len(var_names) == len(var_units) - # TODO: can these two variables be loaded from elsewhere? 
+ # in future the number of grid static features + # will be provided by the Dataset class itself n_grid_static_features = 4 + # Hardcoded in model n_input_steps = 2 n_forcing_features = config.values["dataset"]["num_forcing_features"] From 3d77ac4379ff4d34c560379397b5726e0c5f1edb Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 31 May 2024 09:20:31 +0200 Subject: [PATCH 19/26] updated changelog, added cicd badges --- CHANGELOG.md | 1 + README.md | 8 ++++++++ docs/notebooks/create_reduced_meps_dataset.ipynb | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 061aa6bb..6ec00858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) ### Added +- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub [/#38](https://github.com/mllam/neural-lam/pull/38) @SimonKamuk - Replaced `constants.py` with `data_config.yaml` for data configuration management [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) diff --git a/README.md b/README.md index f16c940a..bd0d901f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ +![Linting](https://github.com/SimonKamuk/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=feature_add_tests) +![Automatic tests](https://github.com/SimonKamuk/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=feature_add_tests) + + +

diff --git a/docs/notebooks/create_reduced_meps_dataset.ipynb b/docs/notebooks/create_reduced_meps_dataset.ipynb index 484f56a1..daba23c4 100644 --- a/docs/notebooks/create_reduced_meps_dataset.ipynb +++ b/docs/notebooks/create_reduced_meps_dataset.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# Creating meps_example_reduced\n", - "This notebook outlines how the small-size test dataset meps_example_reduced was created based on the slightly larger dataset meps_example. The zipped up datasets are 263 MB and 2.6 GB, respectively.\n", + "This notebook outlines how the small-size test dataset ```meps_example_reduced``` was created based on the slightly larger dataset ```meps_example```. The zipped up datasets are 263 MB and 2.6 GB, respectively. See [README.md](../../README.md) for info on how to download ```meps_example```.\n", "\n", "The dataset was reduced in size by reducing the number of grid points and variables.\n" ] From d39030825fad7f753cfbedd7638442204dde0e16 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 31 May 2024 10:17:16 +0200 Subject: [PATCH 20/26] moved installation of torch-geometric from requirements to github test action --- .github/workflows/run_tests.yml | 1 + README.md | 5 ----- requirements.txt | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index f1dad637..031a75f6 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -27,6 +27,7 @@ jobs: run: | python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install torch-geometric>=2.5.2 - name: Test with pytest run: | pytest -v -s diff --git a/README.md b/README.md index bd0d901f..1bdc6602 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,5 @@ -![Linting](https://github.com/SimonKamuk/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=feature_add_tests) -![Automatic tests](https://github.com/SimonKamuk/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=feature_add_tests) - -

diff --git a/requirements.txt b/requirements.txt index 16a29a18..9309eea4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,6 @@ Cartopy>=0.22.0 pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 -torch-geometric>=2.5.2 # for dev pre-commit>=2.15.0 From de4efba5fbf7f1a096a5b93b842c21313e33e09e Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 31 May 2024 10:29:43 +0200 Subject: [PATCH 21/26] changed name of unit test badge --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 031a75f6..02fcffa9 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,4 +1,4 @@ -name: Run Unit Test via Pytest +name: Unit Tests on: # trigger on pushes to any branch, but not main From b0c4bed7ba8cdfe7eec8b6b3e118ee9d5c880dbb Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 07:11:20 +0200 Subject: [PATCH 22/26] added caching of test data --- .github/workflows/pre-commit.yml | 2 +- .github/workflows/run_tests.yml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index dc519e5b..dadac50d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: lint +name: Linting on: # trigger on pushes to any branch, but not main diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 02fcffa9..4cbdfa4b 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -31,3 +31,10 @@ jobs: - name: Test with pytest run: | pytest -v -s + - name: Cache data + uses: actions/cache@v2 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 From 18e55a4e93981db0b189df79548b075be4803804 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 07:21:50 +0200 Subject: [PATCH 23/26] fix for caching --- .github/workflows/run_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4cbdfa4b..cc569f0a 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -33,7 +33,6 @@ jobs: pytest -v -s - name: Cache data uses: actions/cache@v2 - with: path: data key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 restore-keys: | From 4f753076e75a611c0687ccce61571082937fe505 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 10:36:40 +0200 Subject: [PATCH 24/26] tried fix for caching test data --- .github/workflows/run_tests.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index cc569f0a..33edb243 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -32,8 +32,9 @@ jobs: run: | pytest -v -s - name: Cache data - uses: actions/cache@v2 - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 - restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + uses: actions/cache@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 From aceb47c83fb9f60aaaaae3cca3db4d5003f586f6 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 11:34:12 +0200 Subject: [PATCH 
25/26] updated changelog
---
 CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ec00858..757463f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD)
 
 ### Added
-- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub [/#38](https://github.com/mllam/neural-lam/pull/38) @SimonKamuk
+- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub. Added caching of test data to speed up running tests.
+  [\#38](https://github.com/mllam/neural-lam/pull/38)
+  @SimonKamuk
 
 - Replaced `constants.py` with `data_config.yaml` for data configuration management
   [\#31](https://github.com/joeloskarsson/neural-lam/pull/31)

From a6f80899af180f9fc451fd07c4985401e7242d3f Mon Sep 17 00:00:00 2001
From: Simon Kamuk Christiansen
Date: Mon, 3 Jun 2024 12:26:39 +0200
Subject: [PATCH 26/26] separated saving and restoring of cache

---
 .github/workflows/run_tests.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 33edb243..71bff3d3 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -28,13 +28,18 @@ jobs:
         python -m pip install --upgrade pip
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         pip install torch-geometric>=2.5.2
+    - name: Load cache data
+      uses: actions/cache/restore@v4
+      with:
+        path: data
+        key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+        restore-keys: |
+          ${{ runner.os }}-meps-reduced-example-data-v0.1.0
     - name: Test with pytest
      run: |
        pytest -v -s
-    - name: Cache data
-      uses: actions/cache@v4
+    - name: Save cache data
+      uses: actions/cache/save@v4
       with:
         path: data
         key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
-        restore-keys: |
-          ${{ runner.os }}-meps-reduced-example-data-v0.1.0
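
---

For reference, the entry points parameterized in this series can be driven
in-process the same way the test suite does. A minimal sketch follows: the
flag values are taken verbatim from tests/test_mllam_dataset.py, and calling
either main() with no argument list still falls back to parsing sys.argv, so
command-line usage of the scripts is unchanged.

# Sketch: drive the CLI entry points from Python, mirroring the tests.
import os

from create_mesh import main as create_mesh
from train_model import main as train_model

# The tests disable Weights & Biases logging before training
os.environ["WANDB_DISABLED"] = "true"

# Build the hierarchical graph for the reduced MEPS example dataset
create_mesh(
    [
        "--graph=hierarchical",
        "--hierarchical=1",
        "--data_config=data/meps_example_reduced/data_config.yaml",
        "--levels=2",
    ]
)

# Train for a single epoch on the same dataset
train_model(
    [
        "--model=hi_lam",
        "--data_config=data/meps_example_reduced/data_config.yaml",
        "--n_workers=4",
        "--epochs=1",
        "--graph=hierarchical",
        "--hidden_dim=16",
        "--hidden_layers=1",
        "--processor_layers=1",
        "--ar_steps=1",
        "--eval=val",
        "--n_example_pred=0",
    ]
)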