From 57edd747113d42d7d7dfa39051f563b668975c6d Mon Sep 17 00:00:00 2001
From: Simon Kamuk Christiansen
Date: Wed, 22 May 2024 15:36:40 +0200
Subject: [PATCH 01/26] added testing of loading data, creating graphs, and
 training model. Also allowed calling train_model.main with arguments (will
 still use sys.argv when no arguments are supplied in the function call)

---
 tests/__init__.py           |   0
 tests/test_mllam_dataset.py | 134 ++++++++++++++++++++++++++++++++++++
 train_model.py              |   4 +-
 3 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_mllam_dataset.py

diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py
new file mode 100644
index 00000000..0e55b7da
--- /dev/null
+++ b/tests/test_mllam_dataset.py
@@ -0,0 +1,134 @@
+# Standard library
+from pathlib import Path
+
+# First-party
+from neural_lam.weather_dataset import WeatherDataset
+from neural_lam.models.graph_lam import GraphLAM
+from neural_lam.utils import load_graph, load_static_data
+from neural_lam.config import Config
+from train_model import main
+
+# Third-party
+import numpy as np
+import weather_model_graphs as wmg
+
+
+def load_reduced_meps_dataset():
+    data_config_file = 'data/meps_example_reduced/data_config.yaml'
+    dataset_name = 'meps_example_reduced'
+
+    dataset = WeatherDataset(dataset_name="meps_example_reduced")
+    config = Config.from_file(data_config_file)
+
+    var_names = config.values['dataset']['var_names']
+    var_units = config.values['dataset']['var_units']
+    var_longnames = config.values['dataset']['var_longnames']
+
+    assert len(var_names) == len(var_longnames)
+    assert len(var_names) == len(var_units)
+
+    # TODO: can these two variables be loaded from elsewhere?
+ n_grid_static_features = 4 + n_input_steps = 2 + + n_forcing_features = config.values['dataset']['num_forcing_features'] + n_state_features = len(var_names) + n_prediction_timesteps = dataset.sample_length - n_input_steps + + nx, ny = config.values['grid_shape_state'] + n_grid = nx * ny + + # check that the dataset is not empty + assert len(dataset) > 0 + + # get the first item + init_states, target_states, forcing = dataset[0] + + # check that the shapes of the tensors are correct + assert init_states.shape == ( + n_input_steps, + n_grid, + n_state_features + ) + assert target_states.shape == ( + n_prediction_timesteps, + n_grid, + n_state_features, + ) + assert forcing.shape == ( + n_prediction_timesteps, + n_grid, + n_forcing_features, + ) + + static_data = load_static_data(dataset_name=dataset_name) + + required_props = {'border_mask', 'grid_static_features', 'step_diff_mean', 'step_diff_std', 'data_mean', 'data_std', 'param_weights'} + + # check the sizes of the props + assert static_data["border_mask"].shape == (n_grid, 1) + assert static_data["grid_static_features"].shape == (n_grid, n_grid_static_features) + assert static_data["step_diff_mean"].shape == (n_state_features,) + assert static_data["step_diff_std"].shape == (n_state_features,) + assert static_data["data_mean"].shape == (n_state_features,) + assert static_data["data_std"].shape == (n_state_features,) + assert static_data["param_weights"].shape == (n_state_features,) + + assert set(static_data.keys()) == required_props + + +def test_create_graph_reduced_meps_dataset(): + dataset_name = "meps_example_reduced" + static_dir_path = Path("data", dataset_name, "static") + graph_dir_path = Path("graphs", "hierarchial") + + # -- Static grid node features -- + xy_grid = np.load(static_dir_path / "nwp_xy.npy") + + # create the full graph + graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(xy_grid=xy_grid) + + # split the graph by component + graph_components = wmg.split_graph_by_edge_attribute( + graph=graph, attr="component" + # argument attribute seens to have been changed to attr, change also in weather-model-graphs/src/weather_model_graphs/save.py::to_pyg + ) + + m2m_graph = graph_components.pop("m2m") + m2m_graph_components = wmg.split_graph_by_edge_attribute( + graph=m2m_graph, attr="direction" + ) + m2m_graph_components = { + f"m2m_{name}": graph for name, graph in m2m_graph_components.items() + } + graph_components.update(m2m_graph_components) + + # save the graph components to disk in pytorch-geometric format + for component_name, graph_component in graph_components.items(): + kwargs = {} + wmg.save.to_pyg( + graph=graph_component, + name=component_name, + output_directory=graph_dir_path, + **kwargs, + ) + + +def test_train_model_reduced_meps_dataset(): + args = [ + '--model=hi_lam', + '--data_config=data/meps_example_reduced/data_config.yaml', + '--n_workers=1', + '--epochs=1', + '--graph=hierarchical', + '--hidden_dim=16', + '--hidden_layers=1', + '--processor_layers=1', + '--ar_steps=1', + '--eval=val', + '--wandb_project=None', + ] + main(args) + + + \ No newline at end of file diff --git a/train_model.py b/train_model.py index 390da6d4..0482c075 100644 --- a/train_model.py +++ b/train_model.py @@ -22,7 +22,7 @@ } -def main(): +def main(input_args=None): """ Main function for training and evaluating models """ @@ -206,7 +206,7 @@ def main(): default={}, help="Dict with variables and lead times to log watched metrics for", ) - args = parser.parse_args() + args = parser.parse_args(input_args) 
config_loader = config.Config.from_file(args.data_config) From 4e17efbf5a0717135413d795d0ae7a976ad0ef32 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Wed, 22 May 2024 15:48:48 +0200 Subject: [PATCH 02/26] added test to test name --- tests/test_mllam_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index 0e55b7da..8cb5b7ec 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -13,7 +13,7 @@ import weather_model_graphs as wmg -def load_reduced_meps_dataset(): +def test_load_reduced_meps_dataset(): data_config_file = 'data/meps_example_reduced/data_config.yaml' dataset_name = 'meps_example_reduced' From 7fa7cdd430af60bc56c86076ec5cbc1aa16bb384 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Wed, 22 May 2024 16:01:37 +0200 Subject: [PATCH 03/26] linting --- tests/test_mllam_dataset.py | 90 +++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index 8cb5b7ec..0dd454bd 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -1,41 +1,40 @@ # Standard library from pathlib import Path -# First-party -from neural_lam.weather_dataset import WeatherDataset -from neural_lam.models.graph_lam import GraphLAM -from neural_lam.utils import load_graph, load_static_data -from neural_lam.config import Config -from train_model import main - # Third-party import numpy as np import weather_model_graphs as wmg +# First-party +from neural_lam.config import Config +from neural_lam.utils import load_static_data +from neural_lam.weather_dataset import WeatherDataset +from train_model import main + def test_load_reduced_meps_dataset(): - data_config_file = 'data/meps_example_reduced/data_config.yaml' - dataset_name = 'meps_example_reduced' + data_config_file = "data/meps_example_reduced/data_config.yaml" + dataset_name = "meps_example_reduced" dataset = WeatherDataset(dataset_name="meps_example_reduced") config = Config.from_file(data_config_file) - var_names = config.values['dataset']['var_names'] - var_units = config.values['dataset']['var_units'] - var_longnames = config.values['dataset']['var_longnames'] + var_names = config.values["dataset"]["var_names"] + var_units = config.values["dataset"]["var_units"] + var_longnames = config.values["dataset"]["var_longnames"] assert len(var_names) == len(var_longnames) assert len(var_names) == len(var_units) - + # TODO: can these two variables be loaded from elsewhere? 
n_grid_static_features = 4 n_input_steps = 2 - n_forcing_features = config.values['dataset']['num_forcing_features'] + n_forcing_features = config.values["dataset"]["num_forcing_features"] n_state_features = len(var_names) n_prediction_timesteps = dataset.sample_length - n_input_steps - - nx, ny = config.values['grid_shape_state'] + + nx, ny = config.values["grid_shape_state"] n_grid = nx * ny # check that the dataset is not empty @@ -43,13 +42,9 @@ def test_load_reduced_meps_dataset(): # get the first item init_states, target_states, forcing = dataset[0] - + # check that the shapes of the tensors are correct - assert init_states.shape == ( - n_input_steps, - n_grid, - n_state_features - ) + assert init_states.shape == (n_input_steps, n_grid, n_state_features) assert target_states.shape == ( n_prediction_timesteps, n_grid, @@ -62,12 +57,23 @@ def test_load_reduced_meps_dataset(): ) static_data = load_static_data(dataset_name=dataset_name) - - required_props = {'border_mask', 'grid_static_features', 'step_diff_mean', 'step_diff_std', 'data_mean', 'data_std', 'param_weights'} - + + required_props = { + "border_mask", + "grid_static_features", + "step_diff_mean", + "step_diff_std", + "data_mean", + "data_std", + "param_weights", + } + # check the sizes of the props assert static_data["border_mask"].shape == (n_grid, 1) - assert static_data["grid_static_features"].shape == (n_grid, n_grid_static_features) + assert static_data["grid_static_features"].shape == ( + n_grid, + n_grid_static_features, + ) assert static_data["step_diff_mean"].shape == (n_state_features,) assert static_data["step_diff_std"].shape == (n_state_features,) assert static_data["data_mean"].shape == (n_state_features,) @@ -75,7 +81,7 @@ def test_load_reduced_meps_dataset(): assert static_data["param_weights"].shape == (n_state_features,) assert set(static_data.keys()) == required_props - + def test_create_graph_reduced_meps_dataset(): dataset_name = "meps_example_reduced" @@ -86,12 +92,13 @@ def test_create_graph_reduced_meps_dataset(): xy_grid = np.load(static_dir_path / "nwp_xy.npy") # create the full graph - graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(xy_grid=xy_grid) + graph = wmg.create.archetype.create_oscarsson_hierarchical_graph( + xy_grid=xy_grid + ) # split the graph by component graph_components = wmg.split_graph_by_edge_attribute( graph=graph, attr="component" - # argument attribute seens to have been changed to attr, change also in weather-model-graphs/src/weather_model_graphs/save.py::to_pyg ) m2m_graph = graph_components.pop("m2m") @@ -116,19 +123,16 @@ def test_create_graph_reduced_meps_dataset(): def test_train_model_reduced_meps_dataset(): args = [ - '--model=hi_lam', - '--data_config=data/meps_example_reduced/data_config.yaml', - '--n_workers=1', - '--epochs=1', - '--graph=hierarchical', - '--hidden_dim=16', - '--hidden_layers=1', - '--processor_layers=1', - '--ar_steps=1', - '--eval=val', - '--wandb_project=None', + "--model=hi_lam", + "--data_config=data/meps_example_reduced/data_config.yaml", + "--n_workers=1", + "--epochs=1", + "--graph=hierarchical", + "--hidden_dim=16", + "--hidden_layers=1", + "--processor_layers=1", + "--ar_steps=1", + "--eval=val", + "--wandb_project=None", ] main(args) - - - \ No newline at end of file From 569d061029fd6d670d974c808190977def8c17bd Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 10:39:35 +0200 Subject: [PATCH 04/26] made create_mesh callable as python function with arguments. 

Fixed error in plotting where non-callable cartopy projection from Config
was called.

Used current mesh generation from neural-lam instead of weather-model-graphs.

Finished test of training call.
---
 create_mesh.py              |   4 +-
 neural_lam/vis.py           |   4 +-
 tests/test_mllam_dataset.py | 103 +++++++++++++++++++++---------------
 3 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/create_mesh.py b/create_mesh.py
index f04b4d4b..41557a97 100644
--- a/create_mesh.py
+++ b/create_mesh.py
@@ -153,7 +153,7 @@ def prepend_node_index(graph, new_index):
     return networkx.relabel_nodes(graph, to_mapping, copy=True)
 
 
-def main():
+def main(input_args=None):
     parser = ArgumentParser(description="Graph generation arguments")
     parser.add_argument(
         "--data_config",
@@ -186,7 +186,7 @@ def main():
         default=0,
         help="Generate hierarchical mesh graph (default: 0, no)",
     )
-    args = parser.parse_args()
+    args = parser.parse_args(input_args)
 
     # Load grid positions
     config_loader = config.Config.from_file(args.data_config)
diff --git a/neural_lam/vis.py b/neural_lam/vis.py
index 2b6abf15..8c9ca77c 100644
--- a/neural_lam/vis.py
+++ b/neural_lam/vis.py
@@ -87,7 +87,7 @@ def plot_prediction(
         1,
         2,
         figsize=(13, 7),
-        subplot_kw={"projection": data_config.coords_projection()},
+        subplot_kw={"projection": data_config.coords_projection},
     )
 
     # Plot pred and target
@@ -136,7 +136,7 @@ def plot_spatial_error(error, obs_mask, data_config, title=None, vrange=None):
 
     fig, ax = plt.subplots(
         figsize=(5, 4.8),
-        subplot_kw={"projection": data_config.coords_projection()},
+        subplot_kw={"projection": data_config.coords_projection},
     )
 
     ax.coastlines()  # Add coastline outlines
diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py
index 0dd454bd..bd638c78 100644
--- a/tests/test_mllam_dataset.py
+++ b/tests/test_mllam_dataset.py
@@ -1,15 +1,19 @@
 # Standard library
-from pathlib import Path
-
-# Third-party
-import numpy as np
-import weather_model_graphs as wmg
+import os
 
 # First-party
+from create_mesh import main as create_mesh
 from neural_lam.config import Config
 from neural_lam.utils import load_static_data
 from neural_lam.weather_dataset import WeatherDataset
-from train_model import main
+from train_model import main as train_model
+
+# from pathlib import Path
+# import numpy as np
+# import weather_model_graphs as wmg
+
+
+os.environ["WANDB_DISABLED"] = "true"
 
 
 def test_load_reduced_meps_dataset():
@@ -83,49 +87,59 @@ def test_load_reduced_meps_dataset():
     assert set(static_data.keys()) == required_props
 
 
-def test_create_graph_reduced_meps_dataset():
-    dataset_name = "meps_example_reduced"
-    static_dir_path = Path("data", dataset_name, "static")
-    graph_dir_path = Path("graphs", "hierarchial")
-
-    # -- Static grid node features --
-    xy_grid = np.load(static_dir_path / "nwp_xy.npy")
-
-    # create the full graph
-    graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(
-        xy_grid=xy_grid
-    )
-
-    # split the graph by component
-    graph_components = wmg.split_graph_by_edge_attribute(
-        graph=graph, attr="component"
-    )
+# def test_create_wmg_graph_reduced_meps_dataset():
+#     dataset_name = "meps_example_reduced"
+#     static_dir_path = Path("data", dataset_name, "static")
+#     graph_dir_path = Path("graphs", "hierarchial")
+
+#     # -- Static grid node features --
+#     xy_grid = np.load(static_dir_path / "nwp_xy.npy")
+
+#     # create the full graph
+#     graph = wmg.create.archetype.create_oscarsson_hierarchical_graph(
+#         xy_grid=xy_grid
+#     )
+
+#     # split the graph by component
+#     graph_components = wmg.split_graph_by_edge_attribute(
+# graph=graph, attr="component" +# ) + +# m2m_graph = graph_components.pop("m2m") +# m2m_graph_components = wmg.split_graph_by_edge_attribute( +# graph=m2m_graph, attr="direction" +# ) +# m2m_graph_components = { +# f"m2m_{name}": graph for name, graph in m2m_graph_components.items() +# } +# graph_components.update(m2m_graph_components) + +# # save the graph components to disk in pytorch-geometric format +# for component_name, graph_component in graph_components.items(): +# kwargs = {} +# wmg.save.to_pyg( +# graph=graph_component, +# name=component_name, +# output_directory=graph_dir_path, +# **kwargs, +# ) - m2m_graph = graph_components.pop("m2m") - m2m_graph_components = wmg.split_graph_by_edge_attribute( - graph=m2m_graph, attr="direction" - ) - m2m_graph_components = { - f"m2m_{name}": graph for name, graph in m2m_graph_components.items() - } - graph_components.update(m2m_graph_components) - # save the graph components to disk in pytorch-geometric format - for component_name, graph_component in graph_components.items(): - kwargs = {} - wmg.save.to_pyg( - graph=graph_component, - name=component_name, - output_directory=graph_dir_path, - **kwargs, - ) +def test_create_graph_reduced_meps_dataset(): + args = [ + "--graph=hierarchical", + "--hierarchical=1", + "--data_config=data/meps_example_reduced/data_config.yaml", + "--levels=2", + ] + create_mesh(args) def test_train_model_reduced_meps_dataset(): args = [ "--model=hi_lam", "--data_config=data/meps_example_reduced/data_config.yaml", - "--n_workers=1", + "--n_workers=4", "--epochs=1", "--graph=hierarchical", "--hidden_dim=16", @@ -133,6 +147,9 @@ def test_train_model_reduced_meps_dataset(): "--processor_layers=1", "--ar_steps=1", "--eval=val", - "--wandb_project=None", + "--n_example_pred=0", ] - main(args) + train_model(args) + + +test_train_model_reduced_meps_dataset() From 1ebe9006737ce40603038ddedec5fb9fa9389d0d Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 11:15:05 +0200 Subject: [PATCH 05/26] added github ci/cd for running tests with pytest --- .github/workflows/run_tests.yml | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/run_tests.yml diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 00000000..aac167a7 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,35 @@ +name: Run Unit Test via Pytest + +on: + # trigger on pushes to any branch, but not main + push: + branches-ignore: + - main + # and also on PRs to main + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with pytest + run: | + coverage run -m pytest -v -s + - name: Generate Coverage Report + run: | + coverage report -m From 0e96e8809653f132188cb1742c0adb0f19c5b72b Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 11:27:41 +0200 Subject: [PATCH 06/26] removed coverage from test ci/cd --- .github/workflows/run_tests.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/run_tests.yml 
b/.github/workflows/run_tests.yml index aac167a7..1629c6f1 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -29,7 +29,4 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | - coverage run -m pytest -v -s - - name: Generate Coverage Report - run: | - coverage report -m + run -m pytest -v -s From 2339ed03e02ddd9f29632e2696e711f9a48352c3 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 11:28:31 +0200 Subject: [PATCH 07/26] fixed error in cicd --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 1629c6f1..f1dad637 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -29,4 +29,4 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | - run -m pytest -v -s + pytest -v -s From 5d3f83458029d524bafe091144a1c7978c6e1dd4 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 12:14:49 +0200 Subject: [PATCH 08/26] removed astroid from requirements, causes codespell error, assuming it will be installed anyway as a dependency during the other pip installs --- requirements.txt | 161 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index f381d54f..19cb5aae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,146 @@ -# for all -numpy>=1.24.2 -wandb>=0.13.10 -matplotlib>=3.7.0 -scipy>=1.10.0 -pytorch-lightning>=2.0.3 -shapely>=2.0.1 -networkx>=3.0 -Cartopy>=0.22.0 -pyproj>=3.4.1 -tueplots>=0.0.8 -plotly>=5.15.0 - -# for dev -pre-commit>=2.15.0 +aiohttp==3.9.5 +aiosignal==1.3.1 +appnope==0.1.4; platform_system == "Darwin" +asciitree==0.3.3 +asttokens==2.4.1 +async-timeout==4.0.3; python_version < "3.11" +attrs==23.2.0 +black==24.4.2 +cartopy==0.23.0 +certifi==2024.2.2 +cffi==1.16.0; implementation_name == "pypy" +cfgv==3.4.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpickle==3.0.0 +codespell==2.2.6 +colorama==0.4.6; sys_platform == "win32" or platform_system == "Windows" +comm==0.2.2 +contourpy==1.2.1 +cycler==0.12.1 +dask==2024.5.0 +debugpy==1.8.1 +decorator==5.1.1 +dill==0.3.8 +distlib==0.3.8 +docker-pycreds==0.4.0 +exceptiongroup==1.2.1; python_version < "3.11" +executing==2.0.1 +fasteners==0.19; sys_platform != "emscripten" +filelock==3.14.0 +flake8==7.0.0 +fonttools==4.51.0 +frozenlist==1.4.1 +fsspec==2024.5.0 +gitdb==4.0.11 +gitpython==3.1.43 +identify==2.5.36 +idna==3.7 +importlib-metadata==7.1.0; python_version < "3.12" +iniconfig==2.0.0 +intel-openmp==2021.4.0; platform_system == "Windows" +ipdb==0.13.13 +ipykernel==6.29.4 +ipython==8.24.0 +isort==5.13.2 +jedi==0.19.1 +jinja2==3.1.4 +joblib==1.4.2 +jupyter-client==8.6.1 +jupyter-core==5.7.2 +kiwisolver==1.4.5 +lightning-utilities==0.11.2 +locket==1.0.0 +loguru==0.7.2 +markupsafe==2.1.5 +matplotlib==3.9.0 +matplotlib-inline==0.1.7 +mccabe==0.7.0 +mkl==2021.4.0; platform_system == "Windows" +mpmath==1.3.0 +multidict==6.0.5 +mypy-extensions==1.0.0 +nest-asyncio==1.6.0 +networkx==3.3 +nodeenv==1.8.0 +numcodecs==0.12.1 +numpy==1.26.4 +nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == "Linux" and 
platform_machine == "x86_64" +nvidia-cuda-runtime-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cudnn-cu12==8.9.2.26; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cufft-cu12==11.0.2.54; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-curand-cu12==10.3.2.106; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cusolver-cu12==11.4.5.107; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-cusparse-cu12==12.1.0.106; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-nccl-cu12==2.20.5; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-nvjitlink-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64" +nvidia-nvtx-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" +packaging==24.0 +pandas==2.2.2 +parso==0.8.4 +partd==1.4.2 +pathspec==0.12.1 +pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten" +pillow==10.3.0 +platformdirs==4.2.2 +plotly==5.22.0 +pluggy==1.5.0 +pre-commit==3.7.1 +prompt-toolkit==3.0.43 +protobuf==4.25.3; python_version > "3.9" or sys_platform != "linux" +psutil==5.9.8 +ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten" +pure-eval==0.2.2 +pycodestyle==2.11.1 +pycparser==2.22; implementation_name == "pypy" +pyflakes==3.2.0 +pygments==2.18.0 +pylint==3.2.0 +pyparsing==3.1.2 +pyproj==3.6.1 +pyshp==2.3.1 +pytest==8.2.0 +python-dateutil==2.9.0.post0 +pytorch-lightning==2.2.4 +pytz==2024.1 +pywin32==306; sys_platform == "win32" and platform_python_implementation != "PyPy" +pyyaml==6.0.1 +pyzmq==26.0.3 +requests==2.31.0 +scikit-learn==1.4.2 +scipy==1.13.0 +sentry-sdk==2.2.0 +setproctitle==1.3.3 +setuptools==69.5.1 +shapely==2.0.4 +six==1.16.0 +smmap==5.0.1 +stack-data==0.6.3 +sympy==1.12 +tbb==2021.12.0; platform_system == "Windows" +tenacity==8.3.0 +threadpoolctl==3.5.0 +tomli==2.0.1; python_version < "3.11" +tomlkit==0.12.5 +toolz==0.12.1 +torch==2.3.0 +torch-geometric==2.5.3 +torchmetrics==1.4.0.post0 +tornado==6.4 +tqdm==4.66.4 +traitlets==5.14.3 +triton==2.3.0; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12" +tueplots==0.0.15 +typing-extensions==4.11.0 +tzdata==2024.1 +urllib3==2.2.1 +virtualenv==20.26.2 +wandb==0.17.0 +wcwidth==0.2.13 +win32-setctime==1.1.0; sys_platform == "win32" +xarray==2024.5.0 +yarl==1.9.4 +zarr==2.18.0 +zipp==3.18.2; python_version < "3.12" From 8d733b7f56ef2245e1eb1950f850cf61b131c78e Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 12:29:37 +0200 Subject: [PATCH 09/26] simplified requirements --- requirements.txt | 172 +++++++---------------------------------------- 1 file changed, 26 insertions(+), 146 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19cb5aae..964483c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,146 +1,26 @@ -aiohttp==3.9.5 -aiosignal==1.3.1 -appnope==0.1.4; platform_system == "Darwin" -asciitree==0.3.3 -asttokens==2.4.1 -async-timeout==4.0.3; python_version < "3.11" -attrs==23.2.0 -black==24.4.2 -cartopy==0.23.0 -certifi==2024.2.2 -cffi==1.16.0; implementation_name == "pypy" -cfgv==3.4.0 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -codespell==2.2.6 -colorama==0.4.6; sys_platform == "win32" or platform_system == "Windows" -comm==0.2.2 -contourpy==1.2.1 -cycler==0.12.1 -dask==2024.5.0 -debugpy==1.8.1 -decorator==5.1.1 -dill==0.3.8 -distlib==0.3.8 -docker-pycreds==0.4.0 
-exceptiongroup==1.2.1; python_version < "3.11" -executing==2.0.1 -fasteners==0.19; sys_platform != "emscripten" -filelock==3.14.0 -flake8==7.0.0 -fonttools==4.51.0 -frozenlist==1.4.1 -fsspec==2024.5.0 -gitdb==4.0.11 -gitpython==3.1.43 -identify==2.5.36 -idna==3.7 -importlib-metadata==7.1.0; python_version < "3.12" -iniconfig==2.0.0 -intel-openmp==2021.4.0; platform_system == "Windows" -ipdb==0.13.13 -ipykernel==6.29.4 -ipython==8.24.0 -isort==5.13.2 -jedi==0.19.1 -jinja2==3.1.4 -joblib==1.4.2 -jupyter-client==8.6.1 -jupyter-core==5.7.2 -kiwisolver==1.4.5 -lightning-utilities==0.11.2 -locket==1.0.0 -loguru==0.7.2 -markupsafe==2.1.5 -matplotlib==3.9.0 -matplotlib-inline==0.1.7 -mccabe==0.7.0 -mkl==2021.4.0; platform_system == "Windows" -mpmath==1.3.0 -multidict==6.0.5 -mypy-extensions==1.0.0 -nest-asyncio==1.6.0 -networkx==3.3 -nodeenv==1.8.0 -numcodecs==0.12.1 -numpy==1.26.4 -nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-runtime-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cudnn-cu12==8.9.2.26; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cufft-cu12==11.0.2.54; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-curand-cu12==10.3.2.106; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cusolver-cu12==11.4.5.107; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cusparse-cu12==12.1.0.106; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nccl-cu12==2.20.5; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nvjitlink-cu12==12.4.127; platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nvtx-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64" -packaging==24.0 -pandas==2.2.2 -parso==0.8.4 -partd==1.4.2 -pathspec==0.12.1 -pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten" -pillow==10.3.0 -platformdirs==4.2.2 -plotly==5.22.0 -pluggy==1.5.0 -pre-commit==3.7.1 -prompt-toolkit==3.0.43 -protobuf==4.25.3; python_version > "3.9" or sys_platform != "linux" -psutil==5.9.8 -ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten" -pure-eval==0.2.2 -pycodestyle==2.11.1 -pycparser==2.22; implementation_name == "pypy" -pyflakes==3.2.0 -pygments==2.18.0 -pylint==3.2.0 -pyparsing==3.1.2 -pyproj==3.6.1 -pyshp==2.3.1 -pytest==8.2.0 -python-dateutil==2.9.0.post0 -pytorch-lightning==2.2.4 -pytz==2024.1 -pywin32==306; sys_platform == "win32" and platform_python_implementation != "PyPy" -pyyaml==6.0.1 -pyzmq==26.0.3 -requests==2.31.0 -scikit-learn==1.4.2 -scipy==1.13.0 -sentry-sdk==2.2.0 -setproctitle==1.3.3 -setuptools==69.5.1 -shapely==2.0.4 -six==1.16.0 -smmap==5.0.1 -stack-data==0.6.3 -sympy==1.12 -tbb==2021.12.0; platform_system == "Windows" -tenacity==8.3.0 -threadpoolctl==3.5.0 -tomli==2.0.1; python_version < "3.11" -tomlkit==0.12.5 -toolz==0.12.1 -torch==2.3.0 -torch-geometric==2.5.3 -torchmetrics==1.4.0.post0 -tornado==6.4 -tqdm==4.66.4 -traitlets==5.14.3 -triton==2.3.0; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12" -tueplots==0.0.15 -typing-extensions==4.11.0 -tzdata==2024.1 -urllib3==2.2.1 -virtualenv==20.26.2 -wandb==0.17.0 -wcwidth==0.2.13 -win32-setctime==1.1.0; sys_platform == "win32" 
-xarray==2024.5.0 -yarl==1.9.4 -zarr==2.18.0 -zipp==3.18.2; python_version < "3.12" +# for all +numpy>=1.24.2 +wandb>=0.13.10 +matplotlib>=3.7.0 +scipy>=1.10.0 +pytorch-lightning>=2.0.3 +shapely>=2.0.1 +networkx>=3.0 +Cartopy>=0.22.0 +pyproj>=3.4.1 +tueplots>=0.0.8 +plotly>=5.15.0 +torch-geometric>=2.5.2, +loguru>=0.7.2, +xarray>=2024.3.0, +zarr>=2.17.2, +dask>=2024.4.2, + +# for dev +pre-commit>=2.15.0 +codespell>=2.0.0, +black>=21.9b0, +isort>=5.9.3, +flake8>=4.0.1, +pylint>=3.0.3, +pytest>=8.1.1, From 7ee8821d07854af130946d1b8cfe87e4ca650d96 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 23 May 2024 12:32:59 +0200 Subject: [PATCH 10/26] removed commas in requirements --- requirements.txt | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 964483c9..5e735032 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,17 +10,17 @@ Cartopy>=0.22.0 pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 -torch-geometric>=2.5.2, -loguru>=0.7.2, -xarray>=2024.3.0, -zarr>=2.17.2, -dask>=2024.4.2, +torch-geometric>=2.5.2 +loguru>=0.7.2 +xarray>=2024.3.0 +zarr>=2.17.2 +dask>=2024.4.2 # for dev pre-commit>=2.15.0 -codespell>=2.0.0, -black>=21.9b0, -isort>=5.9.3, -flake8>=4.0.1, -pylint>=3.0.3, -pytest>=8.1.1, +codespell>=2.0.0 +black>=21.9b0 +isort>=5.9.3 +flake8>=4.0.1 +pylint>=3.0.3 +pytest>=8.1.1 From 9a5f83cdde14c24d824cec9cecb354d63ff6db5b Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 24 May 2024 11:24:59 +0200 Subject: [PATCH 11/26] added downloading of test data from EWC using pooch --- tests/test_mllam_dataset.py | 66 ++++++++++++------------------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index bd638c78..d3934fc6 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -1,6 +1,9 @@ # Standard library import os +# Third-party +import pooch + # First-party from create_mesh import main as create_mesh from neural_lam.config import Config @@ -8,12 +11,26 @@ from neural_lam.weather_dataset import WeatherDataset from train_model import main as train_model -# from pathlib import Path -# import numpy as np -# import weather_model_graphs as wmg +os.environ["WANDB_DISABLED"] = "true" -os.environ["WANDB_DISABLED"] = "true" +def test_retrieve_data_ewc(): + # Initializing variables for the client + S3_BUCKET_NAME = "mllam-testdata" + S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" + S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" + S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) + known_hash = ( + "80903c4012018797eaa9f2818b6a205d1d2d3122297a15359f6343f54eddcb18" + ) + + pooch.retrieve( + url=S3_FULL_PATH, + known_hash=known_hash, + processor=pooch.Unzip(extract_dir=""), + path="data", + fname="meps_example_reduced.zip", + ) def test_load_reduced_meps_dataset(): @@ -87,44 +104,6 @@ def test_load_reduced_meps_dataset(): assert set(static_data.keys()) == required_props -# def test_create_wmg_graph_reduced_meps_dataset(): -# dataset_name = "meps_example_reduced" -# static_dir_path = Path("data", dataset_name, "static") -# graph_dir_path = Path("graphs", "hierarchial") - -# # -- Static grid node features -- -# xy_grid = np.load(static_dir_path / "nwp_xy.npy") - -# # create the full graph -# graph = wmg.create.archetype.create_oscarsson_hierarchical_graph( -# xy_grid=xy_grid -# ) - -# # split the graph by component -# graph_components = 
wmg.split_graph_by_edge_attribute( -# graph=graph, attr="component" -# ) - -# m2m_graph = graph_components.pop("m2m") -# m2m_graph_components = wmg.split_graph_by_edge_attribute( -# graph=m2m_graph, attr="direction" -# ) -# m2m_graph_components = { -# f"m2m_{name}": graph for name, graph in m2m_graph_components.items() -# } -# graph_components.update(m2m_graph_components) - -# # save the graph components to disk in pytorch-geometric format -# for component_name, graph_component in graph_components.items(): -# kwargs = {} -# wmg.save.to_pyg( -# graph=graph_component, -# name=component_name, -# output_directory=graph_dir_path, -# **kwargs, -# ) - - def test_create_graph_reduced_meps_dataset(): args = [ "--graph=hierarchical", @@ -150,6 +129,3 @@ def test_train_model_reduced_meps_dataset(): "--n_example_pred=0", ] train_model(args) - - -test_train_model_reduced_meps_dataset() From c7d1d08053c3fd529270b6990a1cc3156dead0cf Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 24 May 2024 11:30:02 +0200 Subject: [PATCH 12/26] added pooch to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 5e735032..ef3c39f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ isort>=5.9.3 flake8>=4.0.1 pylint>=3.0.3 pytest>=8.1.1 +pooch>=1.8.1 From 2667b6cdb852c122f2c1672e407cfa6e24bb23a8 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 24 May 2024 12:00:05 +0200 Subject: [PATCH 13/26] updated test dataset --- tests/test_mllam_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index d3934fc6..bf0123e4 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -21,7 +21,7 @@ def test_retrieve_data_ewc(): S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) known_hash = ( - "80903c4012018797eaa9f2818b6a205d1d2d3122297a15359f6343f54eddcb18" + "7d80f0d8c3022aa8c0331f26a17566b44b4b33a5d9a60f6d2e60bf65ed857d86" ) pooch.retrieve( From 0c7edd4daac3365f70afcb5c07bc77e5aadadfec Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 27 May 2024 13:25:43 +0200 Subject: [PATCH 14/26] Disabled latex to enable running on github without having to install latex --- tests/test_mllam_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index bf0123e4..c43ac6aa 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -3,6 +3,7 @@ # Third-party import pooch +from matplotlib import rcParams # First-party from create_mesh import main as create_mesh @@ -12,6 +13,7 @@ from train_model import main as train_model os.environ["WANDB_DISABLED"] = "true" +rcParams["text.usetex"] = False def test_retrieve_data_ewc(): From 9352949a631ccc22fd7eb6fb1957bcbb80fc4be4 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 27 May 2024 14:08:05 +0200 Subject: [PATCH 15/26] only use latex if available --- neural_lam/utils.py | 4 +++- tests/test_mllam_dataset.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 836b04ed..5cd2f9ad 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -1,5 +1,6 @@ # Standard library import os +import shutil # Third-party import numpy as np @@ -250,7 +251,8 @@ def fractional_plot_bundle(fraction): Get the tueplots bundle, but 
with figure width as a fraction of the page width.
     """
-    bundle = bundles.neurips2023(usetex=True, family="serif")
+    usetex = True if shutil.which("latex") else False
+    bundle = bundles.neurips2023(usetex=usetex, family="serif")
     bundle.update(figsizes.neurips2023())
     original_figsize = bundle["figure.figsize"]
     bundle["figure.figsize"] = (
diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py
index c43ac6aa..06a75a93 100644
--- a/tests/test_mllam_dataset.py
+++ b/tests/test_mllam_dataset.py
@@ -3,7 +3,6 @@
 
 # Third-party
 import pooch
-from matplotlib import rcParams
 
 # First-party
 from create_mesh import main as create_mesh
@@ -13,7 +12,6 @@ from train_model import main as train_model
 
 os.environ["WANDB_DISABLED"] = "true"
-rcParams["text.usetex"] = False
 
 
 def test_retrieve_data_ewc():
@@ -23,7 +21,7 @@ def test_retrieve_data_ewc():
     S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH])
     known_hash = (
-        "7d80f0d8c3022aa8c0331f26a17566b44b4b33a5d9a60f6d2e60bf65ed857d86"
+        "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7"
     )
 
     pooch.retrieve(

From 4995de03e9542b6ea792a297f6264d26c5de514e Mon Sep 17 00:00:00 2001
From: Simon Kamuk Christiansen
Date: Wed, 29 May 2024 00:00:31 +0200
Subject: [PATCH 16/26] included change requests from leifdenby:

- removed linting dependencies
- minor changes to test file
- added notebook outlining generation of meps_example_reduced from
  meps_example

---
 DEVELOPING.ipynb            | 237 ++++++++++++++++++++++++++++++++++++
 requirements.txt            |   5 -
 tests/test_mllam_dataset.py |  25 ++--
 3 files changed, 252 insertions(+), 15 deletions(-)
 create mode 100644 DEVELOPING.ipynb

diff --git a/DEVELOPING.ipynb b/DEVELOPING.ipynb
new file mode 100644
index 00000000..eef1268f
--- /dev/null
+++ b/DEVELOPING.ipynb
@@ -0,0 +1,237 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Creating meps_example_reduced\n",
+    "This notebook outlines how the small-size test dataset meps_example_reduced was created based on the slightly larger dataset meps_example. The zipped up datasets are 263 MB and 2.6 GB, respectively.\n",
+    "\n",
+    "The dataset was reduced in size by reducing the number of grid points and variables.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standard library\n",
+    "import os\n",
+    "\n",
+    "# Third-party\n",
+    "import numpy as np\n",
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "The number of grid points was reduced to 1/4 by halving the number of coordinates in both the x and y direction.
This was done by removing a quarter of the grid points along each outer edge, so the center grid points would stay centered in the new set.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load existing grid\n", + "grid_xy = np.load('data/meps_example/static/nwp_xy.npy')\n", + "# Get slices in each dimension by cutting off a quarter along each edge\n", + "num_x, num_y = grid_xy.shape[1:]\n", + "x_slice = slice(num_x//4, 3*num_x//4)\n", + "y_slice = slice(num_y//4, 3*num_y//4)\n", + "# Index and save reduced grid\n", + "grid_xy_reduced = grid_xy[:, x_slice, y_slice]\n", + "np.save('data/meps_example_reduced/static/nwp_xy.npy', grid_xy_reduced)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "This cut out the border, so a new perimeter of 10 grid points was established as border (10 was also the border size in the original \"meps_example\").\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Outer 10 grid points are border\n", + "old_border_mask = np.load('data/meps_example/static/border_mask.npy')\n", + "assert np.all(old_border_mask[10:-10, 10:-10] == False)\n", + "assert np.all(old_border_mask[:10, :] == True)\n", + "assert np.all(old_border_mask[:, :10] == True)\n", + "assert np.all(old_border_mask[-10:,:] == True)\n", + "assert np.all(old_border_mask[:,-10:] == True)\n", + "\n", + "# Create new array with False everywhere but the outer 10 grid points\n", + "border_mask = np.zeros_like(grid_xy_reduced[0,:,:], dtype=bool)\n", + "border_mask[:10] = True\n", + "border_mask[:,:10] = True\n", + "border_mask[-10:] = True\n", + "border_mask[:,-10:] = True\n", + "np.save('data/meps_example_reduced/static/border_mask.npy', border_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A few other files also needed to be copied using only the new reduced grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load surface_geopotential.npy, index only values from the reduced grid, and save to new file\n", + "surface_geopotential = np.load('data/meps_example/static/surface_geopotential.npy')\n", + "surface_geopotential_reduced = surface_geopotential[x_slice, y_slice]\n", + "np.save('data/meps_example_reduced/static/surface_geopotential.npy', surface_geopotential_reduced)\n", + "\n", + "# Load pytorch file grid_features.pt\n", + "grid_features = torch.load('data/meps_example/static/grid_features.pt')\n", + "# Index only values from the reduced grid. \n", + "# First reshape from (num_grid_points_total, 4) to (num_grid_points_x, num_grid_points_y, 4), \n", + "# then index, then reshape back to new total number of grid points\n", + "print(grid_features.shape)\n", + "grid_features_new = grid_features.reshape(num_x, num_y, 4)[x_slice,y_slice,:].reshape((-1, 4))\n", + "# Save to new file\n", + "torch.save(grid_features_new, 'data/meps_example_reduced/static/grid_features.pt')\n", + "\n", + "# flux_stats.pt is just a vector of length 2, so the grid shape and variable changes does not change this file\n", + "torch.save(torch.load('data/meps_example/static/flux_stats.pt'), 'data/meps_example_reduced/static/flux_stats.pt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "The number of variables was reduced by truncating the variable list to the first 8." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_vars = 8\n",
+    "\n",
+    "# Load parameter_weights.npy, truncate to first 8 variables, and save to new file\n",
+    "parameter_weights = np.load('data/meps_example/static/parameter_weights.npy')\n",
+    "parameter_weights_reduced = parameter_weights[:num_vars]\n",
+    "np.save('data/meps_example_reduced/static/parameter_weights.npy', parameter_weights_reduced)\n",
+    "\n",
+    "# Do the same for following 4 pytorch files\n",
+    "for file in ['diff_mean', 'diff_std', 'parameter_mean', 'parameter_std']:\n",
+    "    old_file = torch.load(f'data/meps_example/static/{file}.pt')\n",
+    "    new_file = old_file[:num_vars]\n",
+    "    torch.save(new_file, f'data/meps_example_reduced/static/{file}.pt')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lastly, the files in each of the directories train, test, and val have to be reduced. The folders all have the same structure with files of the following types:\n",
+    "```\n",
+    "nwp_YYYYMMDDHH_mbrXXX.npy\n",
+    "wtr_YYYYMMDDHH.npy\n",
+    "nwp_toa_downwelling_shortwave_flux_YYYYMMDDHH.npy\n",
+    "```\n",
+    "with ```YYYYMMDDHH``` being some date with hours, and ```XXX``` being some 3-digit integer.\n",
+    "\n",
+    "The first type of file has x and y in dimensions 1 and 2, and variable index in dimension 3. Dimension 0 is unchanged.\n",
+    "The second type has x and y in dimensions 1 and 2. Dimension 0 is unchanged.\n",
+    "The last type has just x and y as the only 2 dimensions.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(65, 268, 238, 18)\n",
+      "(65, 268, 238)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(np.load('data/meps_example/samples/train/nwp_2022040100_mbr000.npy').shape)\n",
+    "print(np.load('data/meps_example/samples/train/nwp_toa_downwelling_shortwave_flux_2022040112.npy').shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following loop goes through each file in each sample folder and indexes them according to the dimensions given by the file name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for sample in ['train', 'test', 'val']:\n",
+    "    files = os.listdir(f'data/meps_example/samples/{sample}')\n",
+    "\n",
+    "    for f in files:\n",
+    "        data = np.load(f'data/meps_example/samples/{sample}/{f}')\n",
+    "        if 'mbr' in f:\n",
+    "            data = data[:,x_slice,y_slice,:num_vars]\n",
+    "        elif 'wtr' in f:\n",
+    "            data = data[x_slice, y_slice]\n",
+    "        else:\n",
+    "            data = data[:,x_slice,y_slice]\n",
+    "        np.save(f'data/meps_example_reduced/samples/{sample}/{f}', data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape.
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index ef3c39f7..cc19cfc6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,10 +18,5 @@ dask>=2024.4.2 # for dev pre-commit>=2.15.0 -codespell>=2.0.0 -black>=21.9b0 -isort>=5.9.3 -flake8>=4.0.1 -pylint>=3.0.3 pytest>=8.1.1 pooch>=1.8.1 diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index 06a75a93..d660ac20 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -11,22 +11,25 @@ from neural_lam.weather_dataset import WeatherDataset from train_model import main as train_model +# Disable weights and biases to avoid unnecessary logging +# and to avoid having to deal with authentication os.environ["WANDB_DISABLED"] = "true" +# Initializing variables for the s3 client +S3_BUCKET_NAME = "mllam-testdata" +S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" +S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" +S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) +TEST_DATA_KNOWN_HASH = ( + "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7" +) -def test_retrieve_data_ewc(): - # Initializing variables for the client - S3_BUCKET_NAME = "mllam-testdata" - S3_ENDPOINT_URL = "https://object-store.os-api.cci1.ecmwf.int" - S3_FILE_PATH = "neural-lam/npy/meps_example_reduced.v0.1.0.zip" - S3_FULL_PATH = "/".join([S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_FILE_PATH]) - known_hash = ( - "98c7a2f442922de40c6891fe3e5d190346889d6e0e97550170a82a7ce58a72b7" - ) +def test_retrieve_data_ewc(): + # Download and unzip test data into data/meps_example_reduced pooch.retrieve( url=S3_FULL_PATH, - known_hash=known_hash, + known_hash=TEST_DATA_KNOWN_HASH, processor=pooch.Unzip(extract_dir=""), path="data", fname="meps_example_reduced.zip", @@ -34,6 +37,8 @@ def test_retrieve_data_ewc(): def test_load_reduced_meps_dataset(): + # The data_config.yaml file is downloaded and extracted in + # test_retrieve_data_ewc together with the dataset itself data_config_file = "data/meps_example_reduced/data_config.yaml" dataset_name = "meps_example_reduced" From d33180fa2a2358e8a108ff923e8168da7b4f2b4f Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Wed, 29 May 2024 00:06:17 +0200 Subject: [PATCH 17/26] added comment --- DEVELOPING.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DEVELOPING.ipynb b/DEVELOPING.ipynb index eef1268f..484f56a1 100644 --- a/DEVELOPING.ipynb +++ b/DEVELOPING.ipynb @@ -209,7 +209,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape. Also the unit descriptions containing ```^``` was automatically parsed using latex, and to avoid having to install latex in the GitHub CI/CD pipeline, this was changed to ```**```." + "Lastly, the file ```data_config.yaml``` is modified manually by truncating the variable units, long and short names, and setting the new grid shape. 
Also the unit descriptions containing ```^``` was automatically parsed using latex, and to avoid having to install latex in the GitHub CI/CD pipeline, this was changed to ```**```. \n", + "\n", + "This new config file was placed in ```data/meps_example_reduced```, and that directory was then zipped and placed in a European Weather Cloud S3 bucket." ] } ], From e6c2c36c7d1ee5bbc9a88d05ec5e0371cb9e7070 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Thu, 30 May 2024 19:19:53 +0200 Subject: [PATCH 18/26] minor requested changes --- README.md | 2 ++ .../notebooks/create_reduced_meps_dataset.ipynb | 0 neural_lam/utils.py | 3 +++ requirements.txt | 4 ---- tests/test_mllam_dataset.py | 4 +++- 5 files changed, 8 insertions(+), 5 deletions(-) rename DEVELOPING.ipynb => docs/notebooks/create_reduced_meps_dataset.ipynb (100%) diff --git a/README.md b/README.md index ba0bb3fe..f16c940a 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,8 @@ pre-commit run --all-files ``` from the root directory of the repository. +Furthermore, all tests in the ```tests``` directory will be run upon pushing changes by a github action. Failure in any of the tests will also reject the push/PR. + # Contact If you are interested in machine learning models for LAM, have questions about our implementation or ideas for extending it, feel free to get in touch. You can open a github issue on this page, or (if more suitable) send an email to [joel.oskarsson@liu.se](mailto:joel.oskarsson@liu.se). diff --git a/DEVELOPING.ipynb b/docs/notebooks/create_reduced_meps_dataset.ipynb similarity index 100% rename from DEVELOPING.ipynb rename to docs/notebooks/create_reduced_meps_dataset.ipynb diff --git a/neural_lam/utils.py b/neural_lam/utils.py index 5cd2f9ad..59a529eb 100644 --- a/neural_lam/utils.py +++ b/neural_lam/utils.py @@ -251,6 +251,9 @@ def fractional_plot_bundle(fraction): Get the tueplots bundle, but with figure width as a fraction of the page width. """ + # If latex is not available, some visualizations might not render correctly, + # but will at least not raise an error. + # Alternatively, use unicode raised numbers. usetex = True if shutil.which("latex") else False bundle = bundles.neurips2023(usetex=usetex, family="serif") bundle.update(figsizes.neurips2023()) diff --git a/requirements.txt b/requirements.txt index cc19cfc6..16a29a18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,10 +11,6 @@ pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 torch-geometric>=2.5.2 -loguru>=0.7.2 -xarray>=2024.3.0 -zarr>=2.17.2 -dask>=2024.4.2 # for dev pre-commit>=2.15.0 diff --git a/tests/test_mllam_dataset.py b/tests/test_mllam_dataset.py index d660ac20..f91170c9 100644 --- a/tests/test_mllam_dataset.py +++ b/tests/test_mllam_dataset.py @@ -52,8 +52,10 @@ def test_load_reduced_meps_dataset(): assert len(var_names) == len(var_longnames) assert len(var_names) == len(var_units) - # TODO: can these two variables be loaded from elsewhere? 
+ # in future the number of grid static features + # will be provided by the Dataset class itself n_grid_static_features = 4 + # Hardcoded in model n_input_steps = 2 n_forcing_features = config.values["dataset"]["num_forcing_features"] From 3d77ac4379ff4d34c560379397b5726e0c5f1edb Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 31 May 2024 09:20:31 +0200 Subject: [PATCH 19/26] updated changelog, added cicd badges --- CHANGELOG.md | 1 + README.md | 8 ++++++++ docs/notebooks/create_reduced_meps_dataset.ipynb | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 061aa6bb..6ec00858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD) ### Added +- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub [/#38](https://github.com/mllam/neural-lam/pull/38) @SimonKamuk - Replaced `constants.py` with `data_config.yaml` for data configuration management [\#31](https://github.com/joeloskarsson/neural-lam/pull/31) diff --git a/README.md b/README.md index f16c940a..bd0d901f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ +![Linting](https://github.com/SimonKamuk/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=feature_add_tests) +![Automatic tests](https://github.com/SimonKamuk/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=feature_add_tests) + + +

diff --git a/docs/notebooks/create_reduced_meps_dataset.ipynb b/docs/notebooks/create_reduced_meps_dataset.ipynb index 484f56a1..daba23c4 100644 --- a/docs/notebooks/create_reduced_meps_dataset.ipynb +++ b/docs/notebooks/create_reduced_meps_dataset.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "# Creating meps_example_reduced\n", - "This notebook outlines how the small-size test dataset meps_example_reduced was created based on the slightly larger dataset meps_example. The zipped up datasets are 263 MB and 2.6 GB, respectively.\n", + "This notebook outlines how the small-size test dataset ```meps_example_reduced``` was created based on the slightly larger dataset ```meps_example```. The zipped up datasets are 263 MB and 2.6 GB, respectively. See [README.md](../../README.md) for info on how to download ```meps_example```.\n", "\n", "The dataset was reduced in size by reducing the number of grid points and variables.\n" ] From d39030825fad7f753cfbedd7638442204dde0e16 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 31 May 2024 10:17:16 +0200 Subject: [PATCH 20/26] moved installation of torch-geometric from requirements to github test action --- .github/workflows/run_tests.yml | 1 + README.md | 5 ----- requirements.txt | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index f1dad637..031a75f6 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -27,6 +27,7 @@ jobs: run: | python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install torch-geometric>=2.5.2 - name: Test with pytest run: | pytest -v -s diff --git a/README.md b/README.md index bd0d901f..1bdc6602 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,5 @@ -![Linting](https://github.com/SimonKamuk/neural-lam/actions/workflows/pre-commit.yml/badge.svg?branch=feature_add_tests) -![Automatic tests](https://github.com/SimonKamuk/neural-lam/actions/workflows/run_tests.yml/badge.svg?branch=feature_add_tests) - -

diff --git a/requirements.txt b/requirements.txt index 16a29a18..9309eea4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,6 @@ Cartopy>=0.22.0 pyproj>=3.4.1 tueplots>=0.0.8 plotly>=5.15.0 -torch-geometric>=2.5.2 # for dev pre-commit>=2.15.0 From de4efba5fbf7f1a096a5b93b842c21313e33e09e Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Fri, 31 May 2024 10:29:43 +0200 Subject: [PATCH 21/26] changed name of unit test badge --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 031a75f6..02fcffa9 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,4 +1,4 @@ -name: Run Unit Test via Pytest +name: Unit Tests on: # trigger on pushes to any branch, but not main From b0c4bed7ba8cdfe7eec8b6b3e118ee9d5c880dbb Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 07:11:20 +0200 Subject: [PATCH 22/26] added caching of test data --- .github/workflows/pre-commit.yml | 2 +- .github/workflows/run_tests.yml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index dc519e5b..dadac50d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: lint +name: Linting on: # trigger on pushes to any branch, but not main diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 02fcffa9..4cbdfa4b 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -31,3 +31,10 @@ jobs: - name: Test with pytest run: | pytest -v -s + - name: Cache data + uses: actions/cache@v2 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 From 18e55a4e93981db0b189df79548b075be4803804 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 07:21:50 +0200 Subject: [PATCH 23/26] fix for caching --- .github/workflows/run_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4cbdfa4b..cc569f0a 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -33,7 +33,6 @@ jobs: pytest -v -s - name: Cache data uses: actions/cache@v2 - with: path: data key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 restore-keys: | From 4f753076e75a611c0687ccce61571082937fe505 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 10:36:40 +0200 Subject: [PATCH 24/26] tried fix for caching test data --- .github/workflows/run_tests.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index cc569f0a..33edb243 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -32,8 +32,9 @@ jobs: run: | pytest -v -s - name: Cache data - uses: actions/cache@v2 - path: data - key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 - restore-keys: | - ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + uses: actions/cache@v4 + with: + path: data + key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0 + restore-keys: | + ${{ runner.os }}-meps-reduced-example-data-v0.1.0 From aceb47c83fb9f60aaaaae3cca3db4d5003f586f6 Mon Sep 17 00:00:00 2001 From: Simon Kamuk Christiansen Date: Mon, 3 Jun 2024 11:34:12 +0200 Subject: [PATCH 
25/26] updated changelog
---
 CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ec00858..757463f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD)
 
 ### Added
-- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub [/#38](https://github.com/mllam/neural-lam/pull/38) @SimonKamuk
+- Added tests for loading dataset, creating graph, and training model based on reduced MEPS dataset stored on AWS S3, along with automatic running of tests on push/PR to GitHub. Added caching of test data to speed up running tests.
+  [\#38](https://github.com/mllam/neural-lam/pull/38)
+  @SimonKamuk
 
 - Replaced `constants.py` with `data_config.yaml` for data configuration management
   [\#31](https://github.com/joeloskarsson/neural-lam/pull/31)

From a6f80899af180f9fc451fd07c4985401e7242d3f Mon Sep 17 00:00:00 2001
From: Simon Kamuk Christiansen
Date: Mon, 3 Jun 2024 12:26:39 +0200
Subject: [PATCH 26/26] separated saving and restoring of cache

---
 .github/workflows/run_tests.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 33edb243..71bff3d3 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -28,13 +28,18 @@ jobs:
         python -m pip install --upgrade pip
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         pip install torch-geometric>=2.5.2
+    - name: Load cache data
+      uses: actions/cache/restore@v4
+      with:
+        path: data
+        key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
+        restore-keys: |
+          ${{ runner.os }}-meps-reduced-example-data-v0.1.0
     - name: Test with pytest
      run: |
        pytest -v -s
-    - name: Cache data
-      uses: actions/cache@v4
+    - name: Save cache data
+      uses: actions/cache/save@v4
       with:
         path: data
         key: ${{ runner.os }}-meps-reduced-example-data-v0.1.0
-        restore-keys: |
-          ${{ runner.os }}-meps-reduced-example-data-v0.1.0
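
---

For reference, the entry points parameterized in this series can be driven
in-process the same way the test suite does. A minimal sketch follows: the
flag values are taken verbatim from tests/test_mllam_dataset.py, and calling
either main() with no argument list still falls back to parsing sys.argv, so
command-line usage of the scripts is unchanged.

# Sketch: drive the CLI entry points from Python, mirroring the tests.
import os

from create_mesh import main as create_mesh
from train_model import main as train_model

# The tests disable Weights & Biases logging before training
os.environ["WANDB_DISABLED"] = "true"

# Build the hierarchical graph for the reduced MEPS example dataset
create_mesh(
    [
        "--graph=hierarchical",
        "--hierarchical=1",
        "--data_config=data/meps_example_reduced/data_config.yaml",
        "--levels=2",
    ]
)

# Train for a single epoch on the same dataset
train_model(
    [
        "--model=hi_lam",
        "--data_config=data/meps_example_reduced/data_config.yaml",
        "--n_workers=4",
        "--epochs=1",
        "--graph=hierarchical",
        "--hidden_dim=16",
        "--hidden_layers=1",
        "--processor_layers=1",
        "--ar_steps=1",
        "--eval=val",
        "--n_example_pred=0",
    ]
)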