From f112e28b70cce99eef56e941c951c08b84fe5a63 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Wed, 27 Mar 2019 17:10:19 +0000 Subject: [PATCH 01/61] Added get_state method to ResultsHandle class --- src/smif/data_layer/data_handle.py | 36 +++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/smif/data_layer/data_handle.py b/src/smif/data_layer/data_handle.py index f7382f901..587fb8b1f 100644 --- a/src/smif/data_layer/data_handle.py +++ b/src/smif/data_layer/data_handle.py @@ -12,8 +12,9 @@ from typing import Dict, List import numpy as np # type: ignore -from smif.data_layer.store import Store + from smif.data_layer.data_array import DataArray +from smif.data_layer.store import Store from smif.exception import SmifDataError from smif.metadata import RelativeTimestep @@ -32,11 +33,12 @@ def __init__(self, store: Store, modelrun_name, current_timestep, timesteps, mod Backing store for inputs, parameters, results modelrun_name : str Name of the current modelrun + current_timestep : str + timesteps : list model : Model Model which will use this DataHandle decision_iteration : int, default=None ID of the current Decision iteration - state : list, default=None """ self.logger = getLogger(__name__) self._store = store @@ -619,7 +621,7 @@ def write_coefficients(self, source_dim: str, destination_dim: str, data: np.nda class ResultsHandle(object): """Results access for decision modules """ - def __init__(self, store, modelrun_name, sos_model, current_timestep, timesteps=None, + def __init__(self, store : Store, modelrun_name, sos_model, current_timestep, timesteps=None, decision_iteration=None): self._store = store self._modelrun_name = modelrun_name @@ -689,3 +691,31 @@ def get_results(self, model_name, output_name, timestep, decision_iteration): decision_iteration) return results + + def get_state(self) -> List: + """The current state of the model + + If the DataHandle instance has a timestep, then state is + established from the state file. + + Returns + ------- + list of tuple + A list of (intervention name, build_year) installed in the current timestep + + Raises + ------ + ValueError + If self._current_timestep is None an error is raised. 
+ """ + if self._current_timestep is None: + raise ValueError("You must pass a timestep value to get state") + else: + + sos_state = self._store.read_state( + self._modelrun_name, + self._current_timestep, + self._decision_iteration + ) + + return sos_state From 4671130bdc80684b2de81c0e7979a7d8953ce78b Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 29 Mar 2019 10:39:47 +0000 Subject: [PATCH 02/61] Added test for get_state method on ResultsHandle --- tests/data_layer/test_data_handle.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/data_layer/test_data_handle.py b/tests/data_layer/test_data_handle.py index a16d605b9..63f3636d1 100644 --- a/tests/data_layer/test_data_handle.py +++ b/tests/data_layer/test_data_handle.py @@ -6,6 +6,7 @@ import numpy as np from pytest import fixture, raises + from smif.data_layer import DataHandle from smif.data_layer.data_array import DataArray from smif.data_layer.data_handle import ResultsHandle @@ -851,6 +852,28 @@ class TestResultsHandle: """Get results from any model """ + def test_get_state(self, mock_store, mock_model, mock_sos_model): + """should get decision module state for given timestep/decision_iteration + + A call to ``get_state`` method on the data handle calls the read_state + method of the store with arguments for model run name, current + timestep and decision iteration. + """ + mock_store.read_state = Mock(return_value=[{'name': 'test', 'build_year': 2010}]) + mock_store.write_interventions('energy_demand', [{ + 'name': 'test', + 'capital_cost': {'value': 2500, 'unit': '£/GW'} + }]) + data_handle = ResultsHandle(mock_store, 1, mock_sos_model, + 2015, timesteps=[2015, 2020]) + expected = [{ + 'name': 'test', + 'build_year': 2010 + }] + actual = data_handle.get_state() + mock_store.read_state.assert_called_with(1, 2015, None) + assert actual == expected + def test_get_results_sos_model(self, mock_store, mock_model, mock_sos_model): """Get results from a sector model within a sos model """ From 3806b8cb2bda508f3837c387b00ad38e074bf03e Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 29 Mar 2019 12:10:07 +0000 Subject: [PATCH 03/61] Added typing to data_handle and below --- src/smif/data_layer/abstract_data_store.py | 14 ++++++--- .../data_layer/abstract_metadata_store.py | 5 +-- src/smif/data_layer/data_handle.py | 31 ++++++++++++------- src/smif/data_layer/store.py | 19 +++++++++--- src/smif/metadata/timestep.py | 5 +-- 5 files changed, 49 insertions(+), 25 deletions(-) diff --git a/src/smif/data_layer/abstract_data_store.py b/src/smif/data_layer/abstract_data_store.py index ee56c91d4..c8645d360 100644 --- a/src/smif/data_layer/abstract_data_store.py +++ b/src/smif/data_layer/abstract_data_store.py @@ -5,6 +5,9 @@ - results """ from abc import ABCMeta, abstractmethod +from typing import Dict, List + +from smif.data_layer.data_array import DataArray class DataStore(metaclass=ABCMeta): @@ -12,7 +15,7 @@ class DataStore(metaclass=ABCMeta): """ # region DataArray @abstractmethod - def read_scenario_variant_data(self, key, spec, timestep=None): + def read_scenario_variant_data(self, key, spec, timestep=None) -> DataArray: """Read data array Parameters @@ -148,7 +151,7 @@ def write_initial_conditions(self, key, initial_conditions): # region State @abstractmethod - def read_state(self, modelrun_name, timestep, decision_iteration=None): + def read_state(self, modelrun_name, timestep, decision_iteration=None) -> List[Dict]: """Read list of (name, build_year) for a given model_run, timestep, decision @@ 
-164,7 +167,10 @@ def read_state(self, modelrun_name, timestep, decision_iteration=None): """ @abstractmethod - def write_state(self, state, modelrun_name, timestep, decision_iteration=None): + def write_state(self, state: List[Dict], + modelrun_name: str, + timestep: int, + decision_iteration=None): """State is a list of decisions with name and build_year. State is output from the DecisionManager @@ -228,7 +234,7 @@ def write_coefficients(self, source_dim, destination_dim, data): # region Results @abstractmethod def read_results(self, modelrun_name, model_name, output_spec, timestep=None, - decision_iteration=None): + decision_iteration=None) -> DataArray: """Return results of a model from a model_run for a given output at a timestep and decision iteration diff --git a/src/smif/data_layer/abstract_metadata_store.py b/src/smif/data_layer/abstract_metadata_store.py index f9c2dfcd1..928953962 100644 --- a/src/smif/data_layer/abstract_metadata_store.py +++ b/src/smif/data_layer/abstract_metadata_store.py @@ -4,6 +4,7 @@ - dimension definitions """ from abc import ABCMeta, abstractmethod +from typing import List class MetadataStore(metaclass=ABCMeta): @@ -11,7 +12,7 @@ class MetadataStore(metaclass=ABCMeta): """ # region Units @abstractmethod - def read_unit_definitions(self): + def read_unit_definitions(self) -> List[str]: """Reads custom unit definitions Returns @@ -20,7 +21,7 @@ def read_unit_definitions(self): Pint-compatible unit definitions """ - def write_unit_definitions(self, definitions): + def write_unit_definitions(self, definitions: List[str]): """Reads custom unit definitions Parameters diff --git a/src/smif/data_layer/data_handle.py b/src/smif/data_layer/data_handle.py index 587fb8b1f..c289b5dc4 100644 --- a/src/smif/data_layer/data_handle.py +++ b/src/smif/data_layer/data_handle.py @@ -9,7 +9,7 @@ from copy import copy from logging import getLogger from types import MappingProxyType -from typing import Dict, List +from typing import Dict, List, Union import numpy as np # type: ignore @@ -341,7 +341,8 @@ def _get_result(self, dep, timestep, input_spec) -> DataArray: """ output_spec = copy(input_spec) output_spec.name = dep['source_output_name'] - self.logger.debug("Getting model result for %s via %s from %s", input_spec, dep, output_spec) + self.logger.debug("Getting model result for %s via %s from %s", + input_spec, dep, output_spec) try: data = self._store.read_results( self._modelrun_name, @@ -621,34 +622,38 @@ def write_coefficients(self, source_dim: str, destination_dim: str, data: np.nda class ResultsHandle(object): """Results access for decision modules """ - def __init__(self, store : Store, modelrun_name, sos_model, current_timestep, timesteps=None, + def __init__(self, store: Store, modelrun_name: str, sos_model, + current_timestep: int, timesteps=None, decision_iteration=None): self._store = store self._modelrun_name = modelrun_name self._sos_model = sos_model self._current_timestep = current_timestep - self._timesteps = timesteps - self._decision_iteration = decision_iteration + self._timesteps: List[int] = timesteps + self._decision_iteration: int = decision_iteration @property - def base_timestep(self): + def base_timestep(self) -> int: return self._timesteps[0] @property - def current_timestep(self): + def current_timestep(self) -> int: return self._current_timestep @property - def previous_timestep(self): + def previous_timestep(self) -> Union[None, int]: rel = RelativeTimestep.PREVIOUS return rel.resolve_relative_to(self._current_timestep, self._timesteps) 
@property - def decision_iteration(self): + def decision_iteration(self) -> int: return self._decision_iteration - def get_results(self, model_name, output_name, timestep, decision_iteration): + def get_results(self, model_name: str, + output_name: str, + timestep: Union[int, RelativeTimestep], + decision_iteration: int) -> DataArray: """Access model results Parameters @@ -666,7 +671,9 @@ def get_results(self, model_name, output_name, timestep, decision_iteration): """ # resolve timestep if hasattr(timestep, 'resolve_relative_to'): - timestep = timestep.resolve_relative_to(self._current_timestep, self._timesteps) + timestep: Union[int, None] = \ + timestep.resolve_relative_to(self._current_timestep, + self._timesteps) else: assert isinstance(timestep, int) and timestep <= self._current_timestep @@ -692,7 +699,7 @@ def get_results(self, model_name, output_name, timestep, decision_iteration): return results - def get_state(self) -> List: + def get_state(self) -> List[Dict]: """The current state of the model If the DataHandle instance has a timestep, then state is diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index cfe382bf9..2366595ff 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -17,9 +17,13 @@ from copy import deepcopy from logging import getLogger from operator import itemgetter +from typing import Dict, List, Optional import numpy as np # type: ignore + from smif.data_layer import DataArray +from smif.data_layer.abstract_data_store import DataStore +from smif.data_layer.abstract_metadata_store import MetadataStore from smif.data_layer.file import CSVDataStore, ParquetDataStore from smif.data_layer.validate import (validate_sos_model_config, validate_sos_model_format) @@ -36,7 +40,8 @@ class Store(): metadata_store: ~smif.data_layer.abstract_metadata_store.MetadataStore data_store: ~smif.data_layer.abstract_data_store.DataStore """ - def __init__(self, config_store, metadata_store, data_store, model_base_folder="."): + def __init__(self, config_store, metadata_store: MetadataStore, + data_store: DataStore, model_base_folder="."): self.logger = getLogger(__name__) self.config_store = config_store self.metadata_store = metadata_store @@ -387,7 +392,7 @@ def write_strategies(self, model_run_name, strategies): # # region Units - def read_unit_definitions(self): + def read_unit_definitions(self) -> List[str]: """Reads custom unit definitions Returns @@ -710,7 +715,7 @@ def read_all_initial_conditions(self, model_run_name): # endregion # region State - def read_state(self, model_run_name, timestep, decision_iteration=None): + def read_state(self, model_run_name, timestep, decision_iteration=None) -> List[Dict]: """Read list of (name, build_year) for a given model_run, timestep, decision @@ -789,8 +794,12 @@ def write_coefficients(self, source_dim: str, destination_dim: str, data: np.nda # endregion # region Results - def read_results(self, model_run_name, model_name, output_spec, timestep=None, - decision_iteration=None) -> DataArray: + def read_results(self, + model_run_name: str, + model_name: str, + output_spec: Spec, + timestep: Optional[int] = None, + decision_iteration: Optional[int] = None) -> DataArray: """Return results of a `model_name` in `model_run_name` for a given `output_name` Parameters diff --git a/src/smif/metadata/timestep.py b/src/smif/metadata/timestep.py index 490fe8ed1..c810d3300 100644 --- a/src/smif/metadata/timestep.py +++ b/src/smif/metadata/timestep.py @@ -7,6 +7,7 @@ self-dependency using a relative 
timestep. """ from enum import Enum +from typing import List, Union from smif.exception import SmifTimestepResolutionError @@ -31,7 +32,7 @@ def from_name(cls, name): return cls.ALL raise ValueError("Relative timestep '%s' is not recognised" % name) - def resolve_relative_to(self, timestep, timesteps): + def resolve_relative_to(self, timestep: int, timesteps: List[int]) -> Union[None, int]: """Resolve a relative timestep with respect to a given timestep and sequence of timesteps. @@ -61,7 +62,7 @@ def resolve_relative_to(self, timestep, timesteps): return relative_timestep -def element_before(element, list_): +def element_before(element: int, list_: List[int]) -> int: """Return the element before a given element in a list, or None if the given element is first or not in the list. """ From e5bed50844af2d7c248c8f5e759de18461f8b9d1 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 29 Mar 2019 12:31:07 +0000 Subject: [PATCH 04/61] Made variable type hints compatible with Python 3.5 --- src/smif/data_layer/data_handle.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/smif/data_layer/data_handle.py b/src/smif/data_layer/data_handle.py index c289b5dc4..c94264513 100644 --- a/src/smif/data_layer/data_handle.py +++ b/src/smif/data_layer/data_handle.py @@ -9,7 +9,7 @@ from copy import copy from logging import getLogger from types import MappingProxyType -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union import numpy as np # type: ignore @@ -623,15 +623,16 @@ class ResultsHandle(object): """Results access for decision modules """ def __init__(self, store: Store, modelrun_name: str, sos_model, - current_timestep: int, timesteps=None, - decision_iteration=None): + current_timestep: int, + timesteps: Optional[List[int]] = None, + decision_iteration: Optional[int] = None): self._store = store self._modelrun_name = modelrun_name self._sos_model = sos_model self._current_timestep = current_timestep - self._timesteps: List[int] = timesteps - self._decision_iteration: int = decision_iteration + self._timesteps = timesteps + self._decision_iteration = decision_iteration @property def base_timestep(self) -> int: @@ -671,9 +672,9 @@ def get_results(self, model_name: str, """ # resolve timestep if hasattr(timestep, 'resolve_relative_to'): - timestep: Union[int, None] = \ + timestep = \ timestep.resolve_relative_to(self._current_timestep, - self._timesteps) + self._timesteps) # type: Union[int, None] else: assert isinstance(timestep, int) and timestep <= self._current_timestep From e3cd363fb8f05db667c19600fc63516995b040ae Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 29 Mar 2019 15:04:26 +0000 Subject: [PATCH 05/61] Add arguments for timestep and decision iteration to get_state --- src/smif/data_layer/data_handle.py | 36 ++++++++++++---------------- tests/data_layer/test_data_handle.py | 2 +- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/smif/data_layer/data_handle.py b/src/smif/data_layer/data_handle.py index c94264513..d54ad4800 100644 --- a/src/smif/data_layer/data_handle.py +++ b/src/smif/data_layer/data_handle.py @@ -661,7 +661,7 @@ def get_results(self, model_name: str, ---------- model_name : str output_name : str - timestep : int + timestep : [int, RelativeTimestep] decision_iteration : int Returns @@ -672,11 +672,12 @@ def get_results(self, model_name: str, """ # resolve timestep if hasattr(timestep, 'resolve_relative_to'): - timestep = \ + timestep_value = \ 
timestep.resolve_relative_to(self._current_timestep, self._timesteps) # type: Union[int, None] else: assert isinstance(timestep, int) and timestep <= self._current_timestep + timestep_value = timestep if model_name in [model.name for model in self._sos_model.models]: results_model = self._sos_model.get_model(model_name) @@ -695,35 +696,28 @@ def get_results(self, model_name: str, results = self._store.read_results(self._modelrun_name, model_name, spec, - timestep, + timestep_value, decision_iteration) return results - def get_state(self) -> List[Dict]: - """The current state of the model + def get_state(self, timestep: int, decision_iteration: int) -> List[Dict]: + """Retrieve the pre-decision state of the model If the DataHandle instance has a timestep, then state is established from the state file. Returns ------- - list of tuple - A list of (intervention name, build_year) installed in the current timestep + List[Dict] + A list of {'name', 'build_year'} dictionaries showing the history of + decisions made up to this point - Raises - ------ - ValueError - If self._current_timestep is None an error is raised. """ - if self._current_timestep is None: - raise ValueError("You must pass a timestep value to get state") - else: - - sos_state = self._store.read_state( - self._modelrun_name, - self._current_timestep, - self._decision_iteration - ) + state = self._store.read_state( + self._modelrun_name, + timestep, + decision_iteration + ) - return sos_state + return state diff --git a/tests/data_layer/test_data_handle.py b/tests/data_layer/test_data_handle.py index 63f3636d1..8467051c8 100644 --- a/tests/data_layer/test_data_handle.py +++ b/tests/data_layer/test_data_handle.py @@ -870,7 +870,7 @@ def test_get_state(self, mock_store, mock_model, mock_sos_model): 'name': 'test', 'build_year': 2010 }] - actual = data_handle.get_state() + actual = data_handle.get_state(2015, None) mock_store.read_state.assert_called_with(1, 2015, None) assert actual == expected From f01b3efe8bbfac2c3b9bbc99e8310c5a88cc7aeb Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 29 Mar 2019 16:56:48 +0000 Subject: [PATCH 06/61] Decision module may access available interventions Added two layers of set operations in the DecisionManager and DecisionModule Consider set of all interventions $I$ Set of planned interventions $P \subset I$ Available interventions $A = P \cap I$ Decisions at time t $D_t \subset A - D_{t-1}$ --- src/smif/decision/decision.py | 45 +++++++++++++++++++++++++-------- tests/decision/test_decision.py | 9 ++++--- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 2e5904eea..8d38d5799 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -16,6 +16,8 @@ import os from abc import ABCMeta, abstractmethod from logging import getLogger +from types import MappingProxyType +from typing import Dict, List from smif.data_layer.data_handle import ResultsHandle from smif.data_layer.model_loader import ModelLoader @@ -57,10 +59,10 @@ def __init__(self, store, timesteps, modelrun_name, sos_model): self._timesteps = timesteps self._decision_module = None - self._register = {} + self._register = {} # type: Dict for sector_model in sos_model.sector_models: self._register.update(self._store.read_interventions(sector_model.name)) - self.planned_interventions = {} + self.planned_interventions = set() # type: set strategies = self._store.read_strategies(modelrun_name) self.logger.info("%s strategies found", 
len(strategies)) @@ -93,7 +95,7 @@ def _set_up_pre_spec_planning(self, modelrun_name, strategies): pre_spec_planning = PreSpecified(self._timesteps, self._register, planned_interventions) - self.planned_interventions = {x['name'] for x in planned_interventions} + self.planned_interventions = set([x['name'] for x in planned_interventions]) return pre_spec_planning @@ -116,7 +118,7 @@ def _set_up_decision_modules(self, modelrun_name, strategies): os.path.join(self._store.model_base_folder, strategy['path'])) strategy['timesteps'] = self._timesteps # Pass a reference to the register of interventions - strategy['register'] = self.available_interventions + strategy['register'] = MappingProxyType(self.available_interventions) strategy['name'] = strategy['classname'] + '_' + strategy['type'] @@ -125,13 +127,21 @@ def _set_up_decision_modules(self, modelrun_name, strategies): self._decision_module = decision_module @property - def available_interventions(self): + def available_interventions(self) -> Dict[str, Dict]: """Returns a register of available interventions, i.e. those not planned + + """ edited_register = {name: self._register[name] for name in self._register.keys() - self.planned_interventions} - return edited_register + return MappingProxyType(edited_register) + + def update_planned_interventions(self, decisions: List[Dict]): + """Adds a list of decisions to the set of planned interventions + """ + for decision in decisions: + self.planned_interventions.add(decision['name']) def get_intervention(self, value): try: @@ -235,6 +245,7 @@ def get_and_save_decisions(self, iteration, timestep): decisions = [] if self._decision_module: decisions.extend(self._decision_module.get_decision(results_handle)) + self.update_planned_interventions(decisions) if self.pre_spec_planning: decisions.extend(self.pre_spec_planning.get_decision(results_handle)) @@ -267,23 +278,35 @@ class DecisionModule(metaclass=ABCMeta): """Current iteration of the decision module """ - def __init__(self, timesteps, register): + def __init__(self, timesteps: List[int], register: MappingProxyType): self.timesteps = timesteps self._register = register self.logger = getLogger(__name__) + self.decisions = set() # type: set - def __next__(self): + def __next__(self) -> List[Dict]: return self._get_next_decision_iteration() @property - def interventions(self): + def interventions(self) -> List: """Return the list of available interventions Returns ------- list """ - return self._register + edited_register = {name for name in self._register.keys() + - self.decisions} + return list(edited_register) + + def update_decisions(self, decisions: List[Dict]): + """Adds a list of decisions to the set of planned interventions + """ + for decision in decisions: + if decision['name'] in self.decisions: + msg = "Decision {} already exists in decision history" + raise ValueError(msg.format(decision['name'])) + self.decisions.add(decision['name']) def get_intervention(self, name): """Return an intervention dict @@ -299,7 +322,7 @@ def get_intervention(self, name): raise SmifDataNotFoundError(msg.format(name)) @abstractmethod - def _get_next_decision_iteration(self): + def _get_next_decision_iteration(self) -> List[Dict]: """Implement to return the next decision iteration Within a list of decision-iteration/timestep pairs, the assumption is diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index cb7f28300..e158ff621 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -1,6 +1,7 @@ from 
unittest.mock import Mock, PropertyMock from pytest import fixture, raises + from smif.decision.decision import DecisionManager, PreSpecified, RuleBased from smif.exception import SmifDataNotFoundError @@ -264,7 +265,7 @@ def test_generator(self): class TestDecisionManager(): @fixture(scope='function') - def decision_manager(self, empty_store): + def decision_manager(self, empty_store) -> DecisionManager: empty_store.write_model_run({'name': 'test', 'sos_model': 'test_sos_model'}) empty_store.write_sos_model({'name': 'test_sos_model', 'sector_models': []}) empty_store.write_strategies('test', []) @@ -275,7 +276,7 @@ def decision_manager(self, empty_store): df = DecisionManager(empty_store, [2010, 2015], 'test', sos_model) return df - def test_decision_manager_init(self, decision_manager): + def test_decision_manager_init(self, decision_manager: DecisionManager): df = decision_manager dm = df.decision_loop() bundle = next(dm) @@ -286,7 +287,7 @@ def test_decision_manager_init(self, decision_manager): with raises(StopIteration): next(dm) - def test_available_interventions(self, decision_manager): + def test_available_interventions(self, decision_manager: DecisionManager): df = decision_manager df._register = {'a': {'name': 'a'}, 'b': {'name': 'b'}, @@ -300,7 +301,7 @@ def test_available_interventions(self, decision_manager): assert df.available_interventions == expected - def test_get_intervention(self, decision_manager): + def test_get_intervention(self, decision_manager: DecisionManager): df = decision_manager df._register = {'a': {'name': 'a'}, 'b': {'name': 'b'}, From 4dd29963db282c1f4887a08b24c4a634220d1377 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Mon, 1 Apr 2019 15:08:14 +0100 Subject: [PATCH 07/61] Added decisions property setter to decision module --- src/smif/decision/decision.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 8d38d5799..c74a096fe 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -282,7 +282,7 @@ def __init__(self, timesteps: List[int], register: MappingProxyType): self.timesteps = timesteps self._register = register self.logger = getLogger(__name__) - self.decisions = set() # type: set + self._decisions = set() # type: set def __next__(self) -> List[Dict]: return self._get_next_decision_iteration() @@ -291,6 +291,9 @@ def __next__(self) -> List[Dict]: def interventions(self) -> List: """Return the list of available interventions + Available interventions are the subset of interventions that have not + been implemented in a prior iteration or timestep + Returns ------- list @@ -299,14 +302,35 @@ def interventions(self) -> List: - self.decisions} return list(edited_register) + @property + def decisions(self) -> set: + """The set of historical decisions + + Returns + ------- + set + + Raises + ------ + ValueError + If a duplicate decision is added to the set of historical decisions + """ + return self._decisions + + @decisions.setter + def decisions(self, value: str): + if value in self._decisions: + msg = "Decision {} already exists in decision history" + raise ValueError(msg.format(value)) + else: + self._decisions.add(value) + def update_decisions(self, decisions: List[Dict]): """Adds a list of decisions to the set of planned interventions """ for decision in decisions: - if decision['name'] in self.decisions: - msg = "Decision {} already exists in decision history" - raise 
ValueError(msg.format(decision['name'])) - self.decisions.add(decision['name']) + self.decisions = decision['name'] + self.logger.debug("Internal record of state updated to: %s", self.decisions) def get_intervention(self, name): """Return an intervention dict From ba90956bbc2efd3603a701624187da70ce717ecc Mon Sep 17 00:00:00 2001 From: Will Usher Date: Wed, 3 Apr 2019 11:41:47 +0100 Subject: [PATCH 08/61] Intervention property should return a dict of available interventions --- src/smif/decision/decision.py | 10 +++++----- tests/decision/test_decision.py | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index c74a096fe..d2998b999 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -135,7 +135,7 @@ def available_interventions(self) -> Dict[str, Dict]: edited_register = {name: self._register[name] for name in self._register.keys() - self.planned_interventions} - return MappingProxyType(edited_register) + return edited_register def update_planned_interventions(self, decisions: List[Dict]): """Adds a list of decisions to the set of planned interventions @@ -288,8 +288,8 @@ def __next__(self) -> List[Dict]: return self._get_next_decision_iteration() @property - def interventions(self) -> List: - """Return the list of available interventions + def interventions(self) -> Dict[str, Dict]: + """Return the collection of available interventions Available interventions are the subset of interventions that have not been implemented in a prior iteration or timestep @@ -298,9 +298,9 @@ def interventions(self) -> List: ------- list """ - edited_register = {name for name in self._register.keys() + edited_register = {name: self._register[name] for name in self._register.keys() - self.decisions} - return list(edited_register) + return edited_register @property def decisions(self) -> set: diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index e158ff621..b52e729a8 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -192,11 +192,21 @@ def test_timesteps_first_last(self): def test_interventions(self): - interventions = Mock() + available_interventions = {'test': {'name': 'test_intervention'}} timesteps = [2010, 2015, 2020] - dm = RuleBased(timesteps, interventions) - assert dm.interventions == interventions + dm = RuleBased(timesteps, available_interventions) + assert dm.interventions == available_interventions + + def test_interventions_planned(self): + + available_interventions = {'test': {'name': 'test_intervention'}, + 'planned': {'name': 'planned_intervention'}} + + timesteps = [2010, 2015, 2020] + dm = RuleBased(timesteps, available_interventions) + dm.update_decisions([{'name': 'planned'}]) + assert dm.interventions == {'test': {'name': 'test_intervention'}} def test_get_intervention(self): From 2587a06a4114d2ce59801caa66a20e1f6a7b7d8f Mon Sep 17 00:00:00 2001 From: Will Usher Date: Wed, 3 Apr 2019 17:02:27 +0100 Subject: [PATCH 09/61] DecisionManager updates pre-decision-state with new decisions --- src/smif/decision/decision.py | 109 ++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 24 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index d2998b999..1b822d885 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -57,7 +57,7 @@ def __init__(self, store, timesteps, modelrun_name, sos_model): self._modelrun_name = modelrun_name 
self._sos_model = sos_model self._timesteps = timesteps - self._decision_module = None + self._decision_module = None # type: DecisionModule self._register = {} # type: Dict for sector_model in sos_model.sector_models: @@ -129,8 +129,6 @@ def _set_up_decision_modules(self, modelrun_name, strategies): @property def available_interventions(self) -> Dict[str, Dict]: """Returns a register of available interventions, i.e. those not planned - - """ edited_register = {name: self._register[name] for name in self._register.keys() - @@ -233,6 +231,21 @@ def get_and_save_decisions(self, iteration, timestep): --------- timestep : int iteration : int + + Notes + ----- + State contains all intervention names which are present in the system at + the given ``timestep`` for the current ``iteration``. This must include + planned interventions from a previous timestep that are still within + their lifetime, and interventions picked by a decision module in the + previous timesteps. + + After loading all historical interventions, and screening them to remove + interventions from the previous timestep that have reached the end + of their lifetime, new decisions are added to the list of current + interventions. + + Finally, the new state file is written to disk. """ results_handle = ResultsHandle( store=self._store, @@ -242,21 +255,44 @@ def get_and_save_decisions(self, iteration, timestep): timesteps=self._timesteps, decision_iteration=iteration ) - decisions = [] + + pre_decision_state = [] if self._decision_module: - decisions.extend(self._decision_module.get_decision(results_handle)) - self.update_planned_interventions(decisions) + pre_decision_state.extend(self._decision_module.get_previous_state( + results_handle)) if self.pre_spec_planning: - decisions.extend(self.pre_spec_planning.get_decision(results_handle)) + pre_decision_state.extend(self.pre_spec_planning.get_previous_state( + results_handle)) - self.logger.debug( - "Retrieved %s decisions from %s", len(decisions), str(self._decision_module)) + msg = "Pre-decision state at timestep %s and iteration %s:\n%s" + self.logger.debug(msg, + timestep, iteration, pre_decision_state) + + new_decisions = [] + if self._decision_module: + new_decisions.extend(self._decision_module.get_decision(results_handle)) + if self.pre_spec_planning: + new_decisions.extend(self.pre_spec_planning.get_decision(results_handle)) + + self.logger.debug("New decisions at timestep %s and iteration %s:\n%s", + timestep, iteration, new_decisions) + + if new_decisions: + # self.update_planned_interventions(new_decisions) + post_decision_state = pre_decision_state + new_decisions + else: + post_decision_state = pre_decision_state + + self.logger.debug("Post-decision state at timestep %s and iteration %s:\n%s", + timestep, iteration, post_decision_state) self.logger.debug( "Writing state for timestep %s and interation %s", timestep, iteration) - self._store.write_state(decisions, self._modelrun_name, timestep, iteration) + if not post_decision_state: + post_decision_state = [{'name': '', 'build_year': ''}] + self._store.write_state(post_decision_state, self._modelrun_name, timestep, iteration) class DecisionModule(metaclass=ABCMeta): @@ -287,6 +323,18 @@ def __init__(self, timesteps: List[int], register: MappingProxyType): def __next__(self) -> List[Dict]: return self._get_next_decision_iteration() + @property + def first_timestep(self): + return min(self.timesteps) + + @property + def last_timestep(self): + return max(self.timesteps) + + @abstractmethod + def get_previous_state(self, 
results_handle: ResultsHandle) -> List[Dict]: + pass + @property def interventions(self) -> Dict[str, Dict]: """Return the collection of available interventions @@ -375,7 +423,7 @@ def _get_next_decision_iteration(self) -> List[Dict]: raise NotImplementedError @abstractmethod - def get_decision(self, results_handle): + def get_decision(self, results_handle: ResultsHandle) -> List[Dict]: """Return decisions for a given timestep and decision iteration Parameters @@ -419,8 +467,19 @@ def _get_next_decision_iteration(self): 'timesteps': [x for x in self.timesteps] } - def get_decision(self, results_handle): - """Return a dict of intervention names built in timestep + def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: + if results_handle.current_timestep > self.first_timestep: + prev_timestep = results_handle.previous_timestep + prev_iteration = results_handle.decision_iteration + return results_handle.get_state(prev_timestep, prev_iteration) + else: + return [] + + def get_decision(self, results_handle) -> List[Dict]: + """Return a dict of historical or planned interventions in current timestep + + Use lifetime attribute of named intervention to calculate if it is still + present in the current state of the system Arguments --------- @@ -441,8 +500,6 @@ def get_decision(self, results_handle): decisions = [] timestep = results_handle.current_timestep - assert isinstance(self._planned, list) - for intervention in self._planned: build_year = int(intervention['build_year']) @@ -522,6 +579,14 @@ def __init__(self, timesteps, register): self._max_iteration_by_timestep = {self.first_timestep: 0} self.logger = getLogger(__name__) + def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: + if self.current_timestep > self.first_timestep: + prev_timestep = self.previous_timestep + prev_iteration = self.get_previous_year_iteration() + return results_handle.get_state(prev_timestep, prev_iteration) + else: + return [] + @property def next_timestep(self): index_current_timestep = self.timesteps.index(self.current_timestep) @@ -538,14 +603,6 @@ def previous_timestep(self): else: return None - @property - def first_timestep(self): - return min(self.timesteps) - - @property - def last_timestep(self): - return max(self.timesteps) - def _get_next_decision_iteration(self): if self.satisfied and (self.current_timestep == self.last_timestep): return None @@ -560,6 +617,10 @@ def _get_next_decision_iteration(self): self.current_iteration += 1 return self._make_bundle() + def get_previous_year_iteration(self): + iteration = self._max_iteration_by_timestep[self.previous_timestep] + return iteration + def _make_bundle(self): bundle = {'decision_iterations': [self.current_iteration], 'timesteps': [self.current_timestep]} @@ -571,5 +632,5 @@ def _make_bundle(self): } return bundle - def get_decision(self, results_handle): + def get_decision(self, results_handle) -> List[Dict]: return [] From 5f3131aff7e3bab97f372e0845f7620cb65a9491 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 4 Apr 2019 11:05:48 +0100 Subject: [PATCH 10/61] Tests passing (fixed previous state under pre-spec only) --- src/smif/decision/decision.py | 88 ++++++++++++++++++++++----------- tests/decision/test_decision.py | 44 +++++++++++++++++ 2 files changed, 104 insertions(+), 28 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 1b822d885..ad2774f7c 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -17,11 +17,12 @@ from abc import 
ABCMeta, abstractmethod from logging import getLogger from types import MappingProxyType -from typing import Dict, List +from typing import Dict, List, Tuple from smif.data_layer.data_handle import ResultsHandle from smif.data_layer.model_loader import ModelLoader -from smif.exception import SmifDataNotFoundError +from smif.data_layer.store import Store +from smif.exception import SmifDataNotFoundError, SmifTimestepResolutionError class DecisionManager(object): @@ -49,7 +50,10 @@ class DecisionManager(object): store: smif.data_layer.store.Store """ - def __init__(self, store, timesteps, modelrun_name, sos_model): + def __init__(self, store: Store, + timesteps: List[int], + modelrun_name: str, + sos_model): self.logger = getLogger(__name__) @@ -57,7 +61,7 @@ def __init__(self, store, timesteps, modelrun_name, sos_model): self._modelrun_name = modelrun_name self._sos_model = sos_model self._timesteps = timesteps - self._decision_module = None # type: DecisionModule + self._decision_module = None self._register = {} # type: Dict for sector_model in sos_model.sector_models: @@ -124,7 +128,7 @@ def _set_up_decision_modules(self, modelrun_name, strategies): self.logger.debug("Trying to load strategy: %s", strategy['name']) decision_module = loader.load(strategy) - self._decision_module = decision_module + self._decision_module = decision_module # type: DecisionModule @property def available_interventions(self) -> Dict[str, Dict]: @@ -135,11 +139,10 @@ def available_interventions(self) -> Dict[str, Dict]: self.planned_interventions} return edited_register - def update_planned_interventions(self, decisions: List[Dict]): + def update_planned_interventions(self, decisions: List[Tuple]): """Adds a list of decisions to the set of planned interventions """ - for decision in decisions: - self.planned_interventions.add(decision['name']) + self.planned_interventions.update([x[1] for x in decisions]) def get_intervention(self, value): try: @@ -256,33 +259,37 @@ def get_and_save_decisions(self, iteration, timestep): decision_iteration=iteration ) - pre_decision_state = [] + # Decision module overrides pre-specified planning for obtaining state + # from previous iteration + pre_decision_state = set() if self._decision_module: - pre_decision_state.extend(self._decision_module.get_previous_state( - results_handle)) - - if self.pre_spec_planning: - pre_decision_state.extend(self.pre_spec_planning.get_previous_state( - results_handle)) + previous_state = self._get_previous_state(self._decision_module, results_handle) + pre_decision_state.update(previous_state) + elif self.pre_spec_planning: + previous_state = self._get_previous_state(self.pre_spec_planning, results_handle) + pre_decision_state.update(previous_state) msg = "Pre-decision state at timestep %s and iteration %s:\n%s" self.logger.debug(msg, timestep, iteration, pre_decision_state) - new_decisions = [] + new_decisions = set() if self._decision_module: - new_decisions.extend(self._decision_module.get_decision(results_handle)) + decisions = self._get_decisions(self._decision_module, + results_handle) + new_decisions.update(decisions) if self.pre_spec_planning: - new_decisions.extend(self.pre_spec_planning.get_decision(results_handle)) + decisions = self._get_decisions(self.pre_spec_planning, + results_handle) + new_decisions.update(decisions) self.logger.debug("New decisions at timestep %s and iteration %s:\n%s", timestep, iteration, new_decisions) - if new_decisions: - # self.update_planned_interventions(new_decisions) - post_decision_state = 
pre_decision_state + new_decisions - else: - post_decision_state = pre_decision_state + self.update_planned_interventions(new_decisions) + # Post decision state is the union of the pre decision state set + # and new decision set + post_decision_state = self._untuplize_state(pre_decision_state | new_decisions) self.logger.debug("Post-decision state at timestep %s and iteration %s:\n%s", timestep, iteration, post_decision_state) @@ -294,6 +301,26 @@ def get_and_save_decisions(self, iteration, timestep): post_decision_state = [{'name': '', 'build_year': ''}] self._store.write_state(post_decision_state, self._modelrun_name, timestep, iteration) + def _get_decisions(self, + decision_module: 'DecisionModule', + results_handle: ResultsHandle) -> List[Tuple[int, str]]: + decisions = decision_module.get_decision(results_handle) + return self._tuplize_state(decisions) + + def _get_previous_state(self, + decision_module: 'DecisionModule', + results_handle: ResultsHandle) -> List[Tuple[int, str]]: + state_dict = decision_module.get_previous_state(results_handle) + return self._tuplize_state(state_dict) + + @staticmethod + def _tuplize_state(state: List[Dict]) -> List[Tuple[int, str]]: + return [(x['build_year'], x['name']) for x in state] + + @staticmethod + def _untuplize_state(state: List[Tuple[int, str]]) -> List[Dict]: + return [{'build_year': x[0], 'name': x[1]} for x in state] + class DecisionModule(metaclass=ABCMeta): """Abstract class which provides the interface to user defined decision modules. @@ -333,7 +360,7 @@ def last_timestep(self): @abstractmethod def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: - pass + raise NotImplementedError @property def interventions(self) -> Dict[str, Dict]: @@ -467,12 +494,17 @@ def _get_next_decision_iteration(self): 'timesteps': [x for x in self.timesteps] } - def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: - if results_handle.current_timestep > self.first_timestep: + def get_previous_state(self, + results_handle: ResultsHandle, + iteration: int = None) -> List[Dict]: + try: prev_timestep = results_handle.previous_timestep - prev_iteration = results_handle.decision_iteration + if iteration: + prev_iteration = iteration + else: + prev_iteration = results_handle.decision_iteration return results_handle.get_state(prev_timestep, prev_iteration) - else: + except SmifTimestepResolutionError: return [] def get_decision(self, results_handle) -> List[Dict]: diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index b52e729a8..3d3a85fd3 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -1,7 +1,9 @@ +from typing import Dict, List from unittest.mock import Mock, PropertyMock from pytest import fixture, raises +from smif.data_layer.store import Store from smif.decision.decision import DecisionManager, PreSpecified, RuleBased from smif.exception import SmifDataNotFoundError @@ -321,3 +323,45 @@ def test_get_intervention(self, decision_manager: DecisionManager): with raises(SmifDataNotFoundError): df.get_intervention('z') + + +class TestDecisionManagerDecisions: + + @fixture(scope='function') + def decision_manager(self, empty_store) -> DecisionManager: + empty_store.write_model_run({'name': 'test', 'sos_model': 'test_sos_model'}) + empty_store.write_sos_model({'name': 'test_sos_model', 'sector_models': []}) + empty_store.write_strategies('test', []) + sos_model = Mock() + sos_model.name = 'test_sos_model' + sos_model.sector_models = [] + + df = 
DecisionManager(empty_store, [2010, 2015], 'test', sos_model) + return df + + def test_get_decisions(self, decision_manager: DecisionManager): + dm = decision_manager + + mock_handle = Mock() + dm._decision_module = Mock() + dm._decision_module.get_decision = Mock( + return_value=[{'name': 'test', 'build_year': 2010}]) + + actual = dm._get_decisions(dm._decision_module, mock_handle) + expected = [(2010, 'test')] + assert actual == expected + + def test_get_and_save_decisions(self, decision_manager: DecisionManager): + + dm = decision_manager + + dm._decision_module = Mock() + dm._decision_module.get_decision = Mock( + return_value=[{'name': 'test', 'build_year': 2010}]) + dm._decision_module.get_previous_state = Mock(return_value=[]) + + dm.get_and_save_decisions(0, 2010) + + actual = dm._store # type: Store + expected = [{'name': 'test', 'build_year': 2010}] # type: List[Dict] + assert actual.read_state('test', 2010, decision_iteration=0) == expected From a54afe6e14da998f2fe4ce054d322781fb1d6679 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 4 Apr 2019 17:01:50 +0100 Subject: [PATCH 11/61] Further testing of decision manager and state --- src/smif/decision/decision.py | 80 +++++++------------ .../sample_project/planning/energyagent.py | 9 ++- tests/decision/test_decision.py | 62 +++++++++++--- 3 files changed, 88 insertions(+), 63 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index ad2774f7c..dd09bb0b2 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -73,7 +73,9 @@ def __init__(self, store: Store, self.pre_spec_planning = self._set_up_pre_spec_planning(modelrun_name, strategies) self._set_up_decision_modules(modelrun_name, strategies) - def _set_up_pre_spec_planning(self, modelrun_name, strategies): + def _set_up_pre_spec_planning(self, + modelrun_name: str, + strategies: List[Dict]) -> 'PreSpecified': pre_spec_planning = None @@ -81,7 +83,7 @@ def _set_up_pre_spec_planning(self, modelrun_name, strategies): initial_conditions = self._store.read_all_initial_conditions(modelrun_name) # Read in strategies - planned_interventions = [] + planned_interventions = [] # type: List planned_interventions.extend(initial_conditions) for index, strategy in enumerate(strategies): @@ -274,19 +276,20 @@ def get_and_save_decisions(self, iteration, timestep): timestep, iteration, pre_decision_state) new_decisions = set() - if self._decision_module: - decisions = self._get_decisions(self._decision_module, - results_handle) - new_decisions.update(decisions) if self.pre_spec_planning: decisions = self._get_decisions(self.pre_spec_planning, results_handle) new_decisions.update(decisions) + self.update_planned_interventions(decisions) + if self._decision_module: + decisions = self._get_decisions(self._decision_module, + results_handle) + new_decisions.update(decisions) + self.update_planned_interventions(decisions) self.logger.debug("New decisions at timestep %s and iteration %s:\n%s", timestep, iteration, new_decisions) - self.update_planned_interventions(new_decisions) # Post decision state is the union of the pre decision state set # and new decision set post_decision_state = self._untuplize_state(pre_decision_state | new_decisions) @@ -360,10 +363,11 @@ def last_timestep(self): @abstractmethod def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: + """Return the state of the previous timestep + """ raise NotImplementedError - @property - def interventions(self) -> Dict[str, Dict]: + def 
available_interventions(self, state: List[Dict]) -> List: """Return the collection of available interventions Available interventions are the subset of interventions that have not @@ -371,41 +375,10 @@ def interventions(self) -> Dict[str, Dict]: Returns ------- - list + List """ - edited_register = {name: self._register[name] for name in self._register.keys() - - self.decisions} - return edited_register - - @property - def decisions(self) -> set: - """The set of historical decisions - - Returns - ------- - set - - Raises - ------ - ValueError - If a duplicate decision is added to the set of historical decisions - """ - return self._decisions - - @decisions.setter - def decisions(self, value: str): - if value in self._decisions: - msg = "Decision {} already exists in decision history" - raise ValueError(msg.format(value)) - else: - self._decisions.add(value) - - def update_decisions(self, decisions: List[Dict]): - """Adds a list of decisions to the set of planned interventions - """ - for decision in decisions: - self.decisions = decision['name'] - self.logger.debug("Internal record of state updated to: %s", self.decisions) + return [name for name in self._register.keys() + - set([x['name'] for x in state])] def get_intervention(self, name): """Return an intervention dict @@ -495,14 +468,10 @@ def _get_next_decision_iteration(self): } def get_previous_state(self, - results_handle: ResultsHandle, - iteration: int = None) -> List[Dict]: + results_handle: ResultsHandle) -> List[Dict]: try: prev_timestep = results_handle.previous_timestep - if iteration: - prev_iteration = iteration - else: - prev_iteration = results_handle.decision_iteration + prev_iteration = results_handle.decision_iteration return results_handle.get_state(prev_timestep, prev_iteration) except SmifTimestepResolutionError: return [] @@ -612,9 +581,16 @@ def __init__(self, timesteps, register): self.logger = getLogger(__name__) def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: - if self.current_timestep > self.first_timestep: - prev_timestep = self.previous_timestep - prev_iteration = self.get_previous_year_iteration() + + if (self.current_iteration > 1 + and self.current_timestep == results_handle.base_timestep): + prev_iteration = self.current_iteration - 1 + prev_timestep = self.current_timestep + return results_handle.get_state(prev_timestep, prev_iteration) + elif (self.current_iteration > 1 + and self.current_timestep > self.first_timestep): + prev_iteration = self._max_iteration_by_timestep[self.previous_timestep] + prev_timestep = results_handle.previous_timestep return results_handle.get_state(prev_timestep, prev_iteration) else: return [] diff --git a/src/smif/sample_project/planning/energyagent.py b/src/smif/sample_project/planning/energyagent.py index 4ee6e202d..3b8e75740 100644 --- a/src/smif/sample_project/planning/energyagent.py +++ b/src/smif/sample_project/planning/energyagent.py @@ -32,8 +32,10 @@ def from_dict(config): return EnergyAgent(timesteps, register) def get_decision(self, data_handle): + budget = self.run_regulator(data_handle) decisions = self.run_power_producer(data_handle, budget) + return decisions def run_regulator(self, data_handle): @@ -60,9 +62,14 @@ def run_power_producer(self, data_handle, budget): """ data_handle budget : float + """ cheapest_first = [] - for name, item in self.interventions.items(): + + state = self.get_previous_state(data_handle) + + for name in self.available_interventions(state): + item = self.get_intervention(name) 
cheapest_first.append((name, float(item['capital_cost']['value']))) sorted(cheapest_first, key=lambda x: float(x[1]), reverse=True) diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index 3d3a85fd3..fae760bb8 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -194,21 +194,22 @@ def test_timesteps_first_last(self): def test_interventions(self): - available_interventions = {'test': {'name': 'test_intervention'}} + all_interventions = {'test_intervention': {'name': 'test_intervention'}} timesteps = [2010, 2015, 2020] - dm = RuleBased(timesteps, available_interventions) - assert dm.interventions == available_interventions + dm = RuleBased(timesteps, all_interventions) + assert dm.available_interventions([]) == ['test_intervention'] def test_interventions_planned(self): - available_interventions = {'test': {'name': 'test_intervention'}, - 'planned': {'name': 'planned_intervention'}} + all_interventions = {'test_intervention': {'name': 'test_intervention'}, + 'planned_intervention': {'name': 'planned_intervention'}} timesteps = [2010, 2015, 2020] - dm = RuleBased(timesteps, available_interventions) - dm.update_decisions([{'name': 'planned'}]) - assert dm.interventions == {'test': {'name': 'test_intervention'}} + dm = RuleBased(timesteps, all_interventions) + actual = dm.available_interventions([{'name': 'planned_intervention'}]) + expected = ['test_intervention'] + assert actual == expected def test_get_intervention(self): @@ -351,8 +352,10 @@ def test_get_decisions(self, decision_manager: DecisionManager): expected = [(2010, 'test')] assert actual == expected - def test_get_and_save_decisions(self, decision_manager: DecisionManager): - + def test_get_and_save_decisions_dm(self, decision_manager: DecisionManager): + """Test that the ``get_and_save_decisions`` method updates pre-decision + state with a new decision and writes it to store + """ dm = decision_manager dm._decision_module = Mock() @@ -365,3 +368,42 @@ def test_get_and_save_decisions(self, decision_manager: DecisionManager): actual = dm._store # type: Store expected = [{'name': 'test', 'build_year': 2010}] # type: List[Dict] assert actual.read_state('test', 2010, decision_iteration=0) == expected + + def test_get_and_save_decisions_prespec(self, + decision_manager: DecisionManager): + """Test that the ``get_and_save_decisions`` method updates pre-decision + state with a pre-specified planning and writes it to store + """ + dm = decision_manager + + dm.pre_spec_planning = Mock() + dm.pre_spec_planning.get_decision = Mock( + return_value=[{'name': 'test', 'build_year': 2010}]) + dm.pre_spec_planning.get_previous_state = Mock(return_value=[]) + + dm.get_and_save_decisions(0, 2010) + + actual = dm._store # type: Store + expected = [{'name': 'test', 'build_year': 2010}] # type: List[Dict] + assert actual.read_state('test', 2010, decision_iteration=0) == expected + + def test_pre_spec_and_decision_module(self, + decision_manager: DecisionManager): + dm = decision_manager + + dm._decision_module = Mock() + dm._decision_module.get_decision = Mock( + return_value=[{'name': 'decided', 'build_year': 2010}]) + dm._decision_module.get_previous_state = Mock(return_value=[]) + + dm.pre_spec_planning = Mock() + dm.pre_spec_planning.get_decision = Mock( + return_value=[{'name': 'planned', 'build_year': 2010}]) + dm.pre_spec_planning.get_previous_state = Mock(return_value=[]) + + dm.get_and_save_decisions(0, 2010) + + actual = dm._store.read_state('test', 2010, decision_iteration=0) # 
type: List[Dict] + + expected = set([('decided', 2010), ('planned', 2010)]) + assert set([(x['name'], x['build_year']) for x in actual]) == expected From 52bbe1f6da13a52ac92da51074a5a9b981e0ee0e Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 5 Apr 2019 11:11:45 +0100 Subject: [PATCH 12/61] Better management of iterations and timesteps for rulebased approach --- src/smif/decision/decision.py | 46 +++++++++++++------ .../sample_project/planning/energyagent.py | 11 ++--- tests/decision/test_decision.py | 45 ++++++++++++++++++ 3 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index dd09bb0b2..4a8e33ecb 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -354,11 +354,11 @@ def __next__(self) -> List[Dict]: return self._get_next_decision_iteration() @property - def first_timestep(self): + def first_timestep(self) -> int: return min(self.timesteps) @property - def last_timestep(self): + def last_timestep(self) -> int: return max(self.timesteps) @abstractmethod @@ -573,25 +573,41 @@ class RuleBased(DecisionModule): def __init__(self, timesteps, register): super().__init__(timesteps, register) - self.satisfied = False - self.current_timestep = self.first_timestep - self.current_iteration = 0 + self.satisfied = False # type: bool + self.current_timestep = self.first_timestep # type: int + self.current_iteration = 0 # type: int # keep internal account of max iteration reached per timestep self._max_iteration_by_timestep = {self.first_timestep: 0} self.logger = getLogger(__name__) + def get_previous_iteration_timestep(self) -> Tuple[int, int]: + """Returns the timestep, iteration pair that describes the previous + iteration + + Returns + ------- + tuple + Contains (timestep, iteration) + """ + if self.current_iteration > 1: + iteration = self.current_iteration - 1 + + if self.current_timestep == self.first_timestep: + timestep = self.current_timestep + elif (iteration == self._max_iteration_by_timestep[self.previous_timestep]): + timestep = self.previous_timestep + elif (iteration >= self._max_iteration_by_timestep[self.previous_timestep]): + timestep = self.current_timestep + else: + return tuple() + return timestep, iteration + def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: - if (self.current_iteration > 1 - and self.current_timestep == results_handle.base_timestep): - prev_iteration = self.current_iteration - 1 - prev_timestep = self.current_timestep - return results_handle.get_state(prev_timestep, prev_iteration) - elif (self.current_iteration > 1 - and self.current_timestep > self.first_timestep): - prev_iteration = self._max_iteration_by_timestep[self.previous_timestep] - prev_timestep = results_handle.previous_timestep - return results_handle.get_state(prev_timestep, prev_iteration) + timestep_iteration = self.get_previous_iteration_timestep() + if timestep_iteration: + timestep, iteration = timestep_iteration + return results_handle.get_state(timestep, iteration) else: return [] diff --git a/src/smif/sample_project/planning/energyagent.py b/src/smif/sample_project/planning/energyagent.py index 3b8e75740..1983dcc10 100644 --- a/src/smif/sample_project/planning/energyagent.py +++ b/src/smif/sample_project/planning/energyagent.py @@ -44,18 +44,17 @@ def run_regulator(self, data_handle): """ budget = 100 - # TODO Should be the iteration previous to the current one? 
- if data_handle.current_timestep > data_handle.base_timestep: - previous_timestep = data_handle.previous_timestep - iteration = self._max_iteration_by_timestep[previous_timestep] + if self.current_iteration > 1: + timestep, iteration = self.get_previous_iteration_timestep() + output_name = 'cost' cost = data_handle.get_results(model_name='energy_demand', output_name=output_name, decision_iteration=iteration, - timestep=previous_timestep) + timestep=timestep) budget -= sum(cost.as_ndarray()) - self.satisfied = True + self.satisfied = True return budget def run_power_producer(self, data_handle, budget): diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index fae760bb8..49e5bac87 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -226,6 +226,51 @@ def test_get_intervention(self): assert msg in str(ex) +class TestRuleBasedIterationTimestepAccounting: + """Test that the iteration and timestep accounting methods properly follow + the path through the decision iterations + + 2010 - 0, 1 + 2015 - 2, 3 + """ + + @fixture(scope='function') + def dm(self): + timesteps = [2010, 2015, 2020] + dm = RuleBased(timesteps, Mock()) + return dm + + def test_first_iteration_base_year(self, dm): + + dm.current_timestep = 2010 + dm.current_iteration = 1 + dm._max_iteration_by_timestep[2010] = 1 + assert dm.get_previous_iteration_timestep() == tuple() + + def test_second_iteration_base_year(self, dm): + + dm.current_timestep = 2010 + dm.current_iteration = 2 + dm._max_iteration_by_timestep[2010] = 2 + assert dm.get_previous_iteration_timestep() == (2010, 1) + + def test_second_iteration_next_year(self, dm): + + dm.current_timestep = 2015 + dm.current_iteration = 3 + dm._max_iteration_by_timestep[2010] = 2 + dm._max_iteration_by_timestep[2015] = 3 + assert dm.get_previous_iteration_timestep() == (2010, 2) + + def test_third_iteration_next_year(self, dm): + + dm.current_timestep = 2015 + dm.current_iteration = 4 + dm._max_iteration_by_timestep[2010] = 2 + dm._max_iteration_by_timestep[2015] = 4 + assert dm.get_previous_iteration_timestep() == (2015, 3) + + class TestRuleBased: def test_initialisation(self): From 56a0a0bc794ab7788ffac20c6698021557cf95c2 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 5 Apr 2019 13:09:13 +0100 Subject: [PATCH 13/61] Moved buildable logic from prespec to decision manager --- src/smif/decision/decision.py | 154 ++++++++++++++++++-------------- tests/decision/test_decision.py | 81 +++++++++-------- 2 files changed, 131 insertions(+), 104 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 4a8e33ecb..767676be6 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -292,7 +292,10 @@ def get_and_save_decisions(self, iteration, timestep): # Post decision state is the union of the pre decision state set # and new decision set - post_decision_state = self._untuplize_state(pre_decision_state | new_decisions) + post_decision_state = pre_decision_state | new_decisions + + post_decision_state = self.retire_interventions(post_decision_state, timestep) + post_decision_state = self._untuplize_state(post_decision_state) self.logger.debug("Post-decision state at timestep %s and iteration %s:\n%s", timestep, iteration, post_decision_state) @@ -301,9 +304,22 @@ def get_and_save_decisions(self, iteration, timestep): "Writing state for timestep %s and interation %s", timestep, iteration) if not post_decision_state: - post_decision_state = [{'name': '', 'build_year': 
''}] + post_decision_state = [{'name': '', 'build_year': timestep}] self._store.write_state(post_decision_state, self._modelrun_name, timestep, iteration) + def retire_interventions(self, state: List[Tuple[int, str]], + timestep: int) -> List[Tuple[int, str]]: + + alive = [] + for intervention in state: + build_year = int(intervention[0]) + data = self._register[intervention[1]] + lifetime = data['technical_lifetime']['value'] + if (self.buildable(build_year, timestep) and + self.within_lifetime(build_year, timestep, lifetime)): + alive.append(intervention) + return alive + def _get_decisions(self, decision_module: 'DecisionModule', results_handle: ResultsHandle) -> List[Tuple[int, str]]: @@ -324,6 +340,72 @@ def _tuplize_state(state: List[Dict]) -> List[Tuple[int, str]]: def _untuplize_state(state: List[Tuple[int, str]]) -> List[Dict]: return [{'build_year': x[0], 'name': x[1]} for x in state] + def buildable(self, build_year, timestep) -> bool: + """Interventions are deemed available if build_year is less than next timestep + + For example, if `a` is built in 2011 and timesteps are + [2005, 2010, 2015, 2020] then buildable returns True for timesteps + 2010, 2015 and 2020 and False for 2005. + + Arguments + --------- + build_year: int + The build year of the intervention + timestep: int + The current timestep + """ + if not isinstance(build_year, (int, float)): + msg = "Build Year should be an integer but is a {}" + raise TypeError(msg.format(type(build_year))) + if timestep not in self._timesteps: + raise ValueError("Timestep not in model timesteps") + index = self._timesteps.index(timestep) + if index == len(self._timesteps) - 1: + next_year = timestep + 1 + else: + next_year = self._timesteps[index + 1] + + if int(build_year) < next_year: + return True + else: + return False + + @staticmethod + def within_lifetime(build_year, timestep, lifetime) -> bool: + """Interventions are deemed active if build_year + lifetime >= timestep + + Arguments + --------- + build_year : int + timestep : int + lifetime : int + + Returns + ------- + bool + """ + if not isinstance(build_year, (int, float)): + msg = "Build Year should be an integer but is a {}" + raise TypeError(msg.format(type(build_year))) + + try: + build_year = int(build_year) + except ValueError: + raise ValueError( + "A build year must be a valid integer. Received {}.".format(build_year)) + + try: + lifetime = int(lifetime) + except ValueError: + lifetime = float("inf") + if lifetime < 0: + msg = "The value of lifetime cannot be negative" + raise ValueError(msg) + if timestep <= build_year + lifetime: + return True + else: + return False + class DecisionModule(metaclass=ABCMeta): """Abstract class which provides the interface to user defined decision modules. 
@@ -498,73 +580,7 @@ def get_decision(self, results_handle) -> List[Dict]: >>> dm.get_decision(results_handle) [{'name': intervention_a', 'build_year': 2010}] """ - decisions = [] - timestep = results_handle.current_timestep - - for intervention in self._planned: - build_year = int(intervention['build_year']) - - data = self._register[intervention['name']] - lifetime = data['technical_lifetime']['value'] - - if self.buildable(build_year, timestep) and \ - self.within_lifetime(build_year, timestep, lifetime): - decisions.append(intervention) - return decisions - - def buildable(self, build_year, timestep): - """Interventions are deemed available if build_year is less than next timestep - - For example, if `a` is built in 2011 and timesteps are - [2005, 2010, 2015, 2020] then buildable returns True for timesteps - 2010, 2015 and 2020 and False for 2005. - """ - if not isinstance(build_year, (int, float)): - msg = "Build Year should be an integer but is a {}" - raise TypeError(msg.format(type(build_year))) - if timestep not in self.timesteps: - raise ValueError("Timestep not in model timesteps") - index = self.timesteps.index(timestep) - if index == len(self.timesteps) - 1: - next_year = timestep + 1 - else: - next_year = self.timesteps[index + 1] - - if int(build_year) < next_year: - return True - else: - return False - - def within_lifetime(self, build_year, timestep, lifetime): - """Interventions are deemed active if build_year + lifetime >= timestep - - Arguments - --------- - build_year : int - timestep : int - lifetime : int - """ - if not isinstance(build_year, (int, float)): - msg = "Build Year should be an integer but is a {}" - raise TypeError(msg.format(type(build_year))) - - try: - build_year = int(build_year) - except ValueError: - raise ValueError( - "A build year must be a valid integer. 
Received {}.".format(build_year)) - - try: - lifetime = int(lifetime) - except ValueError: - lifetime = float("inf") - if lifetime < 0: - msg = "The value of lifetime cannot be negative" - raise ValueError(msg) - if timestep <= build_year + lifetime: - return True - else: - return False + return self._planned class RuleBased(DecisionModule): diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index 49e5bac87..95dd30877 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -79,7 +79,12 @@ def test_get_decision(self, plan, get_register): actual = dm.get_decision(mock_handle) expected = [ {'name': 'small_pumping_station_oxford', - 'build_year': 2010}] + 'build_year': 2010}, + {'name': 'small_pumping_station_abingdon', + 'build_year': 2015}, + {'name': 'large_pumping_station_oxford', + 'build_year': 2020} + ] assert actual == expected type(mock_handle).current_timestep = PropertyMock(return_value=2015) @@ -88,7 +93,10 @@ def test_get_decision(self, plan, get_register): {'name': 'small_pumping_station_oxford', 'build_year': 2010}, {'name': 'small_pumping_station_abingdon', - 'build_year': 2015}] + 'build_year': 2015}, + {'name': 'large_pumping_station_oxford', + 'build_year': 2020} + ] assert actual == expected type(mock_handle).current_timestep = PropertyMock(return_value=2020) @@ -130,39 +138,6 @@ def test_get_decision_two(self, get_strategies, get_register): ] assert (actual) == (expected) - def test_buildable(self, get_strategies): - dm = PreSpecified([2010, 2015], Mock(), get_strategies[0]['interventions']) - assert dm.timesteps == [2010, 2015] - assert dm.buildable(2010, 2010) is True - assert dm.buildable(2011, 2010) is True - - def test_historical_intervention_buildable(self, get_strategies): - dm = PreSpecified([2020, 2030], Mock(), get_strategies[0]['interventions']) - assert dm.timesteps == [2020, 2030] - assert dm.buildable(1980, 2020) is True - assert dm.buildable(1990, 2020) is True - - def test_buildable_raises(self, get_strategies): - dm = PreSpecified([2010, 2015], Mock(), get_strategies[0]['interventions']) - with raises(ValueError): - dm.buildable(2015, 2014) - - def test_within_lifetime(self): - dm = PreSpecified([2010, 2015], Mock(), []) - assert dm.within_lifetime(2010, 2010, 1) - - def test_within_lifetime_does_not_check_start(self): - """Note that the ``within_lifetime`` method does not check - that the build year is compatible with timestep - """ - dm = PreSpecified([2010, 2015], Mock(), []) - assert dm.within_lifetime(2011, 2010, 1) - - def test_negative_lifetime_raises(self): - dm = PreSpecified([2010, 2015], Mock(), []) - with raises(ValueError): - dm.within_lifetime(2010, 2010, -1) - class TestRuleBasedProperties: @@ -370,6 +345,36 @@ def test_get_intervention(self, decision_manager: DecisionManager): with raises(SmifDataNotFoundError): df.get_intervention('z') + def test_buildable(self, decision_manager): + + decision_manager._timesteps = [2010, 2015] + assert decision_manager.buildable(2010, 2010) is True + assert decision_manager.buildable(2011, 2010) is True + + def test_historical_intervention_buildable(self, decision_manager): + decision_manager._timesteps = [2020, 2030] + assert decision_manager.buildable(1980, 2020) is True + assert decision_manager.buildable(1990, 2020) is True + + def test_buildable_raises(self, decision_manager): + + with raises(ValueError): + decision_manager.buildable(2015, 2014) + + def test_within_lifetime(self, decision_manager): + + assert 
decision_manager.within_lifetime(2010, 2010, 1) + + def test_within_lifetime_does_not_check_start(self, decision_manager): + """Note that the ``within_lifetime`` method does not check + that the build year is compatible with timestep + """ + assert decision_manager.within_lifetime(2011, 2010, 1) + + def test_negative_lifetime_raises(self, decision_manager): + with raises(ValueError): + decision_manager.within_lifetime(2010, 2010, -1) + class TestDecisionManagerDecisions: @@ -382,7 +387,13 @@ def decision_manager(self, empty_store) -> DecisionManager: sos_model.name = 'test_sos_model' sos_model.sector_models = [] + interventions = {'test': {'technical_lifetime': {'value': 99}}, + 'planned': {'technical_lifetime': {'value': 99}}, + 'decided': {'technical_lifetime': {'value': 99}} + } + df = DecisionManager(empty_store, [2010, 2015], 'test', sos_model) + df._register = interventions return df def test_get_decisions(self, decision_manager: DecisionManager): From 358a9cab2f61cdc5ab4c19794d70d4690f0a09a9 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 5 Apr 2019 13:55:09 +0100 Subject: [PATCH 14/61] Remove pre-specified decision module --- src/smif/data_layer/abstract_data_store.py | 2 +- src/smif/data_layer/store.py | 6 +- src/smif/decision/decision.py | 107 +++---------------- tests/decision/test_decision.py | 118 ++------------------- 4 files changed, 26 insertions(+), 207 deletions(-) diff --git a/src/smif/data_layer/abstract_data_store.py b/src/smif/data_layer/abstract_data_store.py index c8645d360..ab86304d7 100644 --- a/src/smif/data_layer/abstract_data_store.py +++ b/src/smif/data_layer/abstract_data_store.py @@ -126,7 +126,7 @@ def write_interventions(self, key, interventions): """ @abstractmethod - def read_initial_conditions(self, key): + def read_initial_conditions(self, key) -> List[Dict]: """Read historical interventions for `key` Parameters diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 2366595ff..0f3d753ff 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -667,7 +667,7 @@ def read_strategy_interventions(self, strategy): """ return self.data_store.read_strategy_interventions(strategy) - def read_initial_conditions(self, model_name): + def read_initial_conditions(self, model_name) -> List[Dict]: """Read historical interventions for `model_name` Returns @@ -695,14 +695,14 @@ def write_initial_conditions(self, model_name, initial_conditions): self.data_store.write_initial_conditions(model['initial_conditions'][0], initial_conditions) - def read_all_initial_conditions(self, model_run_name): + def read_all_initial_conditions(self, model_run_name) -> List[Dict]: """A list of all historical interventions Returns ------- list[dict] """ - historical_interventions = [] + historical_interventions = [] # type: List model_run = self.read_model_run(model_run_name) sos_model_name = model_run['sos_model'] sos_model = self.read_sos_model(sos_model_name) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 767676be6..92ecb4c5f 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -17,12 +17,12 @@ from abc import ABCMeta, abstractmethod from logging import getLogger from types import MappingProxyType -from typing import Dict, List, Tuple +from typing import Dict, List, Set, Tuple from smif.data_layer.data_handle import ResultsHandle from smif.data_layer.model_loader import ModelLoader from smif.data_layer.store import Store -from smif.exception import SmifDataNotFoundError, 
SmifTimestepResolutionError +from smif.exception import SmifDataNotFoundError class DecisionManager(object): @@ -66,7 +66,7 @@ def __init__(self, store: Store, self._register = {} # type: Dict for sector_model in sos_model.sector_models: self._register.update(self._store.read_interventions(sector_model.name)) - self.planned_interventions = set() # type: set + self.planned_interventions = [] # type: List strategies = self._store.read_strategies(modelrun_name) self.logger.info("%s strategies found", len(strategies)) @@ -75,9 +75,7 @@ def __init__(self, store: Store, def _set_up_pre_spec_planning(self, modelrun_name: str, - strategies: List[Dict]) -> 'PreSpecified': - - pre_spec_planning = None + strategies: List[Dict]): # Read in the historical interventions (initial conditions) directly initial_conditions = self._store.read_all_initial_conditions(modelrun_name) @@ -97,13 +95,7 @@ def _set_up_pre_spec_planning(self, # Create a Pre-Specified planning decision module with all # the planned interventions - if planned_interventions: - pre_spec_planning = PreSpecified(self._timesteps, - self._register, - planned_interventions) - self.planned_interventions = set([x['name'] for x in planned_interventions]) - - return pre_spec_planning + self.planned_interventions = self._tuplize_state(planned_interventions) def _set_up_decision_modules(self, modelrun_name, strategies): @@ -136,16 +128,12 @@ def _set_up_decision_modules(self, modelrun_name, strategies): def available_interventions(self) -> Dict[str, Dict]: """Returns a register of available interventions, i.e. those not planned """ + planned_names = set([x[1] for x in self.planned_interventions]) edited_register = {name: self._register[name] for name in self._register.keys() - - self.planned_interventions} + planned_names} return edited_register - def update_planned_interventions(self, decisions: List[Tuple]): - """Adds a list of decisions to the set of planned interventions - """ - self.planned_interventions.update([x[1] for x in decisions]) - def get_intervention(self, value): try: return self._register[value] @@ -267,42 +255,32 @@ def get_and_save_decisions(self, iteration, timestep): if self._decision_module: previous_state = self._get_previous_state(self._decision_module, results_handle) pre_decision_state.update(previous_state) - elif self.pre_spec_planning: - previous_state = self._get_previous_state(self.pre_spec_planning, results_handle) - pre_decision_state.update(previous_state) msg = "Pre-decision state at timestep %s and iteration %s:\n%s" self.logger.debug(msg, timestep, iteration, pre_decision_state) new_decisions = set() - if self.pre_spec_planning: - decisions = self._get_decisions(self.pre_spec_planning, - results_handle) - new_decisions.update(decisions) - self.update_planned_interventions(decisions) if self._decision_module: decisions = self._get_decisions(self._decision_module, results_handle) new_decisions.update(decisions) - self.update_planned_interventions(decisions) self.logger.debug("New decisions at timestep %s and iteration %s:\n%s", timestep, iteration, new_decisions) - # Post decision state is the union of the pre decision state set - # and new decision set - post_decision_state = pre_decision_state | new_decisions - + # Post decision state is the union of the pre decision state set, the + # new decision set and the set of planned interventions + post_decision_state = (pre_decision_state + | new_decisions + | set(self.planned_interventions)) post_decision_state = self.retire_interventions(post_decision_state, timestep) 
post_decision_state = self._untuplize_state(post_decision_state) self.logger.debug("Post-decision state at timestep %s and iteration %s:\n%s", timestep, iteration, post_decision_state) - self.logger.debug( - "Writing state for timestep %s and interation %s", timestep, iteration) - + # Workaround to avoid issue nismod/smif#345 if not post_decision_state: post_decision_state = [{'name': '', 'build_year': timestep}] self._store.write_state(post_decision_state, self._modelrun_name, timestep, iteration) @@ -430,7 +408,7 @@ def __init__(self, timesteps: List[int], register: MappingProxyType): self.timesteps = timesteps self._register = register self.logger = getLogger(__name__) - self._decisions = set() # type: set + self._decisions = set() # type: Set def __next__(self) -> List[Dict]: return self._get_next_decision_iteration() @@ -526,63 +504,6 @@ def get_decision(self, results_handle: ResultsHandle) -> List[Dict]: raise NotImplementedError -class PreSpecified(DecisionModule): - """Pre-specified planning - - Parameters - ---------- - timesteps : list - A list of the timesteps included in the model horizon - register : dict - A dict of intervention dictionaries keyed by unique intervention name - planned_interventions : list - A list of dicts ``{'name': 'intervention_name', 'build_year': 2010}`` - representing historical or planned interventions - """ - def __init__(self, timesteps, register, planned_interventions): - super().__init__(timesteps, register) - self._planned = planned_interventions - - def _get_next_decision_iteration(self): - return { - 'decision_iterations': [0], - 'timesteps': [x for x in self.timesteps] - } - - def get_previous_state(self, - results_handle: ResultsHandle) -> List[Dict]: - try: - prev_timestep = results_handle.previous_timestep - prev_iteration = results_handle.decision_iteration - return results_handle.get_state(prev_timestep, prev_iteration) - except SmifTimestepResolutionError: - return [] - - def get_decision(self, results_handle) -> List[Dict]: - """Return a dict of historical or planned interventions in current timestep - - Use lifetime attribute of named intervention to calculate if it is still - present in the current state of the system - - Arguments - --------- - results_handle : smif.data_layer.data_handle.ResultsHandle - A reference to a smif results handle - - Returns - ------- - list of dict - - Examples - -------- - >>> dm = PreSpecified([2010, 2015], register, - [{'name': 'intervention_a', 'build_year': 2010}]) - >>> dm.get_decision(results_handle) - [{'name': intervention_a', 'build_year': 2010}] - """ - return self._planned - - class RuleBased(DecisionModule): """Rule-base decision modules """ diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index 95dd30877..cb3912b75 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -1,10 +1,10 @@ from typing import Dict, List -from unittest.mock import Mock, PropertyMock +from unittest.mock import Mock from pytest import fixture, raises from smif.data_layer.store import Store -from smif.decision.decision import DecisionManager, PreSpecified, RuleBased +from smif.decision.decision import DecisionManager, RuleBased from smif.exception import SmifDataNotFoundError @@ -43,102 +43,6 @@ def get_register(): return register -class TestPreSpecified: - - def test_initialisation(self, plan): - - timesteps = [2010, 2015, 2020] - - actual = PreSpecified(timesteps, Mock(), plan) - - assert actual.timesteps == timesteps - - def test_generator(self, plan): - - 
timesteps = [2010, 2015, 2020] - dm = PreSpecified(timesteps, Mock(), plan) - - actual = next(dm) - - expected = { - 'decision_iterations': [0], - 'timesteps': timesteps - } - - assert actual == expected - - def test_get_decision(self, plan, get_register): - - register = get_register - - timesteps = [2010, 2015, 2020] - dm = PreSpecified(timesteps, register, plan) - - mock_handle = Mock() - type(mock_handle).current_timestep = PropertyMock(return_value=2010) - actual = dm.get_decision(mock_handle) - expected = [ - {'name': 'small_pumping_station_oxford', - 'build_year': 2010}, - {'name': 'small_pumping_station_abingdon', - 'build_year': 2015}, - {'name': 'large_pumping_station_oxford', - 'build_year': 2020} - ] - assert actual == expected - - type(mock_handle).current_timestep = PropertyMock(return_value=2015) - actual = dm.get_decision(mock_handle) - expected = [ - {'name': 'small_pumping_station_oxford', - 'build_year': 2010}, - {'name': 'small_pumping_station_abingdon', - 'build_year': 2015}, - {'name': 'large_pumping_station_oxford', - 'build_year': 2020} - ] - assert actual == expected - - type(mock_handle).current_timestep = PropertyMock(return_value=2020) - actual = dm.get_decision(mock_handle) - expected = [ - {'name': 'small_pumping_station_oxford', - 'build_year': 2010}, - {'name': 'small_pumping_station_abingdon', - 'build_year': 2015}, - {'name': 'large_pumping_station_oxford', - 'build_year': 2020} - ] - assert actual == expected - - def test_get_decision_two(self, get_strategies, get_register): - register = get_register - dm = PreSpecified([2010, 2015], register, get_strategies[0]['interventions']) - - mock_handle = Mock() - type(mock_handle).current_timestep = PropertyMock(return_value=2010) - actual = dm.get_decision(mock_handle) - expected = [ - {'name': 'nuclear_large', 'build_year': 2012}, - {'name': 'carrington_retire', 'build_year': 2011} - ] - # assert actual == expected - # we don't mind the order - assert (actual) == (expected) - - # actual = dm.get_decision(2015) - # expected = [('carrington_retire', 2011)] - # assert actual == expected - - type(mock_handle).current_timestep = PropertyMock(return_value=2015) - actual = dm.get_decision(mock_handle) - expected = [ - {'name': 'nuclear_large', 'build_year': 2012}, - {'name': 'carrington_retire', 'build_year': 2011} - ] - assert (actual) == (expected) - - class TestRuleBasedProperties: def test_timesteps(self): @@ -328,7 +232,7 @@ def test_available_interventions(self, decision_manager: DecisionManager): assert df.available_interventions == df._register - df.planned_interventions = {'a', 'b'} + df.planned_interventions = {(2010, 'a'), (2010, 'b')} expected = {'c': {'name': 'c'}} @@ -416,13 +320,13 @@ def test_get_and_save_decisions_dm(self, decision_manager: DecisionManager): dm._decision_module = Mock() dm._decision_module.get_decision = Mock( - return_value=[{'name': 'test', 'build_year': 2010}]) + return_value=[{'name': 'decided', 'build_year': 2010}]) dm._decision_module.get_previous_state = Mock(return_value=[]) dm.get_and_save_decisions(0, 2010) actual = dm._store # type: Store - expected = [{'name': 'test', 'build_year': 2010}] # type: List[Dict] + expected = [{'name': 'decided', 'build_year': 2010}] # type: List[Dict] assert actual.read_state('test', 2010, decision_iteration=0) == expected def test_get_and_save_decisions_prespec(self, @@ -432,15 +336,12 @@ def test_get_and_save_decisions_prespec(self, """ dm = decision_manager - dm.pre_spec_planning = Mock() - dm.pre_spec_planning.get_decision = Mock( - 
return_value=[{'name': 'test', 'build_year': 2010}]) - dm.pre_spec_planning.get_previous_state = Mock(return_value=[]) + dm.planned_interventions = [(2010, 'planned')] dm.get_and_save_decisions(0, 2010) actual = dm._store # type: Store - expected = [{'name': 'test', 'build_year': 2010}] # type: List[Dict] + expected = [{'name': 'planned', 'build_year': 2010}] # type: List[Dict] assert actual.read_state('test', 2010, decision_iteration=0) == expected def test_pre_spec_and_decision_module(self, @@ -452,10 +353,7 @@ def test_pre_spec_and_decision_module(self, return_value=[{'name': 'decided', 'build_year': 2010}]) dm._decision_module.get_previous_state = Mock(return_value=[]) - dm.pre_spec_planning = Mock() - dm.pre_spec_planning.get_decision = Mock( - return_value=[{'name': 'planned', 'build_year': 2010}]) - dm.pre_spec_planning.get_previous_state = Mock(return_value=[]) + dm.planned_interventions = [(2010, 'planned')] dm.get_and_save_decisions(0, 2010) From 73df7f1751761d261738b7edbd9370fa7a77d77a Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Thu, 11 Apr 2019 12:08:42 +0100 Subject: [PATCH 15/61] Apply suggestions from code review Co-Authored-By: willu47 --- src/smif/decision/decision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 92ecb4c5f..60563b792 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -128,7 +128,7 @@ def _set_up_decision_modules(self, modelrun_name, strategies): def available_interventions(self) -> Dict[str, Dict]: """Returns a register of available interventions, i.e. those not planned """ - planned_names = set([x[1] for x in self.planned_interventions]) + planned_names = set(name for build_year, name in self.planned_interventions) edited_register = {name: self._register[name] for name in self._register.keys() - planned_names} @@ -238,7 +238,7 @@ def get_and_save_decisions(self, iteration, timestep): of their lifetime, new decisions are added to the list of current interventions. - Finally, the new state file is written to disk. + Finally, the new state is written to the store. 
""" results_handle = ResultsHandle( store=self._store, From fb6e04f359526ba555c40866c9f4bcd18afc79bd Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 11 Apr 2019 12:46:13 +0100 Subject: [PATCH 16/61] Fixed return type of get_previous_iteration_timestep --- src/smif/decision/decision.py | 6 +++--- tests/decision/test_decision.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 60563b792..2d1e53010 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod from logging import getLogger from types import MappingProxyType -from typing import Dict, List, Set, Tuple +from typing import Dict, List, Optional, Set, Tuple from smif.data_layer.data_handle import ResultsHandle from smif.data_layer.model_loader import ModelLoader @@ -517,7 +517,7 @@ def __init__(self, timesteps, register): self._max_iteration_by_timestep = {self.first_timestep: 0} self.logger = getLogger(__name__) - def get_previous_iteration_timestep(self) -> Tuple[int, int]: + def get_previous_iteration_timestep(self) -> Optional[Tuple[int, int]]: """Returns the timestep, iteration pair that describes the previous iteration @@ -536,7 +536,7 @@ def get_previous_iteration_timestep(self) -> Tuple[int, int]: elif (iteration >= self._max_iteration_by_timestep[self.previous_timestep]): timestep = self.current_timestep else: - return tuple() + return None return timestep, iteration def get_previous_state(self, results_handle: ResultsHandle) -> List[Dict]: diff --git a/tests/decision/test_decision.py b/tests/decision/test_decision.py index cb3912b75..b15d03a51 100644 --- a/tests/decision/test_decision.py +++ b/tests/decision/test_decision.py @@ -124,7 +124,7 @@ def test_first_iteration_base_year(self, dm): dm.current_timestep = 2010 dm.current_iteration = 1 dm._max_iteration_by_timestep[2010] = 1 - assert dm.get_previous_iteration_timestep() == tuple() + assert dm.get_previous_iteration_timestep() is None def test_second_iteration_base_year(self, dm): From 1f0d0f2230530af80578d04c22da5e326ce64057 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 11 Apr 2019 13:30:51 +0100 Subject: [PATCH 17/61] First stab at decision docs --- docs/decisions.rst | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 docs/decisions.rst diff --git a/docs/decisions.rst b/docs/decisions.rst new file mode 100644 index 000000000..948d7ea1d --- /dev/null +++ b/docs/decisions.rst @@ -0,0 +1,91 @@ +.. _decisions: + +Strategies, Interventions and Decision Modules +============================================== + +**smif** makes a sharp distinction between *simulating* the operation of a system, and +*deciding* on which interventions to introduce to meet goals or constraints on the whole +system-of-systems. + +The decision aspects of **smif** include a number of components. + +- The DecisionManager interacts with the ModelRunner and provides a list of + timesteps and iterations to run +- The DecisionManager also acts as the interface to a user implemented DecisionModule, + which may implement a particular decision approach. + +A decision module might use one of three approaches: + +- a rule based approach (using some heuristic rules), or +- an optimisation approach. + +A pre-specified approach (testing a given planning pipeline) is included in the +core **smif** code. 
+
+The Decision Manager
+--------------------
+
+A DecisionManager is initialised with a DecisionModule implementation. This is
+referenced in the strategy section of a Run configuration.
+
+The DecisionManager presents a simple decision loop interface to the model runner,
+in the form of a generator which allows the model runner to iterate over the
+collection of independent simulations required at each step.
+
+The DecisionManager collates the output of the decision algorithm and
+writes the post-decision state to the store. This allows Models
+to access a given decision state for each timestep and decision iteration.
+
+Decision Module Implementations
+-------------------------------
+
+Users must implement a DecisionModule and pass this to the DecisionManager by
+declaring it under a ``strategy`` section of a Run configuration.
+
+The DecisionModule implementation influences the combination and ordering of
+decision iterations and model timesteps that need to be performed to complete
+the run. To do this, the DecisionModule implementation must yield a bundle
+of interventions and planning timesteps, which are then simulated,
+after which the decision module may request further simulation of different
+timesteps and/or combinations of interventions.
+
+The composition of the yielded bundle will change as a function of the implementation
+type. For example, a rule-based approach is likely to iterate over individual
+years until a threshold is met before proceeding.
+
+A DecisionModule implementation can access results of previous iterations using
+methods available on the ResultsHandle it is passed at runtime. These include
+``ResultsHandle.get_results``. The property ``DecisionModule.available_interventions``
+returns the entire collection of interventions that are available for deployment
+in a particular iteration.
+
+Interventions
+-------------
+
+Interventions change how a simulated system operates.
+An intervention can represent building or upgrading a physical thing
+(like a reservoir or power station), or could be something less
+tangible, like imposing a congestion charging zone over a city centre.
+
+A system of interest can in principle be composed entirely of a series of interventions. For
+example, the electricity generation and transmission system is composed of a set of generation
+sites (power stations, wind farms...), transmission lines and bus bars.
+
+A simulation model has access to several methods to obtain its current *state*.
+The ``DataHandle.get_state`` and ``DataHandle.get_current_interventions`` methods provide
+direct access to the database of interventions relevant for the current timestep.
+
+Deciding on Interventions
+-------------------------
+
+The set of all interventions $I$ includes all interventions for all models in a
+system of systems.
+As the Run proceeds and interventions are chosen by the DecisionModule
+implementation, the set of available interventions is modified.
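+
+As a minimal sketch of this set logic (the names and values here are
+illustrative; ``DecisionManager.available_interventions`` applies the same rule
+to the intervention register)::
+
+    all_interventions = {'nuclear_large', 'carrington_retire', 'ac_line1'}
+    planned = {'ac_line1'}                   # pre-specified interventions
+    available = all_interventions - planned  # open to the decision module
+    decisions_2010 = {'nuclear_large'}       # chosen by a decision module
+    assert decisions_2010 <= available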
+ +Set of pre-specified or planned interventions $P{\subset}I$ + +Available interventions $A=P{\cap}I$ + +Decisions at time t ${D_t}\subset{A}-{D_{t-1}}$ From 68f2d9f446212c0caa21d6b83a1b8b4e3d2d21a9 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Thu, 11 Apr 2019 11:19:44 +0100 Subject: [PATCH 18/61] Handle reading and writing empty state to data store Closes #345 --- src/smif/data_layer/file/file_data_store.py | 11 +++++++++-- tests/data_layer/test_data_store.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/smif/data_layer/file/file_data_store.py b/src/smif/data_layer/file/file_data_store.py index c9ab602ce..6fda502d8 100644 --- a/src/smif/data_layer/file/file_data_store.py +++ b/src/smif/data_layer/file/file_data_store.py @@ -415,7 +415,11 @@ def _write_data_array(self, path, data_array, timestep=None): def _read_list_of_dicts(self, path): """Read file to list[dict] """ - return pandas.read_csv(path).to_dict('records') + try: + data = pandas.read_csv(path).to_dict('records') + except pandas.errors.EmptyDataError: + data = [] + return data def _write_list_of_dicts(self, path, data): """Write list[dict] to file @@ -491,7 +495,10 @@ def _read_list_of_dicts(self, path): def _write_list_of_dicts(self, path, data): """Write list[dict] to file """ - pandas.DataFrame.from_records(data).to_parquet(path, engine='pyarrow') + if data: + pandas.DataFrame.from_records(data).to_parquet(path, engine='pyarrow') + else: + pandas.DataFrame(columns=['placeholder']).to_parquet(path, engine='pyarrow') def _read_ndarray(self, path): """Read numpy.ndarray diff --git a/tests/data_layer/test_data_store.py b/tests/data_layer/test_data_store.py index a4cb981d4..60627c8a0 100644 --- a/tests/data_layer/test_data_store.py +++ b/tests/data_layer/test_data_store.py @@ -140,6 +140,16 @@ def test_read_write_state(self, handler, state): actual = handler.read_state(modelrun_name, timestep, decision_iteration) assert actual == expected + def test_read_write_empty_state(self, handler): + expected = [] + modelrun_name = 'test_modelrun' + timestep = 2020 + decision_iteration = None + + handler.write_state(expected, modelrun_name, timestep, decision_iteration) + actual = handler.read_state(modelrun_name, timestep, decision_iteration) + assert actual == expected + class TestCoefficients(): """Read/write conversion coefficients From bc8ade269021dfdc95796b5e6cd81e7ed32d0bf2 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Thu, 11 Apr 2019 11:22:04 +0100 Subject: [PATCH 19/61] Remove workaround for #345 in decision module state-saving --- src/smif/decision/decision.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/smif/decision/decision.py b/src/smif/decision/decision.py index 2d1e53010..5f6c7fb2d 100644 --- a/src/smif/decision/decision.py +++ b/src/smif/decision/decision.py @@ -280,9 +280,6 @@ def get_and_save_decisions(self, iteration, timestep): self.logger.debug("Post-decision state at timestep %s and iteration %s:\n%s", timestep, iteration, post_decision_state) - # Workaround to avoid issue nismod/smif#345 - if not post_decision_state: - post_decision_state = [{'name': '', 'build_year': timestep}] self._store.write_state(post_decision_state, self._modelrun_name, timestep, iteration) def retire_interventions(self, state: List[Tuple[int, str]], From 05a5866c0d8274d8bc41e042c0bbe5ba2cf6301e Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Thu, 11 Apr 2019 15:03:51 +0100 Subject: [PATCH 20/61] Drop conda-forge build from Appveyor --- .appveyor.yml | 5 +---- 1 file changed, 1 
insertion(+), 4 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 7cd7b73c5..e07d79ba2 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -18,9 +18,8 @@ install: - conda update conda - conda config --set changeps1 false - conda config --set channel_priority strict - - conda config --add channels conda-forge - conda info -a - - "conda create -n testenv python=%PYTHON_VERSION% \ + - "conda create -n testenv python=3.7 \ fiona \ flask \ gdal \ @@ -29,13 +28,11 @@ install: networkx \ numpy \ pandas \ - pint \ psycopg2 \ pyarrow \ pytest \ python-dateutil \ rtree \ - ruamel.yaml \ shapely \ xarray" - activate testenv From c3c0edd1d901ff0259ec0b592d8fd6c53b49e84b Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 11 Apr 2019 18:04:39 +0100 Subject: [PATCH 21/61] #351 begin work on functionality to list available results --- src/smif/cli/__init__.py | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index eece5707a..77d5f30cf 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -108,6 +108,57 @@ def list_model_runs(args): print(run['name']) +def list_available_results(args): + """List the available results from previous model runs + """ + store = _get_store(args) + model_run_configs = store.read_model_runs() + + # print(sos_configs['name']) + all_output_names = [] + models = store.read_models() + for model in models: + all_output_names += [output['name'] for output in model['outputs']] + max_output_length = max([len(output_name) for output_name in all_output_names]) + + for run in model_run_configs: + run_name = run['name'] + + available_results = store.available_results(run_name) + timesteps = sorted(run['timesteps']) + + # Name of the model run + print('\nmodel run: {}'.format(run_name)) + + # Name of the associated sos model + sos_model_name = run['sos_model'] + print(' - sos model: {}'.format(run['sos_model'])) + + # Names of each associated sector model + sos_config = store.read_sos_model(sos_model_name) + for sec_model_name in sos_config['sector_models']: + print(' - sector model: {}'.format(sec_model_name)) + + sec_model_config = store.read_model(sec_model_name) + outputs = sec_model_config['outputs'] + + # Names of each output for the sector model + for output in outputs: + output_name = output['name'] + + expected_tuples = [(t, 0, sec_model_name, output_name) for t in timesteps] + times_with_data = [str(t[0]) for t in expected_tuples if + t in available_results] + + res_str = 'results: {}'.format( + ', '.join(times_with_data)) if times_with_data else 'no results' + + base = ' - output:' + ljust_width = len(base) + max_output_length + 7 + ljust_output = '{} {} '.format(base, output_name).ljust(ljust_width, '.') + print('{} {}'.format(ljust_output, res_str)) + + def run_model_runs(args): """Run the model runs as requested. 
Check if results exist and asks user for permission to overwrite @@ -245,6 +296,11 @@ def parse_arguments(): 'list', help='List available model runs', parents=[parent_parser]) parser_list.set_defaults(func=list_model_runs) + # RESULTS + parser_list = subparsers.add_parser( + 'available_results', help='List available results', parents=[parent_parser]) + parser_list.set_defaults(func=list_available_results) + # APP parser_app = subparsers.add_parser( 'app', help='Open smif app', parents=[parent_parser]) From c63ec42ce05b6f9d513712507e587f081724460b Mon Sep 17 00:00:00 2001 From: Will Usher Date: Fri, 12 Apr 2019 16:23:54 +0100 Subject: [PATCH 22/61] #356 - add section on adding a pre-specified planning strategy to decision docs --- docs/decisions.rst | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/decisions.rst b/docs/decisions.rst index 948d7ea1d..a665d7ea2 100644 --- a/docs/decisions.rst +++ b/docs/decisions.rst @@ -89,3 +89,71 @@ Set of pre-specified or planned interventions $P{\subset}I$ Available interventions $A=P{\cap}I$ Decisions at time t ${D_t}\subset{A}-{D_{t-1}}$ + +Pre-Specified Planning +---------------------- + +In a pre-specified planning strategy, a pipeline of interventions is forced into +the system-of-systems. + +This requires the provision of data and configuration, described step by step below + +- define the set of interventions +- define the planning strategy +- add the pre-specified strategy to the model run configuration + +Define interventions +~~~~~~~~~~~~~~~~~~~~ + +Interventions are associated with an individual model, listed in a csv file and +added to the model configuration as described in the project configuration part +of the documentation . + +Note that each intervention is identified by a ``name`` entry that must be unique +across the system of systems. To ensure this, one suggestion is to use a pre-fix +with the initals of the sector model to which the intervention belows. + +An example intervention file has the headers + +- name +- location +- capacity_value +- capacity_units +- operational_lifetime_value +- operational_lifetime_units +- technical_lifetime_value +- technical_lifetime_units +- capital_cost_value +- capital_cost_units + +and contents as follows:: + + nuclear_large,Oxford,1000,MW,40,years,25,years,2000,million £ + carrington_retire,Oxford,-500,MW,0,years,0,years,0,million £ + +Define the planning strategy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A planning strategy consists of the set of (name, build_timestep) tuples, where +each name must belong to the set of interventions. + +An example from the sample project looks like this:: + + name,build_year + nuclear_large,2010 + carrington_retire,2015 + ac_line1,2010 + +Add the pre-specified strategy to the model run configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The final step is to add the pre-specified planning stategy to the run +configuration:: + + strategies: + - type: pre-specified-planning + description: Future energy plan + filename: energy_supply/strategies/plan.csv + +The entry should take the above format, where the filename entry refers to the +planning strategy file composed in step two. 
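+
+Because every ``name`` in the planning strategy must match an intervention, a
+quick cross-check of the two csv files can catch typos before a Run. This is an
+illustrative snippet, not part of **smif**; the file paths are hypothetical and
+each file is assumed to start with a header row::
+
+    import csv
+
+    def read_names(path):
+        with open(path) as csv_file:
+            return {row['name'] for row in csv.DictReader(csv_file)}
+
+    known = read_names('energy_supply/interventions/interventions.csv')
+    planned = read_names('energy_supply/strategies/plan.csv')
+    missing = planned - known
+    if missing:
+        raise ValueError('Unknown interventions in plan: {}'.format(missing))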
\ No newline at end of file From e568148cdf9f003ba64cf6a4ccf0bb7508cc7eb2 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Mon, 15 Apr 2019 10:47:10 +0100 Subject: [PATCH 23/61] smif list will now append an asterisk to model run names with complete results available --- src/smif/cli/__init__.py | 74 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index 77d5f30cf..6024368dc 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -65,6 +65,7 @@ """ from __future__ import print_function +import itertools import logging import os import sys @@ -104,8 +105,79 @@ def list_model_runs(args): """ store = _get_store(args) model_run_configs = store.read_model_runs() + + print('Model runs with an asterisk (*) have complete results available\n') + for run in model_run_configs: - print(run['name']) + run_name = run['name'] + + expected_results = _get_canonical_expected_results(store, run_name) + available_results = _get_canonical_available_results(store, run_name) + + complete = ' *' if expected_results == available_results else '' + + print('{}{}'.format(run_name, complete)) + + +def _get_canonical_expected_results(store, model_run_name): + """Helper to list the results that are expected from a model run, collapsing all decision + iterations. + + For a complete model run, this would coincide with the unique list returned + from `available_results`, where all decision iterations are set to 0. + + This method is used to determine whether a model run is complete, given that it is + impossible to know how many decision iterations to expect: we simply check that each + expected timestep has been completed. + """ + + # Model results are returned as a tuple + # (timestep, decision_it, sec_model_name, output_name) + # so we first build the full list of expected results tuples. + + expected_results = [] + + # Get the sos model name given the model run name, and the full list of timesteps + model_run = store.read_model_run(model_run_name) + timesteps = sorted(model_run['timesteps']) + sos_model_name = model_run['sos_model'] + + # Get the list of sector models in the sos model + sos_config = store.read_sos_model(sos_model_name) + + # For each sector model, get the outputs and create the tuples + for sec_model_name in sos_config['sector_models']: + + sec_model_config = store.read_model(sec_model_name) + outputs = sec_model_config['outputs'] + + for output, t in itertools.product(outputs, timesteps): + expected_results.append((t, 0, sec_model_name, output['name'])) + + # Return as a set to remove duplicates + return set(expected_results) + + +def _get_canonical_available_results(store, model_run_name): + """Helper to list the results that are actually available from a model run, collapsing all + decision iterations. + + This is the unique list from calling `available_results`, with all decision iterations set + to 0. + + This method is used to determine whether a model run is complete, given that it is + impossible to know how many decision iterations to expect: we simply check that each + expected timestep has been completed. 
+ """ + available_results = store.available_results(model_run_name) + + canonical_list = [] + + for t, d, sec_model_name, output_name in available_results: + canonical_list.append((t, 0, sec_model_name, output_name)) + + # Return as a set to remove duplicates + return set(canonical_list) def list_available_results(args): From e820c83cb33411bd7e6c14261edbe7581d256f12 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Mon, 15 Apr 2019 14:41:42 +0100 Subject: [PATCH 24/61] #351 available_results now works with iterations and individual model runs --- src/smif/cli/__init__.py | 96 ++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index 6024368dc..d0b573923 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -180,55 +180,69 @@ def _get_canonical_available_results(store, model_run_name): return set(canonical_list) -def list_available_results(args): - """List the available results from previous model runs +def _list_available_results(store, run): + """Helper to print the available results for a specific model run config. """ - store = _get_store(args) - model_run_configs = store.read_model_runs() - # print(sos_configs['name']) - all_output_names = [] - models = store.read_models() - for model in models: - all_output_names += [output['name'] for output in model['outputs']] - max_output_length = max([len(output_name) for output_name in all_output_names]) + run_name = run['name'] - for run in model_run_configs: - run_name = run['name'] + available_results = store.available_results(run_name) + + # Name of the model run + print('\nmodel run: {}'.format(run_name)) + + # Name of the associated sos model + sos_model_name = run['sos_model'] + print('{}- sos model: {}'.format(' ' * 2, run['sos_model'])) + + # Names of each associated sector model + sos_config = store.read_sos_model(sos_model_name) + for sec_model_name in sos_config['sector_models']: + print('{}- sector model: {}'.format(' ' * 4, sec_model_name)) + + sec_model_config = store.read_model(sec_model_name) + outputs = sec_model_config['outputs'] + + # Names of each output for the sector model + for output in outputs: + output_name = output['name'] + print('{}- output: {}'.format(' ' * 6, output_name)) - available_results = store.available_results(run_name) - timesteps = sorted(run['timesteps']) + output_results = [res for res in available_results if + res[2] == sec_model_name and res[3] == output_name] - # Name of the model run - print('\nmodel run: {}'.format(run_name)) + # Sorted list of all the decision iterations matching this model and output + decs = sorted(list(set([res[1] for res in output_results]))) - # Name of the associated sos model - sos_model_name = run['sos_model'] - print(' - sos model: {}'.format(run['sos_model'])) + if len(decs) == 0: + print('{}- no results'.format(' ' * 8)) - # Names of each associated sector model - sos_config = store.read_sos_model(sos_model_name) - for sec_model_name in sos_config['sector_models']: - print(' - sector model: {}'.format(sec_model_name)) + for dec in decs: + base_str = '{}- decision {}:'.format(' ' * 8, dec) - sec_model_config = store.read_model(sec_model_name) - outputs = sec_model_config['outputs'] + # Get the timesteps in the results corresponding to this decision iteration + timesteps = [t for t, d, _sec, _out in output_results if d == dec] + time_strings = [str(t) for t in sorted(timesteps)] - # Names of each output for the sector model - for output in outputs: - 
output_name = output['name'] + res_str = ', '.join(time_strings) if len(timesteps) > 0 else 'no results' - expected_tuples = [(t, 0, sec_model_name, output_name) for t in timesteps] - times_with_data = [str(t[0]) for t in expected_tuples if - t in available_results] + print('{} {}'.format(base_str, res_str)) - res_str = 'results: {}'.format( - ', '.join(times_with_data)) if times_with_data else 'no results' - base = ' - output:' - ljust_width = len(base) + max_output_length + 7 - ljust_output = '{} {} '.format(base, output_name).ljust(ljust_width, '.') - print('{} {}'.format(ljust_output, res_str)) +def list_available_results(args): + """List the available results from previous model runs. A specific model run may be + specified my the subcommand `model_run`, else all available information is displayed. + """ + + store = _get_store(args) + + if args.model_run: + list_of_model_runs = [store.read_model_run(args.model_run)] + else: + list_of_model_runs = store.read_model_runs() + + for model_run in list_of_model_runs: + _list_available_results(store, model_run) def run_model_runs(args): @@ -369,9 +383,13 @@ def parse_arguments(): parser_list.set_defaults(func=list_model_runs) # RESULTS - parser_list = subparsers.add_parser( + parser_results = subparsers.add_parser( 'available_results', help='List available results', parents=[parent_parser]) - parser_list.set_defaults(func=list_available_results) + parser_results.set_defaults(func=list_available_results) + parser_results.add_argument('model_run', + help="Name of the model run to list available results", + nargs='?', + default=None) # APP parser_app = subparsers.add_parser( From 9bbbeaa6a917f5549c73aec98dc26fb16007a1e1 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 10:38:47 +0100 Subject: [PATCH 25/61] #351 Make listing complete results optional Now requires smif list <-c/--complete> --- src/smif/cli/__init__.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index d0b573923..3b67e09cf 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -101,22 +101,27 @@ def list_model_runs(args): - """List the model runs defined in the config + """List the model runs defined in the config, optionally indicating whether complete + results exist. 
""" store = _get_store(args) model_run_configs = store.read_model_runs() - print('Model runs with an asterisk (*) have complete results available\n') + if args.complete: + print('Model runs with an asterisk (*) have complete results available\n') for run in model_run_configs: run_name = run['name'] - expected_results = _get_canonical_expected_results(store, run_name) - available_results = _get_canonical_available_results(store, run_name) + if args.complete: + expected_results = _get_canonical_expected_results(store, run_name) + available_results = _get_canonical_available_results(store, run_name) - complete = ' *' if expected_results == available_results else '' + complete = ' *' if expected_results == available_results else '' - print('{}{}'.format(run_name, complete)) + print('{}{}'.format(run_name, complete)) + else: + print(run_name) def _get_canonical_expected_results(store, model_run_name): @@ -381,6 +386,9 @@ def parse_arguments(): parser_list = subparsers.add_parser( 'list', help='List available model runs', parents=[parent_parser]) parser_list.set_defaults(func=list_model_runs) + parser_list.add_argument('-c', '--complete', + help="Show which model runs have complete results", + action='store_true') # RESULTS parser_results = subparsers.add_parser( From 8832224d0a87a5e9f924ecb6036966ab4a04def9 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 11:48:57 +0100 Subject: [PATCH 26/61] #351 Migrate canonical results methods to store and add tests --- src/smif/cli/__init__.py | 66 +--------------------------- src/smif/data_layer/store.py | 78 +++++++++++++++++++++++++++++++++- tests/data_layer/test_store.py | 36 ++++++++++++++++ 3 files changed, 115 insertions(+), 65 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index 3b67e09cf..3bbba41ee 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -65,7 +65,6 @@ """ from __future__ import print_function -import itertools import logging import os import sys @@ -114,8 +113,8 @@ def list_model_runs(args): run_name = run['name'] if args.complete: - expected_results = _get_canonical_expected_results(store, run_name) - available_results = _get_canonical_available_results(store, run_name) + expected_results = store.get_canonical_expected_results(run_name) + available_results = store.get_canonical_available_results(run_name) complete = ' *' if expected_results == available_results else '' @@ -124,67 +123,6 @@ def list_model_runs(args): print(run_name) -def _get_canonical_expected_results(store, model_run_name): - """Helper to list the results that are expected from a model run, collapsing all decision - iterations. - - For a complete model run, this would coincide with the unique list returned - from `available_results`, where all decision iterations are set to 0. - - This method is used to determine whether a model run is complete, given that it is - impossible to know how many decision iterations to expect: we simply check that each - expected timestep has been completed. - """ - - # Model results are returned as a tuple - # (timestep, decision_it, sec_model_name, output_name) - # so we first build the full list of expected results tuples. 
- - expected_results = [] - - # Get the sos model name given the model run name, and the full list of timesteps - model_run = store.read_model_run(model_run_name) - timesteps = sorted(model_run['timesteps']) - sos_model_name = model_run['sos_model'] - - # Get the list of sector models in the sos model - sos_config = store.read_sos_model(sos_model_name) - - # For each sector model, get the outputs and create the tuples - for sec_model_name in sos_config['sector_models']: - - sec_model_config = store.read_model(sec_model_name) - outputs = sec_model_config['outputs'] - - for output, t in itertools.product(outputs, timesteps): - expected_results.append((t, 0, sec_model_name, output['name'])) - - # Return as a set to remove duplicates - return set(expected_results) - - -def _get_canonical_available_results(store, model_run_name): - """Helper to list the results that are actually available from a model run, collapsing all - decision iterations. - - This is the unique list from calling `available_results`, with all decision iterations set - to 0. - - This method is used to determine whether a model run is complete, given that it is - impossible to know how many decision iterations to expect: we simply check that each - expected timestep has been completed. - """ - available_results = store.available_results(model_run_name) - - canonical_list = [] - - for t, d, sec_model_name, output_name in available_results: - canonical_list.append((t, 0, sec_model_name, output_name)) - - # Return as a set to remove duplicates - return set(canonical_list) - - def _list_available_results(store, run): """Helper to print the available results for a specific model run config. """ diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 0f3d753ff..8c2a979cd 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -14,13 +14,13 @@ When unable to read data e.g. unable to handle file type or connect to database """ +import itertools from copy import deepcopy from logging import getLogger from operator import itemgetter from typing import Dict, List, Optional import numpy as np # type: ignore - from smif.data_layer import DataArray from smif.data_layer.abstract_data_store import DataStore from smif.data_layer.abstract_metadata_store import MetadataStore @@ -871,6 +871,82 @@ def prepare_warm_start(self, model_run_name): else: max_timestep = None return max_timestep + + def get_canonical_available_results(self, model_run_name): + """List the results that are available from a model run, collapsing all decision + iterations. + + This is the unique items from calling `available_results`, with all decision iterations + set to 0. + + This method is used to determine whether a model run is complete, given that it is + impossible to know how many decision iterations to expect: we simply check that each + expected timestep has been completed. + + Parameters + ---------- + model_run_name : str + + Returns + ------- + set Set of tuples representing available results + """ + + available_results = self.available_results(model_run_name) + + canonical_list = [] + + for t, d, sec_model_name, output_name in available_results: + canonical_list.append((t, 0, sec_model_name, output_name)) + + # Return as a set to remove duplicates + return set(canonical_list) + + def get_canonical_expected_results(self, model_run_name): + """List the results that are expected from a model run, collapsing all decision + iterations. 
+ + For a complete model run, this would coincide with the unique list returned from + `available_results`, where all decision iterations are set to 0. + + This method is used to determine whether a model run is complete, given that it is + impossible to know how many decision iterations to expect: we simply check that each + expected timestep has been completed. + + Parameters + ---------- + model_run_name : str + + Returns + ------- + set Set of tuples representing expected results + """ + + # Model results are returned as a tuple + # (timestep, decision_it, sec_model_name, output_name) + # so we first build the full list of expected results tuples. + + expected_results = [] + + # Get the sos model name given the model run name, and the full list of timesteps + model_run = self.read_model_run(model_run_name) + timesteps = sorted(model_run['timesteps']) + sos_model_name = model_run['sos_model'] + + # Get the list of sector models in the sos model + sos_config = self.read_sos_model(sos_model_name) + + # For each sector model, get the outputs and create the tuples + for sec_model_name in sos_config['sector_models']: + + sec_model_config = self.read_model(sec_model_name) + outputs = sec_model_config['outputs'] + + for output, t in itertools.product(outputs, timesteps): + expected_results.append((t, 0, sec_model_name, output['name'])) + + # Return as a set to remove duplicates + return set(expected_results) # endregion # region data store utilities diff --git a/tests/data_layer/test_store.py b/tests/data_layer/test_store.py index e4faa2530..e8539376b 100644 --- a/tests/data_layer/test_store.py +++ b/tests/data_layer/test_store.py @@ -314,3 +314,39 @@ def test_warm_start(self, store, sample_results): timestep = 2020 store.write_results(sample_results, 'test_model_run', 'model_name', timestep) assert store.prepare_warm_start('test_model_run') == timestep + + def test_get_canonical_available_results(self, store, sample_results): + + store.write_results(sample_results, 'model_run_name', 'model_name', 2010, 0) + store.write_results(sample_results, 'model_run_name', 'model_name', 2015, 0) + store.write_results(sample_results, 'model_run_name', 'model_name', 2010, 1) + store.write_results(sample_results, 'model_run_name', 'model_name', 2015, 1) + store.write_results(sample_results, 'model_run_name', 'model_name', 2020, 1) + + output_name = sample_results.spec.name + + correct_results = set() + correct_results.add((2010, 0, 'model_name', output_name)) + correct_results.add((2015, 0, 'model_name', output_name)) + correct_results.add((2020, 0, 'model_name', output_name)) + + assert(store.get_canonical_available_results('model_run_name') == correct_results) + + def test_get_canonical_expected_results( + self, store, sample_dimensions, get_sos_model, get_sector_model, + energy_supply_sector_model, model_run + ): + + for dim in sample_dimensions: + store.write_dimension(dim) + store.write_sos_model(get_sos_model) + store.write_model_run(model_run) + store.write_model(get_sector_model) + store.write_model(energy_supply_sector_model) + + correct_results = set() + correct_results.add((2015, 0, 'energy_demand', 'gas_demand')) + correct_results.add((2020, 0, 'energy_demand', 'gas_demand')) + correct_results.add((2025, 0, 'energy_demand', 'gas_demand')) + + assert(store.get_canonical_expected_results(model_run['name']) == correct_results) From ffdf0bfb583beb7b29afa7e96fc92ee71c3707d9 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 12:01:17 +0100 Subject: [PATCH 27/61] #351 Simplify 
available_results: now requires model_run_name --- src/smif/cli/__init__.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index 3bbba41ee..d1eed13b7 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -123,16 +123,15 @@ def list_model_runs(args): print(run_name) -def _list_available_results(store, run): +def _list_available_results(store, model_run_name): """Helper to print the available results for a specific model run config. """ - run_name = run['name'] - - available_results = store.available_results(run_name) + run = store.read_model_run(model_run_name) + available_results = store.available_results(model_run_name) # Name of the model run - print('\nmodel run: {}'.format(run_name)) + print('\nmodel run: {}'.format(model_run_name)) # Name of the associated sos model sos_model_name = run['sos_model'] @@ -178,14 +177,7 @@ def list_available_results(args): """ store = _get_store(args) - - if args.model_run: - list_of_model_runs = [store.read_model_run(args.model_run)] - else: - list_of_model_runs = store.read_model_runs() - - for model_run in list_of_model_runs: - _list_available_results(store, model_run) + _list_available_results(store, args.model_run) def run_model_runs(args): @@ -333,9 +325,7 @@ def parse_arguments(): 'available_results', help='List available results', parents=[parent_parser]) parser_results.set_defaults(func=list_available_results) parser_results.add_argument('model_run', - help="Name of the model run to list available results", - nargs='?', - default=None) + help="Name of the model run to list available results") # APP parser_app = subparsers.add_parser( From f4114cbd9320415303a2c28067f232eb0c9aa41c Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 13:57:52 +0100 Subject: [PATCH 28/61] #351 Refactor to better match other naming --- src/smif/cli/__init__.py | 4 ++-- src/smif/data_layer/store.py | 4 ++-- tests/data_layer/test_store.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index d1eed13b7..c6a2a1e43 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -113,8 +113,8 @@ def list_model_runs(args): run_name = run['name'] if args.complete: - expected_results = store.get_canonical_expected_results(run_name) - available_results = store.get_canonical_available_results(run_name) + expected_results = store.canonical_expected_results(run_name) + available_results = store.canonical_available_results(run_name) complete = ' *' if expected_results == available_results else '' diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 8c2a979cd..8f92b4d52 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -872,7 +872,7 @@ def prepare_warm_start(self, model_run_name): max_timestep = None return max_timestep - def get_canonical_available_results(self, model_run_name): + def canonical_available_results(self, model_run_name): """List the results that are available from a model run, collapsing all decision iterations. @@ -902,7 +902,7 @@ def get_canonical_available_results(self, model_run_name): # Return as a set to remove duplicates return set(canonical_list) - def get_canonical_expected_results(self, model_run_name): + def canonical_expected_results(self, model_run_name): """List the results that are expected from a model run, collapsing all decision iterations. 
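As a sketch of how the renamed helpers are meant to be combined (not part of the patch; the store and run name are assumed from the surrounding tests and sample project):

    # a model run is complete when every canonical expected result is available
    expected = store.canonical_expected_results('energy_central')
    available = store.canonical_available_results('energy_central')
    missing = expected - available   # plain set difference of result tuples
    is_complete = not missing        # drives the asterisk printed by `smif list -c`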
diff --git a/tests/data_layer/test_store.py b/tests/data_layer/test_store.py index e8539376b..528e3685a 100644 --- a/tests/data_layer/test_store.py +++ b/tests/data_layer/test_store.py @@ -315,7 +315,7 @@ def test_warm_start(self, store, sample_results): store.write_results(sample_results, 'test_model_run', 'model_name', timestep) assert store.prepare_warm_start('test_model_run') == timestep - def test_get_canonical_available_results(self, store, sample_results): + def test_canonical_available_results(self, store, sample_results): store.write_results(sample_results, 'model_run_name', 'model_name', 2010, 0) store.write_results(sample_results, 'model_run_name', 'model_name', 2015, 0) @@ -330,9 +330,9 @@ def test_get_canonical_available_results(self, store, sample_results): correct_results.add((2015, 0, 'model_name', output_name)) correct_results.add((2020, 0, 'model_name', output_name)) - assert(store.get_canonical_available_results('model_run_name') == correct_results) + assert(store.canonical_available_results('model_run_name') == correct_results) - def test_get_canonical_expected_results( + def test_canonical_expected_results( self, store, sample_dimensions, get_sos_model, get_sector_model, energy_supply_sector_model, model_run ): @@ -349,4 +349,4 @@ def test_get_canonical_expected_results( correct_results.add((2020, 0, 'energy_demand', 'gas_demand')) correct_results.add((2025, 0, 'energy_demand', 'gas_demand')) - assert(store.get_canonical_expected_results(model_run['name']) == correct_results) + assert(store.canonical_expected_results(model_run['name']) == correct_results) From 40ce6400e35134f08c122535ea1115de6ad50a9a Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 13:58:32 +0100 Subject: [PATCH 29/61] #351 Refactor list_available_results in terms of new store method Method now just calls canonical_expected_results and available_results, and compares the two lists of tuples --- src/smif/cli/__init__.py | 62 ++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index c6a2a1e43..80ade4e73 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -123,38 +123,35 @@ def list_model_runs(args): print(run_name) -def _list_available_results(store, model_run_name): - """Helper to print the available results for a specific model run config. +def list_available_results(args): + """List the available results from previous model runs. A specific model run may be + specified my the subcommand `model_run`, else all available information is displayed. 
""" - run = store.read_model_run(model_run_name) - available_results = store.available_results(model_run_name) - - # Name of the model run - print('\nmodel run: {}'.format(model_run_name)) + store = _get_store(args) + expected = store.canonical_expected_results(args.model_run) + available = store.available_results(args.model_run) - # Name of the associated sos model - sos_model_name = run['sos_model'] + # Print run and sos model + run = store.read_model_run(args.model_run) + print('\nmodel run: {}'.format(args.model_run)) print('{}- sos model: {}'.format(' ' * 2, run['sos_model'])) - # Names of each associated sector model - sos_config = store.read_sos_model(sos_model_name) - for sec_model_name in sos_config['sector_models']: - print('{}- sector model: {}'.format(' ' * 4, sec_model_name)) + # List of expected sector models + sec_models = sorted({sec for _t, _d, sec, _out in expected}) - sec_model_config = store.read_model(sec_model_name) - outputs = sec_model_config['outputs'] + for sec_model in sec_models: + print('{}- sector model: {}'.format(' ' * 4, sec_model)) - # Names of each output for the sector model - for output in outputs: - output_name = output['name'] - print('{}- output: {}'.format(' ' * 6, output_name)) + # List expected outputs for this sector model + outputs = sorted({out for _t, _d, sec, out in expected if sec == sec_model}) - output_results = [res for res in available_results if - res[2] == sec_model_name and res[3] == output_name] + for output in outputs: + print('{}- output: {}'.format(' ' * 6, output)) - # Sorted list of all the decision iterations matching this model and output - decs = sorted(list(set([res[1] for res in output_results]))) + # List available decisions for this sector model and output + decs = sorted({d for _t, d, sec, out in available if + sec == sec_model and out == output}) if len(decs) == 0: print('{}- no results'.format(' ' * 8)) @@ -162,24 +159,15 @@ def _list_available_results(store, model_run_name): for dec in decs: base_str = '{}- decision {}:'.format(' ' * 8, dec) - # Get the timesteps in the results corresponding to this decision iteration - timesteps = [t for t, d, _sec, _out in output_results if d == dec] - time_strings = [str(t) for t in sorted(timesteps)] - - res_str = ', '.join(time_strings) if len(timesteps) > 0 else 'no results' + # List available time steps for this decision, sector model and output + ts = sorted({t for t, d, sec, out in available if + d == dec and sec == sec_model and out == output}) + assert(len(ts) > 0), "If a decision is available, so is at least one time step" + res_str = ', '.join([str(t) for t in ts]) print('{} {}'.format(base_str, res_str)) -def list_available_results(args): - """List the available results from previous model runs. A specific model run may be - specified my the subcommand `model_run`, else all available information is displayed. - """ - - store = _get_store(args) - _list_available_results(store, args.model_run) - - def run_model_runs(args): """Run the model runs as requested. 
Check if results exist and asks user for permission to overwrite From 4d83e5323d03dab8cc8129aa29d39a5303588341 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 14:23:31 +0100 Subject: [PATCH 30/61] #351 Add cli tests for new functionality --- src/smif/cli/__init__.py | 3 +-- tests/cli/test_cli.py | 48 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index 80ade4e73..1f27dddb2 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -124,8 +124,7 @@ def list_model_runs(args): def list_available_results(args): - """List the available results from previous model runs. A specific model run may be - specified my the subcommand `model_run`, else all available information is displayed. + """List the available results for a specified model run. """ store = _get_store(args) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index d756f0c1b..e6ff0052b 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -7,9 +7,8 @@ from tempfile import TemporaryDirectory from unittest.mock import call, patch -from pytest import fixture - import smif +from pytest import fixture from smif.cli import confirm, parse_arguments, setup_project_folder @@ -110,6 +109,51 @@ def test_fixture_list_runs(tmp_sample_project): assert "energy_water_cp_cr" in str(output.stdout) assert "energy_central" in str(output.stdout) + # Run energy_central and re-check output with optional flag for completed results + subprocess.run(["smif", "run", "energy_central", "-d", config_dir], stdout=subprocess.PIPE) + output = subprocess.run(["smif", "list", "-c", "-d", config_dir], stdout=subprocess.PIPE) + assert "energy_central *" in str(output.stdout) + + +def test_fixture_available_results(tmp_sample_project): + """Test running the filesystem-based single_run fixture + """ + config_dir = tmp_sample_project + output = subprocess.run(["smif", "available_results", "energy_central", "-d", config_dir], + stdout=subprocess.PIPE) + + out_str = str(output.stdout) + assert(out_str.count('model run: energy_central') == 1) + assert(out_str.count('sos model: energy') == 1) + assert(out_str.count('sector model:') == 1) + assert(out_str.count('output:') == 2) + assert(out_str.count('output: cost') == 1) + assert(out_str.count('output: water_demand') == 1) + assert(out_str.count('no results') == 2) + assert(out_str.count('decision') == 0) + + # Run energy_central and re-check output with optional flag for completed results + subprocess.run(["smif", "run", "energy_central", "-d", config_dir], stdout=subprocess.PIPE) + output = subprocess.run(["smif", "available_results", "energy_central", "-d", config_dir], + stdout=subprocess.PIPE) + + out_str = str(output.stdout) + assert(out_str.count('model run: energy_central') == 1) + assert(out_str.count('sos model: energy') == 1) + assert(out_str.count('sector model:') == 1) + assert(out_str.count('output:') == 2) + assert(out_str.count('output: cost') == 1) + assert(out_str.count('output: water_demand') == 1) + assert(out_str.count('no results') == 0) + assert(out_str.count('decision') == 8) + assert(out_str.count('decision 1') == 2) + assert(out_str.count('decision 2') == 2) + assert(out_str.count('decision 3') == 2) + assert(out_str.count('decision 4') == 2) + assert(out_str.count(': 2010') == 4) + assert(out_str.count(': 2015') == 2) + assert(out_str.count(': 2020') == 2) + def test_setup_project_folder(): """Test contents of the setup project folder From 
73881b7dbc96dec3c0df44dcaf49c13ea842aeec Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 17:21:35 +0100 Subject: [PATCH 31/61] #352 Implement and test canonical_missing_results --- src/smif/data_layer/store.py | 20 ++++++++++++++++++++ tests/data_layer/test_store.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 8f92b4d52..9aa125e99 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -947,6 +947,26 @@ def canonical_expected_results(self, model_run_name): # Return as a set to remove duplicates return set(expected_results) + + def canonical_missing_results(self, model_run_name): + """List the results that are missing from a model run, collapsing all decision + iterations. + + For a complete model run, this is what is left after removing + canonical_available_results from canonical_expected_results. + + Parameters + ---------- + model_run_name : str + + Returns + ------- + set Set of tuples representing missing results + """ + + return self.canonical_expected_results( + model_run_name) - self.canonical_available_results(model_run_name) + # endregion # region data store utilities diff --git a/tests/data_layer/test_store.py b/tests/data_layer/test_store.py index 528e3685a..a42e33959 100644 --- a/tests/data_layer/test_store.py +++ b/tests/data_layer/test_store.py @@ -3,12 +3,15 @@ Many methods simply proxy to config/metadata/data store implementations, but there is some cross-coordination and there are some convenience methods implemented at this layer. """ +import numpy as np import numpy.testing from pytest import fixture from smif.data_layer import Store +from smif.data_layer.data_array import DataArray from smif.data_layer.memory_interface import (MemoryConfigStore, MemoryDataStore, MemoryMetadataStore) +from smif.metadata import Spec @fixture @@ -350,3 +353,32 @@ def test_canonical_expected_results( correct_results.add((2025, 0, 'energy_demand', 'gas_demand')) assert(store.canonical_expected_results(model_run['name']) == correct_results) + + def test_canonical_missing_results( + self, store, sample_dimensions, get_sos_model, get_sector_model, + energy_supply_sector_model, model_run + ): + + for dim in sample_dimensions: + store.write_dimension(dim) + store.write_sos_model(get_sos_model) + store.write_model_run(model_run) + store.write_model(get_sector_model) + store.write_model(energy_supply_sector_model) + + # All the results are missing + missing_results = set() + missing_results.add((2015, 0, 'energy_demand', 'gas_demand')) + missing_results.add((2020, 0, 'energy_demand', 'gas_demand')) + missing_results.add((2025, 0, 'energy_demand', 'gas_demand')) + + assert(store.canonical_missing_results(model_run['name']) == missing_results) + + spec = Spec(name='gas_demand', dtype='float') + data = np.array(1, dtype=float) + fake_data = DataArray(spec, data) + + store.write_results(fake_data, model_run['name'], 'energy_demand', 2015, 0) + missing_results.remove((2015, 0, 'energy_demand', 'gas_demand')) + + assert(store.canonical_missing_results(model_run['name']) == missing_results) From 856bf1ad5533165563258386960dc47cd91dd8d5 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 16 Apr 2019 17:40:04 +0100 Subject: [PATCH 32/61] #352 Add cli option for listing missing results, with tests --- src/smif/cli/__init__.py | 61 +++++++++++++++++++++++++++++++++++----- tests/cli/test_cli.py | 35 ++++++++++++++++++++++- 2 files changed, 88 insertions(+), 
8 deletions(-) diff --git a/src/smif/cli/__init__.py b/src/smif/cli/__init__.py index 1f27dddb2..c89495c82 100644 --- a/src/smif/cli/__init__.py +++ b/src/smif/cli/__init__.py @@ -86,14 +86,13 @@ except ImportError: import thread as _thread - try: import win32api + USE_WIN32 = True except ImportError: USE_WIN32 = False - __author__ = "Will Usher, Tom Russell" __copyright__ = "Will Usher, Tom Russell" __license__ = "mit" @@ -161,12 +160,50 @@ def list_available_results(args): # List available time steps for this decision, sector model and output ts = sorted({t for t, d, sec, out in available if d == dec and sec == sec_model and out == output}) - assert(len(ts) > 0), "If a decision is available, so is at least one time step" + assert (len( + ts) > 0), "If a decision is available, so is at least one time step" res_str = ', '.join([str(t) for t in ts]) print('{} {}'.format(base_str, res_str)) +def list_missing_results(args): + """List the missing results for a specified model run. + """ + + store = _get_store(args) + expected = store.canonical_expected_results(args.model_run) + missing = store.canonical_missing_results(args.model_run) + + # Print run and sos model + run = store.read_model_run(args.model_run) + print('\nmodel run: {}'.format(args.model_run)) + print('{}- sos model: {}'.format(' ' * 2, run['sos_model'])) + + # List of expected sector models + sec_models = sorted({sec for _t, _d, sec, _out in expected}) + + for sec_model in sec_models: + print('{}- sector model: {}'.format(' ' * 4, sec_model)) + + # List expected outputs for this sector model + outputs = sorted({out for _t, _d, sec, out in expected if sec == sec_model}) + + for output in outputs: + print('{}- output: {}'.format(' ' * 6, output)) + + # List missing time steps for this sector model and output + ts = sorted({t for t, d, sec, out in missing if + sec == sec_model and out == output}) + + if len(ts) == 0: + print('{}- no missing results'.format(' ' * 8)) + else: + base_str = '{}- results missing for:'.format(' ' * 8) + res_str = ', '.join([str(t) for t in ts]) + print('{} {}'.format(base_str, res_str)) + + def run_model_runs(args): """Run the model runs as requested. 
Check if results exist and asks user for permission to overwrite @@ -308,11 +345,21 @@ def parse_arguments(): action='store_true') # RESULTS - parser_results = subparsers.add_parser( + parser_available_results = subparsers.add_parser( 'available_results', help='List available results', parents=[parent_parser]) - parser_results.set_defaults(func=list_available_results) - parser_results.add_argument('model_run', - help="Name of the model run to list available results") + parser_available_results.set_defaults(func=list_available_results) + parser_available_results.add_argument( + 'model_run', + help="Name of the model run to list available results" + ) + + parser_missing_results = subparsers.add_parser( + 'missing_results', help='List missing results', parents=[parent_parser]) + parser_missing_results.set_defaults(func=list_missing_results) + parser_missing_results.add_argument( + 'model_run', + help="Name of the model run to list missing results" + ) # APP parser_app = subparsers.add_parser( diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index e6ff0052b..2cc4478b8 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -116,7 +116,7 @@ def test_fixture_list_runs(tmp_sample_project): def test_fixture_available_results(tmp_sample_project): - """Test running the filesystem-based single_run fixture + """Test cli for listing available results """ config_dir = tmp_sample_project output = subprocess.run(["smif", "available_results", "energy_central", "-d", config_dir], @@ -155,6 +155,39 @@ def test_fixture_available_results(tmp_sample_project): assert(out_str.count(': 2020') == 2) +def test_fixture_missing_results(tmp_sample_project): + """Test cli for listing missing results + """ + config_dir = tmp_sample_project + output = subprocess.run(["smif", "missing_results", "energy_central", "-d", config_dir], + stdout=subprocess.PIPE) + + out_str = str(output.stdout) + assert(out_str.count('model run: energy_central') == 1) + assert(out_str.count('sos model: energy') == 1) + assert(out_str.count('sector model:') == 1) + assert(out_str.count('output:') == 2) + assert(out_str.count('output: cost') == 1) + assert(out_str.count('output: water_demand') == 1) + assert(out_str.count('no missing results') == 0) + assert(out_str.count('results missing for:') == 2) + + # Run energy_central and re-check output with optional flag for completed results + subprocess.run(["smif", "run", "energy_central", "-d", config_dir], stdout=subprocess.PIPE) + output = subprocess.run(["smif", "missing_results", "energy_central", "-d", config_dir], + stdout=subprocess.PIPE) + + out_str = str(output.stdout) + assert(out_str.count('model run: energy_central') == 1) + assert(out_str.count('sos model: energy') == 1) + assert(out_str.count('sector model:') == 1) + assert(out_str.count('output:') == 2) + assert(out_str.count('output: cost') == 1) + assert(out_str.count('output: water_demand') == 1) + assert(out_str.count('no missing results') == 2) + assert(out_str.count('results missing for:') == 0) + + def test_setup_project_folder(): """Test contents of the setup project folder """ From c957cef760b8563c19b079496716a241ea8fabf3 Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Wed, 17 Apr 2019 11:48:47 +0100 Subject: [PATCH 33/61] Add draft for Store.get_results_darray method --- src/smif/data_layer/store.py | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 9aa125e99..dade90f2c 100644 --- 
a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -985,3 +985,40 @@ def _pick_from_list(list_of_dicts, name): if 'name' in item and item['name'] == name: return item return None + + def get_result_darray(self, + timesteps, + model_name, + output_name, + model_run_name, + decision_iteration): + """ Read results and build the corresponding DataArray + + Returns + ------- + DataArray + """ + model = self.read_model(model_name) + i=0 + for output in model['outputs']: + if(output_name == model['outputs'][i]['name']): + output_spec = Spec.from_dict(output) + data_container = np.zeros + for timestep in timesteps: + # ------------------------------------------------------- + dArray = self.read_results(model_run_name, model_name, + output_spec, timestep, + decision_iteration) + if 'result_data' in locals(): + result_data = np.vstack([result_data,dArray.data]) + else: + result_data = dArray.data + # --------------------------------------------------- + output_dict = output_spec.as_dict() + output_dict['dims'].append('timestep') + output_dict['coords']['timestep'] = timesteps + output_spec = Spec.from_dict(output_dict) + + result_dArray = DataArray(output_spec,np.transpose(result_data)) + i=i+1 + return result_dArray From 53477fb0df18c46abd47bee4c2c090b185ebbb4a Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Wed, 17 Apr 2019 18:10:23 +0100 Subject: [PATCH 34/61] #359 First attempt at data access method --- src/smif/data_layer/store.py | 178 +++++++++++++++++++++++++++-------- 1 file changed, 141 insertions(+), 37 deletions(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index dade90f2c..083a5ccf9 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -967,6 +967,147 @@ def canonical_missing_results(self, model_run_name): return self.canonical_expected_results( model_run_name) - self.canonical_available_results(model_run_name) + def _get_result_darray_internal(self, model_run_name, sec_model_name, output_name, + t_d_tuples): + """Internal implementation for `get_result_darray`, after the unique list of + (timestep, decision) tuples has been generated and validated. + + This method gets the spec for the output defined by the model_run_name, sec_model_name + and output_name and expands the spec to include an additional dimension for the list of + tuples. + + Then, for each tuple, the data array from the corresponding read_results call is + stacked, and together with the new spec this information is returned as a new + DataArray. 
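As a rough sketch of the intended effect on shapes (an assumed example, not taken from the code):

    # original output spec dims ['lad'] with 2 coords      -> data shape (2,)
    # three tuples [(2010, 0), (2015, 0), (2015, 1)] give
    # expanded dims ['lad', 'timestep_decision']            -> data shape (2, 3)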
+ + Parameters + ---------- + model_run_name : str + sec_model_name : str + output_name : str + t_d_tuples : list of unique (timestep, decision) tuples + + Returns + ------- + DataArray with expanded spec and data for each (timestep, decision) tuple + """ + + # Get the output spec given the name of the sector model and output + output_spec = None + sec_model = self.read_model(sec_model_name) + + for output in sec_model['outputs']: + + # Ignore if the output name doesn't match + if output_name != output['name']: + continue + + output_spec = Spec.from_dict(output) + + assert output_spec, "Output name was not found in model outputs" + + # Read the results for each (timestep, decision) tuple and stack them + list_of_numpy_arrays = [] + + for t, d in t_d_tuples: + d_array = self.read_results(model_run_name, sec_model_name, output_spec, t, d) + list_of_numpy_arrays.append(d_array.data) + + stacked_data = np.vstack(list_of_numpy_arrays) + + # Add new dimensions to the data spec + output_dict = output_spec.as_dict() + output_dict['dims'].append('timestep_decision') + output_dict['coords']['timestep_decision'] = t_d_tuples + + output_spec = Spec.from_dict(output_dict) + + # Create a new DataArray from the modified spec and stacked data + return DataArray(output_spec, np.transpose(stacked_data)) + + def get_result_darray(self, model_run_name, sec_model_name, output_name, timesteps=None, + decision_iteration=None, t_d_tuples=None): + """Return data for multiple timesteps and decision iterations for a given output from + a given sector model in a specific model run. + + You can specify either: + a list of (timestep, decision) tuples + in which case data for all of those tuples matching the available results will + be returned + or: + a list of timesteps + in which case data for all of those timesteps (and any decision iterations) + matching the available results will be returned + or: + a list of decision iterations + in which case data for all of those decision iterations (and any timesteps) + matching the available results will be returned + or: + a list of timesteps and a list of decision iterations + in which case data for the Cartesian product of those timesteps and those + decision iterations matching the available results will be returned + or: + nothing + in which case all available results will be returned + + Then, for each tuple, the data array from the corresponding read_results call is + stacked, and together with the new spec this information is returned as a new + DataArray. 
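For example (an illustrative sketch only; run, model and output names are assumed):

    # all available results for one output
    store.get_result_darray('energy_central', 'energy_demand', 'gas_demand')
    # restrict to particular timesteps (any decision iteration)
    store.get_result_darray('energy_central', 'energy_demand', 'gas_demand',
                            timesteps=[2015, 2020])
    # explicit (timestep, decision) pairs
    store.get_result_darray('energy_central', 'energy_demand', 'gas_demand',
                            t_d_tuples=[(2015, 0), (2020, 1)])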
+ + Parameters + ---------- + model_run_name : str + sec_model_name : str + output_name : str + timesteps : optional list of timesteps + decision_iteration : optional list of decision iterations + t_d_tuples : optional list of unique (timestep, decision) tuples + + Returns + ------- + DataArray with expanded spec and the data requested + """ + + # If a list of (t,d) tuples is supplied, disallow specifying timesteps or decision + # iterations + if t_d_tuples: + assert (not timesteps and not decision_iteration) + + available = self.available_results(model_run_name) + + # Build up the necessary list of tuples + if not timesteps and not decision_iteration and not t_d_tuples: + list_of_tuples = [(t, d) for t, d, sec, out in available if + sec == sec_model_name and out == output_name] + + elif timesteps and not decision_iteration and not t_d_tuples: + list_of_tuples = [(t, d) for t, d, sec, out in available if + sec == sec_model_name and out == output_name and t in timesteps] + + elif decision_iteration and not timesteps and not t_d_tuples: + list_of_tuples = [(t, d) for t, d, sec, out in available if + sec == sec_model_name and out == output_name and + d in decision_iteration] + + elif t_d_tuples and not timesteps and not decision_iteration: + list_of_tuples = [(t, d) for t, d, sec, out in available if + sec == sec_model_name and out == output_name and ( + t, d) in t_d_tuples] + + elif timesteps and decision_iteration and not t_d_tuples: + t_d = list(itertools.product(timesteps, decision_iteration)) + list_of_tuples = [(t, d) for t, d, sec, out in available if + sec == sec_model_name and out == output_name and (t, d) in t_d] + + else: + assert False, "It should not have been possible to reach this line of code." + + assert (len(list_of_tuples) > 0), "None of the requested data is available." 
+ + return self._get_result_darray_internal( + model_run_name, sec_model_name, output_name, sorted(list_of_tuples) + ) + # endregion # region data store utilities @@ -985,40 +1126,3 @@ def _pick_from_list(list_of_dicts, name): if 'name' in item and item['name'] == name: return item return None - - def get_result_darray(self, - timesteps, - model_name, - output_name, - model_run_name, - decision_iteration): - """ Read results and build the corresponding DataArray - - Returns - ------- - DataArray - """ - model = self.read_model(model_name) - i=0 - for output in model['outputs']: - if(output_name == model['outputs'][i]['name']): - output_spec = Spec.from_dict(output) - data_container = np.zeros - for timestep in timesteps: - # ------------------------------------------------------- - dArray = self.read_results(model_run_name, model_name, - output_spec, timestep, - decision_iteration) - if 'result_data' in locals(): - result_data = np.vstack([result_data,dArray.data]) - else: - result_data = dArray.data - # --------------------------------------------------- - output_dict = output_spec.as_dict() - output_dict['dims'].append('timestep') - output_dict['coords']['timestep'] = timesteps - output_spec = Spec.from_dict(output_dict) - - result_dArray = DataArray(output_spec,np.transpose(result_data)) - i=i+1 - return result_dArray From 3fadb2426f5f5e52a7e9d9d84327f001e8e18212 Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Tue, 23 Apr 2019 14:57:11 +0100 Subject: [PATCH 35/61] Add store.get_results_fixed_output() that returns dict of DataArray for single sector model, single output and *multiple* model runs --- src/smif/data_layer/store.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 083a5ccf9..e50fb04e2 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -1108,6 +1108,32 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste model_run_name, sec_model_name, output_name, sorted(list_of_tuples) ) + def get_results_fixed_output(self, model_runs, sec_model_name, output_name, timesteps=None, + decision_iteration=None, t_d_tuples=None): + """Return data for multiple timesteps and decision iterations for a given output from + a given sector model for multiple model runs. + + Parameters + ---------- + model_runs : List of str (model run names) + sec_model_name : str + output_name : str + timesteps : optional list of timesteps + decision_iteration : optional list of decision iterations + t_d_tuples : optional list of unique (timestep, decision) tuples + + Returns + ------- + Dictionary of DataArray objects keyed on model run names. 
+ Returned DataArrays include one extra (timestep,decision_iteration) + """ + results_dict = {} + for model_run_name in model_runs: + results_dict[model_run_name] = self.get_result_darray(model_run_name, sec_model_name, + output_name, timesteps, + decision_iteration, t_d_tuples) + return results_dict + # endregion # region data store utilities From d0686813714d414010d276119ac490e1bd2b03d6 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 23 Apr 2019 16:39:53 +0100 Subject: [PATCH 36/61] #359 Work towards read-only Results interface --- src/smif/data_layer/__init__.py | 3 +- src/smif/data_layer/results.py | 116 +++++++++++++++++++++++++++++++ tests/data_layer/test_results.py | 94 +++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 src/smif/data_layer/results.py create mode 100644 tests/data_layer/test_results.py diff --git a/src/smif/data_layer/__init__.py b/src/smif/data_layer/__init__.py index ac8748d17..42e949135 100644 --- a/src/smif/data_layer/__init__.py +++ b/src/smif/data_layer/__init__.py @@ -6,8 +6,9 @@ # from smif.data_layer import DataHandle` from smif.data_layer.data_array import DataArray from smif.data_layer.data_handle import DataHandle +from smif.data_layer.results import Results from smif.data_layer.store import Store # Define what should be imported as * :: # from smif.data_layer import * -__all__ = ['DataArray', 'DataHandle', 'Store'] +__all__ = ['DataArray', 'DataHandle', 'Results', 'Store'] diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py new file mode 100644 index 000000000..efddf1c20 --- /dev/null +++ b/src/smif/data_layer/results.py @@ -0,0 +1,116 @@ +"""Results provides a common interface to access results from model runs. + +Raises +------ +SmifDataNotFoundError + If data cannot be found in the store when try to read from the store +SmifDataMismatchError + Data presented to read, write and update methods is in the + incorrect format or of wrong dimensions to that expected +SmifDataReadError + When unable to read data e.g. unable to handle file type or connect + to database +""" + +import os + +from smif.data_layer.file import (CSVDataStore, FileMetadataStore, + ParquetDataStore, YamlConfigStore) +from smif.data_layer.store import Store + + +class Results: + """Common interface to access results from model runs. + + Parameters + ---------- + interface: str the requested interface (local_csv or local_parquet currently supported) + model_base_dir: str the base directory of the model + """ + def __init__(self, interface='local_csv', model_base_dir='.'): + + # Check that the provided interface is supported + file_store = self._get_file_store(interface) + if file_store is None: + raise ValueError( + 'Unsupported interface "{}". Supply local_csv or local_parquet'.format( + interface)) + + # Check that the directory is valid + if not os.path.isdir(model_base_dir): + raise ValueError('Expected {} to be a valid directory'.format(model_base_dir)) + + self._store = Store( + config_store=YamlConfigStore(model_base_dir), + metadata_store=FileMetadataStore(model_base_dir), + data_store=file_store(model_base_dir), + model_base_folder=model_base_dir + ) + + @staticmethod + def _get_file_store(interface): + """ Return the appropriate derived FileDataStore class, or None if the requested + interface is invalid. 
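A minimal usage sketch of the mapping defined just below (assumed, for illustration only):

    assert Results._get_file_store('local_csv') is CSVDataStore
    assert Results._get_file_store('local_parquet') is ParquetDataStore
    assert Results._get_file_store('something_else') is None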
+ + Parameters + ---------- + interface: str the requested interface + + Returns + ------- + The appropriate derived FileDataStore class + """ + return { + 'local_csv': CSVDataStore, + 'local_parquet': ParquetDataStore, + }.get(interface, None) + + def list_model_runs(self): + """ Return a list of model run names. + + Returns + ------- + List of model run names + """ + return sorted([x['name'] for x in self._store.read_model_runs()]) + + def available_results(self, model_run_name): + """ Return the results available for a given model run. + + Parameters + ---------- + model_run_name: str the requested model run + + Returns + ------- + A nested dictionary data structure of the results available for the given model run + """ + + available = self._store.available_results(model_run_name) + + results = { + 'model_run': model_run_name, + 'sos_model': self._store.read_model_run(model_run_name)['sos_model'], + 'sector_models': dict(), + } + + model_names = {sec for _t, _d, sec, _out in available} + for model_name in model_names: + results['sector_models'][model_name] = { + 'outputs': dict(), + } + + outputs = {out for _t, _d, sec, out in available if sec == model_name} + + for output in outputs: + results['sector_models'][model_name]['outputs'][output] = dict() + + decs = {d for _t, d, sec, out in available if + sec == model_name and out == output} + + for dec in decs: + ts = sorted({t for t, d, sec, out in available if + d == dec and sec == model_name and out == output}) + results['sector_models'][model_name]['outputs'][output][dec] = ts + + return results diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py new file mode 100644 index 000000000..76fb21a74 --- /dev/null +++ b/tests/data_layer/test_results.py @@ -0,0 +1,94 @@ +"""Test the Store interface + +Many methods simply proxy to config/metadata/data store implementations, but there is some +cross-coordination and there are some convenience methods implemented at this layer. 
+""" + +import os +import subprocess + +from pytest import fixture, raises +from smif.data_layer import Results + + +@fixture(scope="session") +def tmp_sample_project_no_results(tmpdir_factory): + test_folder = tmpdir_factory.mktemp("smif") + subprocess.run( + ['smif', 'setup', '-d', str(test_folder), '-v'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + return str(test_folder) + + +@fixture(scope="session") +def tmp_sample_project_with_results(tmpdir_factory): + test_folder = tmpdir_factory.mktemp("smif") + subprocess.run( + ['smif', 'setup', '-d', str(test_folder), '-v'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + subprocess.run( + ['smif', 'run', '-d', str(test_folder), 'energy_central'], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + return str(test_folder) + + +class TestNoResults: + + def test_exceptions(self, tmp_sample_project_no_results): + # Check that invalid interface is dealt with properly + with raises(ValueError) as e: + Results(interface='unexpected') + assert ('Unsupported interface' in str(e.value)) + + # Check that invalid directories are dealt with properly + with raises(ValueError) as e: + fake_path = os.path.join(tmp_sample_project_no_results, 'not', 'valid') + Results(model_base_dir=fake_path) + assert ('to be a valid directory' in str(e.value)) + + # Check that valid options DO work + Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results) + Results(interface='local_parquet', model_base_dir=tmp_sample_project_no_results) + + def test_list_model_runs(self, tmp_sample_project_no_results): + res = Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results) + model_runs = res.list_model_runs() + + assert ('energy_central' in model_runs) + assert ('energy_water_cp_cr' in model_runs) + assert (len(model_runs) == 2) + + def test_available_results(self, tmp_sample_project_no_results): + res = Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results) + available = res.available_results('energy_central') + + assert (available['model_run'] == 'energy_central') + assert (available['sos_model'] == 'energy') + assert (available['sector_models'] == dict()) + + +class TestSomeResults: + + def test_available_results(self, tmp_sample_project_with_results): + res = Results(interface='local_csv', model_base_dir=tmp_sample_project_with_results) + available = res.available_results('energy_central') + + assert (available['model_run'] == 'energy_central') + assert (available['sos_model'] == 'energy') + + sec_models = available['sector_models'] + assert (sorted(sec_models.keys()) == ['energy_demand']) + + outputs = sec_models['energy_demand']['outputs'] + assert (sorted(outputs.keys()) == ['cost', 'water_demand']) + + output_answer = {1: [2010], 2: [2010], 3: [2015], 4: [2020]} + + assert outputs['cost'] == output_answer + assert outputs['water_demand'] == output_answer From 6dc91d72eaba29dadd06afd6ef609a6839846fab Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Thu, 25 Apr 2019 13:56:35 +0100 Subject: [PATCH 37/61] Change name of get_results_fixed_output() to read_results() --- src/smif/data_layer/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index e50fb04e2..3324830f9 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -1108,7 +1108,7 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste model_run_name, sec_model_name, output_name, 
sorted(list_of_tuples) ) - def get_results_fixed_output(self, model_runs, sec_model_name, output_name, timesteps=None, + def read_results(self, model_runs, sec_model_name, output_name, timesteps=None, decision_iteration=None, t_d_tuples=None): """Return data for multiple timesteps and decision iterations for a given output from a given sector model for multiple model runs. From d5682dc3842b10054b6c4f743e3a4c8e859680a2 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 26 Apr 2019 13:48:01 +0100 Subject: [PATCH 38/61] Also rework tests to no longer use sample project --- src/smif/data_layer/results.py | 39 +++++-- tests/data_layer/test_results.py | 169 +++++++++++++++++++------------ 2 files changed, 137 insertions(+), 71 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index efddf1c20..89d1161db 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -24,10 +24,31 @@ class Results: Parameters ---------- - interface: str the requested interface (local_csv or local_parquet currently supported) - model_base_dir: str the base directory of the model + details_dict: dict optional dictionary of the form {'interface': , 'dir': } + where is either 'local_csv' or 'local_parquet', and is the model base + directory + store: Store optional pre-created Store object """ - def __init__(self, interface='local_csv', model_base_dir='.'): + def __init__(self, details_dict: dict = None, store: Store = None): + + assert bool(details_dict) != bool(store),\ + 'Results() accepts either a details dict or a store' + + self._store = store + if store: + return + + try: + interface = details_dict['interface'] + except KeyError: + print('No interface provided for Results(). Assuming local_csv.') + interface = 'local_csv' + + try: + directory = details_dict['dir'] + except KeyError: + print('No directory provided for Results(). Assuming \'.\'.') + directory = '.' 
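By way of example, the two intended construction styles (a sketch; the project path is assumed):

    # either wrap an existing Store...
    results = Results(store=store)
    # ...or describe a file-backed store with a details dict
    results = Results(details_dict={'interface': 'local_csv',
                                    'dir': '/path/to/sample_project'})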
# Check that the provided interface is supported file_store = self._get_file_store(interface) @@ -37,14 +58,14 @@ def __init__(self, interface='local_csv', model_base_dir='.'): interface)) # Check that the directory is valid - if not os.path.isdir(model_base_dir): - raise ValueError('Expected {} to be a valid directory'.format(model_base_dir)) + if not os.path.isdir(directory): + raise ValueError('Expected {} to be a valid directory'.format(directory)) self._store = Store( - config_store=YamlConfigStore(model_base_dir), - metadata_store=FileMetadataStore(model_base_dir), - data_store=file_store(model_base_dir), - model_base_folder=model_base_dir + config_store=YamlConfigStore(directory), + metadata_store=FileMetadataStore(directory), + data_store=file_store(directory), + model_base_folder=directory ) @staticmethod diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index 76fb21a74..c0e4035d8 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -5,90 +5,135 @@ """ import os -import subprocess from pytest import fixture, raises -from smif.data_layer import Results +from smif.data_layer import Results, Store +from smif.data_layer.memory_interface import (MemoryConfigStore, + MemoryDataStore, + MemoryMetadataStore) +from smif.exception import SmifDataNotFoundError + + +@fixture +def store(): + """Store fixture + """ + return Store( + config_store=MemoryConfigStore(), + metadata_store=MemoryMetadataStore(), + data_store=MemoryDataStore() + ) -@fixture(scope="session") -def tmp_sample_project_no_results(tmpdir_factory): - test_folder = tmpdir_factory.mktemp("smif") - subprocess.run( - ['smif', 'setup', '-d', str(test_folder), '-v'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE - ) - return str(test_folder) +@fixture +def results(store): + """Results fixture + """ + return Results(store=store) -@fixture(scope="session") -def tmp_sample_project_with_results(tmpdir_factory): - test_folder = tmpdir_factory.mktemp("smif") - subprocess.run( - ['smif', 'setup', '-d', str(test_folder), '-v'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE - ) - subprocess.run( - ['smif', 'run', '-d', str(test_folder), 'energy_central'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE - ) - return str(test_folder) +@fixture +def results_with_model_run(store, model_run): + """Results fixture + """ + store.write_model_run(model_run) + return Results(store=store) class TestNoResults: - def test_exceptions(self, tmp_sample_project_no_results): - # Check that invalid interface is dealt with properly - with raises(ValueError) as e: - Results(interface='unexpected') - assert ('Unsupported interface' in str(e.value)) + def test_exceptions(self, store): + + # No arguments is not allowed + with raises(AssertionError) as e: + Results() + assert 'either a details dict or a store' in str(e.value) + + # Both arguments is also not allowed + with raises(AssertionError) as e: + Results(details_dict={'some': 'dict'}, store=store) + assert 'either a details dict or a store' in str(e.value) + + # Check that constructing with just a store works fine + Results(store=store) - # Check that invalid directories are dealt with properly + # Check that valid configurations do work (but expect a SmifDataNotFoundError + # because the store creation will fall over + with raises(SmifDataNotFoundError) as e: + Results(details_dict={'interface': 'local_csv', 'dir': '.'}) + assert 'Expected configuration folder' in str(e.value) + + with raises(SmifDataNotFoundError) as e: + 
Results(details_dict={'interface': 'local_parquet', 'dir': '.'}) + assert 'Expected configuration folder' in str(e.value) + + # Interface left blank will default to local_csv + with raises(SmifDataNotFoundError) as e: + Results(details_dict={'dir': '.'}) + assert 'Expected configuration folder' in str(e.value) + + # Dir left blank will default to '.' + with raises(SmifDataNotFoundError) as e: + Results(details_dict={'interface': 'local_parquet'}) + assert 'Expected configuration folder' in str(e.value) + + # Invalid interface will raise a ValueError with raises(ValueError) as e: - fake_path = os.path.join(tmp_sample_project_no_results, 'not', 'valid') - Results(model_base_dir=fake_path) - assert ('to be a valid directory' in str(e.value)) + Results(details_dict={'interface': 'invalid', 'dir': '.'}) + assert 'Unsupported interface "invalid"' in str(e.value) - # Check that valid options DO work - Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results) - Results(interface='local_parquet', model_base_dir=tmp_sample_project_no_results) + # Invalid directory will raise a ValueError + with raises(ValueError) as e: + invalid_dir = os.path.join(os.path.dirname(__file__), 'does', 'not', 'exist') + Results(details_dict={'interface': 'local_csv', 'dir': invalid_dir}) + assert 'to be a valid directory' in str(e.value) - def test_list_model_runs(self, tmp_sample_project_no_results): - res = Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results) - model_runs = res.list_model_runs() + def test_list_model_runs(self, results, model_run): - assert ('energy_central' in model_runs) - assert ('energy_water_cp_cr' in model_runs) - assert (len(model_runs) == 2) + # Should be no model runs in an empty Results() + assert results.list_model_runs() == [] - def test_available_results(self, tmp_sample_project_no_results): - res = Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results) - available = res.available_results('energy_central') + model_run_a = model_run.copy() + model_run_a['name'] = 'a_model_run' - assert (available['model_run'] == 'energy_central') - assert (available['sos_model'] == 'energy') - assert (available['sector_models'] == dict()) + model_run_b = model_run.copy() + model_run_b['name'] = 'b_model_run' + results._store.write_model_run(model_run_a) + results._store.write_model_run(model_run_b) -class TestSomeResults: + assert results.list_model_runs() == ['a_model_run', 'b_model_run'] - def test_available_results(self, tmp_sample_project_with_results): - res = Results(interface='local_csv', model_base_dir=tmp_sample_project_with_results) - available = res.available_results('energy_central') + def test_available_results(self, results_with_model_run): - assert (available['model_run'] == 'energy_central') - assert (available['sos_model'] == 'energy') + available = results_with_model_run.available_results('unique_model_run_name') - sec_models = available['sector_models'] - assert (sorted(sec_models.keys()) == ['energy_demand']) + assert available['model_run'] == 'unique_model_run_name' + assert available['sos_model'] == 'energy' + assert available['sector_models'] == dict() - outputs = sec_models['energy_demand']['outputs'] - assert (sorted(outputs.keys()) == ['cost', 'water_demand']) - output_answer = {1: [2010], 2: [2010], 3: [2015], 4: [2020]} +class TestSomeResults: - assert outputs['cost'] == output_answer - assert outputs['water_demand'] == output_answer + def test_available_results(self, results_with_model_run, sample_results): + + 
results_with_model_run._store.write_results( + sample_results, 'model_run_name', 'model_name', 0 + ) + + available = results_with_model_run.available_results('unique_model_run_name') + assert available + + # assert (available['model_run'] == 'energy_central') + # assert (available['sos_model'] == 'energy') + # + # sec_models = available['sector_models'] + # assert (sorted(sec_models.keys()) == ['energy_demand']) + # + # outputs = sec_models['energy_demand']['outputs'] + # assert (sorted(outputs.keys()) == ['cost', 'water_demand']) + # + # output_answer = {1: [2010], 2: [2010], 3: [2015], 4: [2020]} + # + # assert outputs['cost'] == output_answer + # assert outputs['water_demand'] == output_answer From d4e0e65668a2804d3564baf6d5de8d88236e5967 Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Fri, 26 Apr 2019 13:55:10 +0100 Subject: [PATCH 39/61] Fix conflicting method name read_results for Results API in store.py (-->get_results) --- src/smif/data_layer/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 3324830f9..183d56c5c 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -1108,7 +1108,7 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste model_run_name, sec_model_name, output_name, sorted(list_of_tuples) ) - def read_results(self, model_runs, sec_model_name, output_name, timesteps=None, + def get_results(self, model_runs, sec_model_name, output_name, timesteps=None, decision_iteration=None, t_d_tuples=None): """Return data for multiple timesteps and decision iterations for a given output from a given sector model for multiple model runs. From 4ee685a0b202d6203fe719b5ad74408f0b53eaed Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 26 Apr 2019 14:20:08 +0100 Subject: [PATCH 40/61] #359 Add read() method to Results and add interface tests --- src/smif/data_layer/results.py | 43 ++++++++++++++++++++++++++++++++ tests/data_layer/test_results.py | 35 ++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 89d1161db..3b1f29a35 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -135,3 +135,46 @@ def available_results(self, model_run_name): results['sector_models'][model_name]['outputs'][output][dec] = ts return results + + def read(self, + model_run_names: list, + sec_model_names: list, + output_names: list, + timesteps: list = None, + decisions: list = None, + time_decision_tuples: list = None, + ): + """ Return the results from the store. 
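An illustrative call (names assumed; at this stage exactly one sector model and one output are accepted):

    results.read(model_run_names=['energy_central'],
                 sec_model_names=['energy_demand'],
                 output_names=['cost'],
                 timesteps=[2010, 2015, 2020])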
+ + Parameters + ---------- + model_run_names: list the requested model run names + sec_model_names: list the requested sector model names (exactly one required) + output_names: list the requested output names (exactly one required) + timesteps: list the requested timesteps + decisions: list the requested decision iterations + time_decision_tuples: list a list of requested (timestep, decision) tuples + + Returns + ------- + A dictionary of DataArrays, a single DataArray for each model run + """ + + if len(sec_model_names) != 1: + raise NotImplementedError( + 'Results.read() currently requires exactly one sector model' + ) + + if len(output_names) != 1: + raise NotImplementedError( + 'Results.read() currently requires exactly one output' + ) + + return self._store.get_results( + model_run_names, + sec_model_names[0], + output_names[0], + timesteps, + decisions, + time_decision_tuples + ) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index c0e4035d8..ef61cfd5a 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -137,3 +137,38 @@ def test_available_results(self, results_with_model_run, sample_results): # # assert outputs['cost'] == output_answer # assert outputs['water_demand'] == output_answer + + def test_read_exceptions(self, results_with_model_run): + + # Passing anything other than one sector model or output is current not implemented + with raises(NotImplementedError) as e: + results_with_model_run.read( + model_run_names=['one', 'two'], + sec_model_names=[], + output_names=['one'] + ) + assert 'requires exactly one sector model' in str(e.value) + + with raises(NotImplementedError) as e: + results_with_model_run.read( + model_run_names=['one', 'two'], + sec_model_names=['one', 'two'], + output_names=['one'] + ) + assert 'requires exactly one sector model' in str(e.value) + + with raises(NotImplementedError) as e: + results_with_model_run.read( + model_run_names=['one', 'two'], + sec_model_names=['one'], + output_names=[] + ) + assert 'requires exactly one output' in str(e.value) + + with raises(NotImplementedError) as e: + results_with_model_run.read( + model_run_names=['one', 'two'], + sec_model_names=['one'], + output_names=['one', 'two'] + ) + assert 'requires exactly one output' in str(e.value) From c575306cce00d5a8b71263d39b426f693edb379a Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Fri, 26 Apr 2019 16:45:21 +0100 Subject: [PATCH 41/61] Attempt to fix SmifDataMismatchError in Store._get_result_darray_internal() for outputs with ndims>1 --- src/smif/data_layer/store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 183d56c5c..df8739ebd 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -1014,7 +1014,8 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam list_of_numpy_arrays.append(d_array.data) stacked_data = np.vstack(list_of_numpy_arrays) - + data = np.transpose(stacked_data) + # Add new dimensions to the data spec output_dict = output_spec.as_dict() output_dict['dims'].append('timestep_decision') @@ -1023,7 +1024,7 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam output_spec = Spec.from_dict(output_dict) # Create a new DataArray from the modified spec and stacked data - return DataArray(output_spec, np.transpose(stacked_data)) + return DataArray(output_spec, np.reshape(data, output_sec.shape)) def 
get_result_darray(self, model_run_name, sec_model_name, output_name, timesteps=None, decision_iteration=None, t_d_tuples=None): From c29c97a397389628742d27bcfb7f1643978a5950 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 26 Apr 2019 16:56:48 +0100 Subject: [PATCH 42/61] #359 Improve testing of Results() --- tests/data_layer/test_results.py | 114 +++++++++++++++++-------------- 1 file changed, 62 insertions(+), 52 deletions(-) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index ef61cfd5a..99eb7db69 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -7,42 +7,44 @@ import os from pytest import fixture, raises -from smif.data_layer import Results, Store -from smif.data_layer.memory_interface import (MemoryConfigStore, - MemoryDataStore, - MemoryMetadataStore) +from smif.data_layer import Results from smif.exception import SmifDataNotFoundError @fixture -def store(): - """Store fixture +def results(empty_store, model_run): + """Results fixture """ - return Store( - config_store=MemoryConfigStore(), - metadata_store=MemoryMetadataStore(), - data_store=MemoryDataStore() - ) + empty_store.write_model_run(model_run) + return Results(store=empty_store) @fixture -def results(store): - """Results fixture +def results_with_results(empty_store, model_run, sample_results): + """Results fixture with a model run and fictional results """ - return Results(store=store) + empty_store.write_model_run(model_run) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2010, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2015, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2020, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2015, 1) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2020, 1) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2015, 2) + empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2020, 2) -@fixture -def results_with_model_run(store, model_run): - """Results fixture - """ - store.write_model_run(model_run) - return Results(store=store) + empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2010, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2015, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2020, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2025, 0) + empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2030, 0) + + return Results(store=empty_store) class TestNoResults: - def test_exceptions(self, store): + def test_exceptions(self, empty_store): # No arguments is not allowed with raises(AssertionError) as e: @@ -51,11 +53,11 @@ def test_exceptions(self, store): # Both arguments is also not allowed with raises(AssertionError) as e: - Results(details_dict={'some': 'dict'}, store=store) + Results(details_dict={'some': 'dict'}, store=empty_store) assert 'either a details dict or a store' in str(e.value) # Check that constructing with just a store works fine - Results(store=store) + Results(store=empty_store) # Check that valid configurations do work (but expect a SmifDataNotFoundError # because the store creation will fall over @@ -88,9 +90,10 @@ def test_exceptions(self, store): Results(details_dict={'interface': 'local_csv', 'dir': 
invalid_dir}) assert 'to be a valid directory' in str(e.value) - def test_list_model_runs(self, results, model_run): + def test_list_model_runs(self, empty_store, model_run): # Should be no model runs in an empty Results() + results = Results(store=empty_store) assert results.list_model_runs() == [] model_run_a = model_run.copy() @@ -99,14 +102,14 @@ def test_list_model_runs(self, results, model_run): model_run_b = model_run.copy() model_run_b['name'] = 'b_model_run' - results._store.write_model_run(model_run_a) - results._store.write_model_run(model_run_b) + empty_store.write_model_run(model_run_a) + empty_store.write_model_run(model_run_b) assert results.list_model_runs() == ['a_model_run', 'b_model_run'] - def test_available_results(self, results_with_model_run): + def test_available_results(self, results): - available = results_with_model_run.available_results('unique_model_run_name') + available = results.available_results('unique_model_run_name') assert available['model_run'] == 'unique_model_run_name' assert available['sos_model'] == 'energy' @@ -115,34 +118,35 @@ def test_available_results(self, results_with_model_run): class TestSomeResults: - def test_available_results(self, results_with_model_run, sample_results): + def test_available_results(self, results_with_results): - results_with_model_run._store.write_results( - sample_results, 'model_run_name', 'model_name', 0 - ) + available = results_with_results.available_results('unique_model_run_name') - available = results_with_model_run.available_results('unique_model_run_name') - assert available + assert available['model_run'] == 'unique_model_run_name' + assert available['sos_model'] == 'energy' + + sec_models = available['sector_models'] + assert sorted(sec_models.keys()) == ['a_model', 'b_model'] + + # Check a_model outputs are correct + outputs_a = sec_models['a_model']['outputs'] + assert sorted(outputs_a.keys()) == ['energy_use'] - # assert (available['model_run'] == 'energy_central') - # assert (available['sos_model'] == 'energy') - # - # sec_models = available['sector_models'] - # assert (sorted(sec_models.keys()) == ['energy_demand']) - # - # outputs = sec_models['energy_demand']['outputs'] - # assert (sorted(outputs.keys()) == ['cost', 'water_demand']) - # - # output_answer = {1: [2010], 2: [2010], 3: [2015], 4: [2020]} - # - # assert outputs['cost'] == output_answer - # assert outputs['water_demand'] == output_answer + output_answer_a = {0: [2010, 2015, 2020], 1: [2015, 2020], 2: [2015, 2020]} + assert outputs_a['energy_use'] == output_answer_a - def test_read_exceptions(self, results_with_model_run): + # Check b_model outputs are correct + outputs_b = sec_models['b_model']['outputs'] + assert sorted(outputs_b.keys()) == ['energy_use'] + + output_answer_b = {0: [2010, 2015, 2020, 2025, 2030]} + assert outputs_b['energy_use'] == output_answer_b + + def test_read_exceptions(self, results_with_results): # Passing anything other than one sector model or output is current not implemented with raises(NotImplementedError) as e: - results_with_model_run.read( + results_with_results.read( model_run_names=['one', 'two'], sec_model_names=[], output_names=['one'] @@ -150,7 +154,7 @@ def test_read_exceptions(self, results_with_model_run): assert 'requires exactly one sector model' in str(e.value) with raises(NotImplementedError) as e: - results_with_model_run.read( + results_with_results.read( model_run_names=['one', 'two'], sec_model_names=['one', 'two'], output_names=['one'] @@ -158,7 +162,7 @@ def test_read_exceptions(self, 
results_with_model_run): assert 'requires exactly one sector model' in str(e.value) with raises(NotImplementedError) as e: - results_with_model_run.read( + results_with_results.read( model_run_names=['one', 'two'], sec_model_names=['one'], output_names=[] @@ -166,9 +170,15 @@ def test_read_exceptions(self, results_with_model_run): assert 'requires exactly one output' in str(e.value) with raises(NotImplementedError) as e: - results_with_model_run.read( + results_with_results.read( model_run_names=['one', 'two'], sec_model_names=['one'], output_names=['one', 'two'] ) assert 'requires exactly one output' in str(e.value) + + def test_read(self): + # This is difficult to test without fixtures defining an entire canonical project. + # See smif issue #304 (https://github.com/nismod/smif/issues/304). For now, we rely + # on tests of the underling get_results() method on the Store. + pass From 4b886edc30ad84abca1a4f7dd71e1d553e4cdf03 Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Fri, 26 Apr 2019 17:23:32 +0100 Subject: [PATCH 43/61] Fix typo in Store._get_result_darray_internal() --- src/smif/data_layer/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index df8739ebd..59c088b1a 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -1024,7 +1024,7 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam output_spec = Spec.from_dict(output_dict) # Create a new DataArray from the modified spec and stacked data - return DataArray(output_spec, np.reshape(data, output_sec.shape)) + return DataArray(output_spec, np.reshape(data, output_spec.shape)) def get_result_darray(self, model_run_name, sec_model_name, output_name, timesteps=None, decision_iteration=None, t_d_tuples=None): From ac88b361704bda2b3c2fac57560c39593df70357 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Mon, 29 Apr 2019 14:54:05 +0100 Subject: [PATCH 44/61] #359 Return a dataframe with cols for model run, timestep and decision --- src/smif/data_layer/results.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 3b1f29a35..166284090 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -14,6 +14,7 @@ import os +import pandas as pd from smif.data_layer.file import (CSVDataStore, FileMetadataStore, ParquetDataStore, YamlConfigStore) from smif.data_layer.store import Store @@ -29,9 +30,10 @@ class Results: directory store: Store optional pre-created Store object """ + def __init__(self, details_dict: dict = None, store: Store = None): - assert bool(details_dict) != bool(store),\ + assert bool(details_dict) != bool(store), \ 'Results() accepts either a details dict or a store' self._store = store @@ -170,7 +172,7 @@ def read(self, 'Results.read() currently requires exactly one output' ) - return self._store.get_results( + results_dict = self._store.get_results( model_run_names, sec_model_names[0], output_names[0], @@ -178,3 +180,16 @@ def read(self, decisions, time_decision_tuples ) + + # Get each DataArray as a pandas data frame and concatenate, resetting the index to + # give back a flat data array + list_of_df = [x.as_df() for x in results_dict.values()] + names_of_df = [x for x in results_dict.keys()] + + results = pd.concat(list_of_df, keys=names_of_df, names=['model_run']).reset_index() + + # Unpack the timestep_decision tuples into individual columns and 
return + results[['timestep', 'decision']] = pd.DataFrame(results['timestep_decision'].tolist(), + index=results.index) + + return results.drop(columns=['timestep_decision']) From 0ee60a172c3a2814a02a27edf225fb884ea07d24 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Mon, 29 Apr 2019 16:53:56 +0100 Subject: [PATCH 45/61] #359 Tidying --- src/smif/data_layer/results.py | 2 +- src/smif/data_layer/store.py | 19 ++++++++++++------- tests/data_layer/test_results.py | 5 +---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 166284090..54b44e80d 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -159,7 +159,7 @@ def read(self, Returns ------- - A dictionary of DataArrays, a single DataArray for each model run + A Pandas dataframe """ if len(sec_model_names) != 1: diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 59c088b1a..a86aa24fd 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -1015,7 +1015,7 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam stacked_data = np.vstack(list_of_numpy_arrays) data = np.transpose(stacked_data) - + # Add new dimensions to the data spec output_dict = output_spec.as_dict() output_dict['dims'].append('timestep_decision') @@ -1110,7 +1110,7 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste ) def get_results(self, model_runs, sec_model_name, output_name, timesteps=None, - decision_iteration=None, t_d_tuples=None): + decision_iteration=None, t_d_tuples=None): """Return data for multiple timesteps and decision iterations for a given output from a given sector model for multiple model runs. @@ -1126,15 +1126,20 @@ def get_results(self, model_runs, sec_model_name, output_name, timesteps=None, Returns ------- Dictionary of DataArray objects keyed on model run names. - Returned DataArrays include one extra (timestep,decision_iteration) + Returned DataArrays include one extra (timestep, decision_iteration) dimension. """ results_dict = {} for model_run_name in model_runs: - results_dict[model_run_name] = self.get_result_darray(model_run_name, sec_model_name, - output_name, timesteps, - decision_iteration, t_d_tuples) + results_dict[model_run_name] = self.get_result_darray( + model_run_name, + sec_model_name, + output_name, + timesteps, + decision_iteration, + t_d_tuples + ) return results_dict - + # endregion # region data store utilities diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index 99eb7db69..0cc869d94 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -1,7 +1,4 @@ -"""Test the Store interface - -Many methods simply proxy to config/metadata/data store implementations, but there is some -cross-coordination and there are some convenience methods implemented at this layer. 
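As an illustration of the pandas idioms used in this patch (concatenating per-model-run frames with keys to add a model_run column, then splitting the timestep_decision tuples into separate columns), here is a minimal stand-alone sketch; the frame contents and run names are made up, only the column names follow the patch:

    import pandas as pd

    run_a = pd.DataFrame({'timestep_decision': [(2010, 0), (2015, 0)], 'cost': [1.0, 2.0]})
    run_b = pd.DataFrame({'timestep_decision': [(2010, 0), (2015, 0)], 'cost': [3.0, 4.0]})

    # Concatenate per-run frames, promoting the keys to a 'model_run' column
    results = pd.concat([run_a, run_b], keys=['run_a', 'run_b'],
                        names=['model_run']).reset_index()

    # Unpack the (timestep, decision) tuples into two columns, drop the original
    results[['timestep', 'decision']] = pd.DataFrame(
        results['timestep_decision'].tolist(), index=results.index)
    results = results.drop(columns=['timestep_decision'])

    print(results[['model_run', 'timestep', 'decision', 'cost']])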
+"""Test the Results interface """ import os From 079152da682ec6bd062a13b94d46958c962ca33f Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Mon, 29 Apr 2019 17:17:41 +0100 Subject: [PATCH 46/61] #359 Add test stub and todo for testing --- tests/data_layer/test_store.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/data_layer/test_store.py b/tests/data_layer/test_store.py index a42e33959..b34b2537e 100644 --- a/tests/data_layer/test_store.py +++ b/tests/data_layer/test_store.py @@ -382,3 +382,12 @@ def test_canonical_missing_results( missing_results.remove((2015, 0, 'energy_demand', 'gas_demand')) assert(store.canonical_missing_results(model_run['name']) == missing_results) + + def test_get_results(self): + # This is difficult to test without fixtures defining an entire canonical project. + # See smif issue #304 (https://github.com/nismod/smif/issues/304). + # Todo: mock a store with known results that can be obtained with get_results(...) + # This requires a model run with sector model, and a sector model with valid inputs and + # outputs, and results with valid spec, etc. Some of this functionality exists in + # fixtures provided in `conftest.py`. + pass From f71b570667ead03c98941e688919f14f1c2234f9 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Wed, 1 May 2019 11:47:21 +0100 Subject: [PATCH 47/61] #359 Add functionality to keep tabs on units --- src/smif/data_layer/results.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 54b44e80d..0129518c3 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -70,6 +70,9 @@ def __init__(self, details_dict: dict = None, store: Store = None): model_base_folder=directory ) + # Create an empty dictionary for keeping tabs on the units of any read outputs + self._output_units = dict() + @staticmethod def _get_file_store(interface): """ Return the appropriate derived FileDataStore class, or None if the requested @@ -181,6 +184,10 @@ def read(self, time_decision_tuples ) + # Keep tabs on the units for each output + for x in results_dict.values(): + self._output_units[x.name] = x.unit + # Get each DataArray as a pandas data frame and concatenate, resetting the index to # give back a flat data array list_of_df = [x.as_df() for x in results_dict.values()] @@ -193,3 +200,22 @@ def read(self, index=results.index) return results.drop(columns=['timestep_decision']) + + # Rename the output columns to include units + renamed_cols = dict() + for key, val in self._output_units.items(): + renamed_cols[key] = '{}_({})'.format(key, val) + results = results.rename(index=str, columns=renamed_cols) + + def get_units(self, output_name: str): + """ Return the units of a given output. 
+ + Parameters + ---------- + output_name: the name of the output + + Returns + ------- + str the units of the output + """ + return self._output_units[output_name] From 844fb8ff69f1bf53ae43f203ac846647fc401fa5 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Wed, 1 May 2019 11:47:54 +0100 Subject: [PATCH 48/61] #359 Reorder columns model_run -> timestep -> decision --- src/smif/data_layer/results.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 0129518c3..6773aa96b 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -195,11 +195,11 @@ def read(self, results = pd.concat(list_of_df, keys=names_of_df, names=['model_run']).reset_index() - # Unpack the timestep_decision tuples into individual columns and return + # Unpack the timestep_decision tuples into individual columns and drop the combined results[['timestep', 'decision']] = pd.DataFrame(results['timestep_decision'].tolist(), index=results.index) - return results.drop(columns=['timestep_decision']) + results = results.drop(columns=['timestep_decision']) # Rename the output columns to include units renamed_cols = dict() @@ -207,6 +207,16 @@ def read(self, renamed_cols[key] = '{}_({})'.format(key, val) results = results.rename(index=str, columns=renamed_cols) + # Now reorder the columns. Want model_run then timestep then decision + cols = results.columns.tolist() + + assert (cols[0] == 'model_run') + cols.insert(1, cols.pop(cols.index('timestep'))) + cols.insert(2, cols.pop(cols.index('decision'))) + assert(cols[0:3] == ['model_run', 'timestep', 'decision']) + + return results[cols] + def get_units(self, output_name: str): """ Return the units of a given output. From 92b8a13bd6a63b0025c7c740421b4da648ff0d84 Mon Sep 17 00:00:00 2001 From: Thibault Lestang Date: Wed, 1 May 2019 12:08:35 +0100 Subject: [PATCH 49/61] Modify store.get_results to return multiple outputs and check for availability of quieried output as well as dimensionality Issue #359 --- src/smif/data_layer/store.py | 42 +++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index a86aa24fd..e7c672481 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -15,6 +15,7 @@ to database """ import itertools +import warnings from copy import deepcopy from logging import getLogger from operator import itemgetter @@ -1109,7 +1110,7 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste model_run_name, sec_model_name, output_name, sorted(list_of_tuples) ) - def get_results(self, model_runs, sec_model_name, output_name, timesteps=None, + def get_results(self, model_runs, sec_model_name, outputs, timesteps=None, decision_iteration=None, t_d_tuples=None): """Return data for multiple timesteps and decision iterations for a given output from a given sector model for multiple model runs. @@ -1128,15 +1129,40 @@ def get_results(self, model_runs, sec_model_name, output_name, timesteps=None, Dictionary of DataArray objects keyed on model run names. Returned DataArrays include one extra (timestep, decision_iteration) dimension. 
""" + list_model_outputs = [] + # Build list of model outputs name + sec_model = self.read_model(sec_model_name) + for output in sec_model['outputs']: + list_model_outputs.append(output['name']) + # Check wether each queried output is available + found_outputs = [] + for output_name in outputs: + if output_name in list_model_outputs: + found_outputs.append(output_name) + else: + warnings.warn('Output {} not found and ignored.'.format(output_name)) + # Get Spec shape of first found output + for output in sec_model['outputs']: + if output['name'] == found_outputs[0]: + spec_shape = Spec.from_dict(output).shape + # Check subsequent found outputs and check that the shape matches + # Must loop over all sector model outputs again to extract spec object + for output in sec_model['outputs']: + for output_name in found_outputs[1:]: + if output['name'] == output_name: + if not(Spec.from_dict(output).shape == spec_shape): + raise ValueError('spec dimension for output {} not matching {}'.format(output_name, spec_shape)) results_dict = {} for model_run_name in model_runs: - results_dict[model_run_name] = self.get_result_darray( - model_run_name, - sec_model_name, - output_name, - timesteps, - decision_iteration, - t_d_tuples + results_dict[model_run_name] = {} + for output_name in found_outputs: + results_dict[model_run_name][output_name] = self.get_result_darray( + model_run_name, + sec_model_name, + output_name, + timesteps, + decision_iteration, + t_d_tuples ) return results_dict From 655785c8231253ccafaaccb70d2217a38015789c Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Wed, 1 May 2019 14:26:47 +0100 Subject: [PATCH 50/61] #359 Update wrt multiple outputs on store class --- src/smif/data_layer/results.py | 55 ++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 6773aa96b..4e5c055fd 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -170,52 +170,63 @@ def read(self, 'Results.read() currently requires exactly one sector model' ) - if len(output_names) != 1: - raise NotImplementedError( - 'Results.read() currently requires exactly one output' - ) - results_dict = self._store.get_results( model_run_names, sec_model_names[0], - output_names[0], + output_names, timesteps, decisions, time_decision_tuples ) # Keep tabs on the units for each output - for x in results_dict.values(): - self._output_units[x.name] = x.unit - - # Get each DataArray as a pandas data frame and concatenate, resetting the index to - # give back a flat data array - list_of_df = [x.as_df() for x in results_dict.values()] - names_of_df = [x for x in results_dict.keys()] - - results = pd.concat(list_of_df, keys=names_of_df, names=['model_run']).reset_index() + for model_run_name in model_run_names: + for output_name in output_names: + res = results_dict[model_run_name][output_name] + self._output_units[res.name] = res.unit + + # For each output, concatenate all requested model runs into a single data frame + formatted_frames = [] + for output_name in output_names: + # Get each DataArray as a pandas data frame and concatenate, resetting the index to + # give back a flat data array + list_of_df = [results_dict[x][output_name].as_df() for x in model_run_names] + names_of_df = [x for x in results_dict.keys()] + + formatted_frames.append( + pd.concat(list_of_df, keys=names_of_df, names=['model_run']).reset_index()) + + # Append the other output columns to the first data frame + 
formatted_frame = formatted_frames.pop(0) + output_names.pop(0) + + for other_frame, output_name in zip(formatted_frames, output_names): + assert (formatted_frame['model_run'] == other_frame['model_run']).all() + assert (formatted_frame['timestep_decision'] == other_frame[ + 'timestep_decision']).all() + formatted_frame[output_name] = other_frame[output_name] # Unpack the timestep_decision tuples into individual columns and drop the combined - results[['timestep', 'decision']] = pd.DataFrame(results['timestep_decision'].tolist(), - index=results.index) + formatted_frame[['timestep', 'decision']] = pd.DataFrame( + formatted_frame['timestep_decision'].tolist(), index=formatted_frame.index) - results = results.drop(columns=['timestep_decision']) + formatted_frame = formatted_frame.drop(columns=['timestep_decision']) # Rename the output columns to include units renamed_cols = dict() for key, val in self._output_units.items(): renamed_cols[key] = '{}_({})'.format(key, val) - results = results.rename(index=str, columns=renamed_cols) + formatted_frame = formatted_frame.rename(index=str, columns=renamed_cols) # Now reorder the columns. Want model_run then timestep then decision - cols = results.columns.tolist() + cols = formatted_frame.columns.tolist() assert (cols[0] == 'model_run') cols.insert(1, cols.pop(cols.index('timestep'))) cols.insert(2, cols.pop(cols.index('decision'))) - assert(cols[0:3] == ['model_run', 'timestep', 'decision']) + assert (cols[0:3] == ['model_run', 'timestep', 'decision']) - return results[cols] + return formatted_frame[cols] def get_units(self, output_name: str): """ Return the units of a given output. From bf6cebaf4a9fba9c032dbd4437853878fdff9706 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Wed, 1 May 2019 17:05:32 +0100 Subject: [PATCH 51/61] #359 Update Results.read() validation --- src/smif/data_layer/results.py | 46 +++++++++++++++++++++++++++----- tests/data_layer/test_results.py | 18 ++++++------- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index 4e5c055fd..bc64bbfad 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -149,13 +149,33 @@ def read(self, decisions: list = None, time_decision_tuples: list = None, ): - """ Return the results from the store. + """ Return results from the store as a formatted pandas data frame. There are a number + of ways of requesting specific timesteps/decisions. 
You can specify either: + + a list of (timestep, decision) tuples + in which case data for all of those tuples matching the available results will + be returned + or: + a list of timesteps + in which case data for all of those timesteps (and any decision iterations) + matching the available results will be returned + or: + a list of decision iterations + in which case data for all of those decision iterations (and any timesteps) + matching the available results will be returned + or: + a list of timesteps and a list of decision iterations + in which case data for the Cartesian product of those timesteps and those + decision iterations matching the available results will be returned + or: + nothing + in which case all available results will be returned Parameters ---------- model_run_names: list the requested model run names sec_model_names: list the requested sector model names (exactly one required) - output_names: list the requested output names (exactly one required) + output_names: list the requested output names (output specs must all match) timesteps: list the requested timesteps decisions: list the requested decision iterations time_decision_tuples: list a list of requested (timestep, decision) tuples @@ -165,10 +185,7 @@ def read(self, A Pandas dataframe """ - if len(sec_model_names) != 1: - raise NotImplementedError( - 'Results.read() currently requires exactly one sector model' - ) + self.validate_names(model_run_names, sec_model_names, output_names) results_dict = self._store.get_results( model_run_names, @@ -240,3 +257,20 @@ def get_units(self, output_name: str): str the units of the output """ return self._output_units[output_name] + + def validate_names(self, model_run_names, sec_model_names, output_names): + + if len(sec_model_names) != 1: + raise NotImplementedError( + 'Results.read() currently requires exactly one sector model' + ) + + if len(model_run_names) < 1: + raise ValueError( + 'Results.read() requires at least one sector model name' + ) + + if len(output_names) < 1: + raise ValueError( + 'Results.read() requires at least one output name' + ) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index 0cc869d94..94fa23c8a 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -139,7 +139,7 @@ def test_available_results(self, results_with_results): output_answer_b = {0: [2010, 2015, 2020, 2025, 2030]} assert outputs_b['energy_use'] == output_answer_b - def test_read_exceptions(self, results_with_results): + def test_read_validate_names(self, results_with_results): # Passing anything other than one sector model or output is current not implemented with raises(NotImplementedError) as e: @@ -158,21 +158,21 @@ def test_read_exceptions(self, results_with_results): ) assert 'requires exactly one sector model' in str(e.value) - with raises(NotImplementedError) as e: + with raises(ValueError) as e: results_with_results.read( - model_run_names=['one', 'two'], + model_run_names=[], sec_model_names=['one'], - output_names=[] + output_names=['one'] ) - assert 'requires exactly one output' in str(e.value) + assert 'requires at least one sector model name' in str(e.value) - with raises(NotImplementedError) as e: + with raises(ValueError) as e: results_with_results.read( - model_run_names=['one', 'two'], + model_run_names=['one'], sec_model_names=['one'], - output_names=['one', 'two'] + output_names=[] ) - assert 'requires exactly one output' in str(e.value) + assert 'requires at least one output name' in str(e.value) def 
test_read(self): # This is difficult to test without fixtures defining an entire canonical project. From a14f35bfd887043a0a4775ed19b97ec83bb6dc10 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Wed, 1 May 2019 17:08:01 +0100 Subject: [PATCH 52/61] #359 Tidy Store.get_results() --- src/smif/data_layer/store.py | 83 ++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index e7c672481..ad34b5974 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -15,7 +15,6 @@ to database """ import itertools -import warnings from copy import deepcopy from logging import getLogger from operator import itemgetter @@ -805,7 +804,7 @@ def read_results(self, Parameters ---------- - model_run_id : str + model_run_name : str model_name : str output_spec : smif.metadata.Spec timestep : int, default=None @@ -1110,60 +1109,62 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste model_run_name, sec_model_name, output_name, sorted(list_of_tuples) ) - def get_results(self, model_runs, sec_model_name, outputs, timesteps=None, - decision_iteration=None, t_d_tuples=None): + def get_results(self, + model_run_names: list, + sec_model_name: str, + output_names: list, + timesteps: list = None, + decisions: list = None, + time_decision_tuples: list = None, + ): """Return data for multiple timesteps and decision iterations for a given output from a given sector model for multiple model runs. Parameters ---------- - model_runs : List of str (model run names) - sec_model_name : str - output_name : str - timesteps : optional list of timesteps - decision_iteration : optional list of decision iterations - t_d_tuples : optional list of unique (timestep, decision) tuples + model_run_names: list the requested model run names + sec_model_name: the requested sector model name + output_names: list the requested output names (output specs must all match) + timesteps: list the requested timesteps + decisions: list the requested decision iterations + time_decision_tuples: list a list of requested (timestep, decision) tuples Returns ------- - Dictionary of DataArray objects keyed on model run names. + Nested dictionary of DataArray objects, keyed on model run name and output name. Returned DataArrays include one extra (timestep, decision_iteration) dimension. 
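As a usage sketch only (not runnable as-is): assuming a populated Store instance named store, and using placeholder run, sector model and output names, a call against this signature would look roughly like:

    # Hypothetical call; 'store' and all names below are placeholders
    nested = store.get_results(
        model_run_names=['run_a', 'run_b'],
        sec_model_name='energy_demand',
        output_names=['cost', 'water_demand'],
        timesteps=[2010, 2015],       # optional filter
        decisions=None,
        time_decision_tuples=None,
    )
    # nested is {model_run_name: {output_name: DataArray}}, each DataArray
    # carrying the extra 'timestep_decision' dimension described above
    cost_a = nested['run_a']['cost']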
""" - list_model_outputs = [] - # Build list of model outputs name - sec_model = self.read_model(sec_model_name) - for output in sec_model['outputs']: - list_model_outputs.append(output['name']) - # Check wether each queried output is available - found_outputs = [] - for output_name in outputs: - if output_name in list_model_outputs: - found_outputs.append(output_name) - else: - warnings.warn('Output {} not found and ignored.'.format(output_name)) - # Get Spec shape of first found output - for output in sec_model['outputs']: - if output['name'] == found_outputs[0]: - spec_shape = Spec.from_dict(output).shape - # Check subsequent found outputs and check that the shape matches - # Must loop over all sector model outputs again to extract spec object - for output in sec_model['outputs']: - for output_name in found_outputs[1:]: - if output['name'] == output_name: - if not(Spec.from_dict(output).shape == spec_shape): - raise ValueError('spec dimension for output {} not matching {}'.format(output_name, spec_shape)) - results_dict = {} - for model_run_name in model_runs: - results_dict[model_run_name] = {} - for output_name in found_outputs: + + # List the available output names and verify requested outputs match + outputs = self.read_model(sec_model_name)['outputs'] + available_outputs = [output['name'] for output in outputs] + + for output_name in output_names: + assert output_name in available_outputs, \ + '{} is not an output of sector model {}.'.format(output_name, sec_model_name) + + # The spec for each requested output must be the same. We check they have the same + # coordinates + coords = [Spec.from_dict(output).coords for output in outputs if + output['name'] in output_names] + + for coord in coords: + if coord != coords[0]: + raise ValueError('Different outputs must have the same coordinates') + + # Now actually obtain the requested results + results_dict = dict() + for model_run_name in model_run_names: + results_dict[model_run_name] = dict() + for output_name in output_names: results_dict[model_run_name][output_name] = self.get_result_darray( model_run_name, sec_model_name, output_name, timesteps, - decision_iteration, - t_d_tuples - ) + decisions, + time_decision_tuples + ) return results_dict # endregion From 7ef838988ad67a622ec57d037b518c812e8cc331 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Thu, 2 May 2019 16:39:17 +0100 Subject: [PATCH 53/61] #359 Remove units from column names --- src/smif/data_layer/results.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index bc64bbfad..e4a26d48d 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -229,12 +229,6 @@ def read(self, formatted_frame = formatted_frame.drop(columns=['timestep_decision']) - # Rename the output columns to include units - renamed_cols = dict() - for key, val in self._output_units.items(): - renamed_cols[key] = '{}_({})'.format(key, val) - formatted_frame = formatted_frame.rename(index=str, columns=renamed_cols) - # Now reorder the columns. Want model_run then timestep then decision cols = formatted_frame.columns.tolist() From 6aa5d2867417b582afc5a43ec0eee31307359874 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 3 May 2019 12:34:29 +0100 Subject: [PATCH 54/61] Move store creation to Store.from_dict Include style niggles: - docstring parameter description on next line - prefer raising relevant errors to assertions - avoid abbrev.s in param. 
names (sec_model_name > model_name) --- src/smif/data_layer/results.py | 135 ++++++++---------------- src/smif/data_layer/store.py | 184 +++++++++++++++++++++------------ 2 files changed, 162 insertions(+), 157 deletions(-) diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py index e4a26d48d..d363060a6 100644 --- a/src/smif/data_layer/results.py +++ b/src/smif/data_layer/results.py @@ -1,22 +1,9 @@ """Results provides a common interface to access results from model runs. - -Raises ------- -SmifDataNotFoundError - If data cannot be found in the store when try to read from the store -SmifDataMismatchError - Data presented to read, write and update methods is in the - incorrect format or of wrong dimensions to that expected -SmifDataReadError - When unable to read data e.g. unable to handle file type or connect - to database """ -import os +from typing import Union import pandas as pd -from smif.data_layer.file import (CSVDataStore, FileMetadataStore, - ParquetDataStore, YamlConfigStore) from smif.data_layer.store import Store @@ -25,74 +12,23 @@ class Results: Parameters ---------- - details_dict: dict optional dictionary of the form {'interface': , 'dir': } - where is either 'local_csv' or 'local_parquet', and is the model base - directory - store: Store optional pre-created Store object + store: Store or dict + pre-created Store object or dictionary of the form {'interface': , + 'dir': } where is either 'local_csv' or 'local_parquet', and is + the model base directory """ + def __init__(self, store: Union[Store, dict]): - def __init__(self, details_dict: dict = None, store: Store = None): - - assert bool(details_dict) != bool(store), \ - 'Results() accepts either a details dict or a store' - - self._store = store - if store: - return - - try: - interface = details_dict['interface'] - except KeyError: - print('No interface provided for Results(). Assuming local_csv.') - interface = 'local_csv' - - try: - directory = details_dict['dir'] - except KeyError: - print('No directory provided for Results(). Assuming \'.\'.') - directory = '.' - - # Check that the provided interface is supported - file_store = self._get_file_store(interface) - if file_store is None: - raise ValueError( - 'Unsupported interface "{}". Supply local_csv or local_parquet'.format( - interface)) - - # Check that the directory is valid - if not os.path.isdir(directory): - raise ValueError('Expected {} to be a valid directory'.format(directory)) - - self._store = Store( - config_store=YamlConfigStore(directory), - metadata_store=FileMetadataStore(directory), - data_store=file_store(directory), - model_base_folder=directory - ) - - # Create an empty dictionary for keeping tabs on the units of any read outputs - self._output_units = dict() - - @staticmethod - def _get_file_store(interface): - """ Return the appropriate derived FileDataStore class, or None if the requested - interface is invalid. - - Parameters - ---------- - interface: str the requested interface + if type(store) is dict: + self._store = Store.from_dict(store) + else: + self._store = store # type: Store - Returns - ------- - The appropriate derived FileDataStore class - """ - return { - 'local_csv': CSVDataStore, - 'local_parquet': ParquetDataStore, - }.get(interface, None) + # keep tabs on the units of any read outputs + self._output_units = dict() # type: dict def list_model_runs(self): - """ Return a list of model run names. + """Return a list of model run names. 
Returns ------- @@ -101,7 +37,7 @@ def list_model_runs(self): return sorted([x['name'] for x in self._store.read_model_runs()]) def available_results(self, model_run_name): - """ Return the results available for a given model run. + """Return the results available for a given model run. Parameters ---------- @@ -143,13 +79,13 @@ def available_results(self, model_run_name): def read(self, model_run_names: list, - sec_model_names: list, + model_names: list, output_names: list, timesteps: list = None, decisions: list = None, time_decision_tuples: list = None, ): - """ Return results from the store as a formatted pandas data frame. There are a number + """Return results from the store as a formatted pandas data frame. There are a number of ways of requesting specific timesteps/decisions. You can specify either: a list of (timestep, decision) tuples @@ -173,23 +109,40 @@ def read(self, Parameters ---------- - model_run_names: list the requested model run names - sec_model_names: list the requested sector model names (exactly one required) - output_names: list the requested output names (output specs must all match) - timesteps: list the requested timesteps - decisions: list the requested decision iterations - time_decision_tuples: list a list of requested (timestep, decision) tuples + model_run_names: list + the requested model run names + model_names: list + the requested sector model names (exactly one required) + output_names: list + the requested output names (output specs must all match) + timesteps: list + the requested timesteps + decisions: list + the requested decision iterations + time_decision_tuples: list + a list of requested (timestep, decision) tuples + + Raises + ------ + SmifDataNotFoundError + If data cannot be found in the store when try to read from the store + SmifDataMismatchError + Data presented to read, write and update methods is in the + incorrect format or of wrong dimensions to that expected + SmifDataReadError + When unable to read data e.g. 
unable to handle file type or connect + to database Returns ------- - A Pandas dataframe + pandas.DataFrame """ - self.validate_names(model_run_names, sec_model_names, output_names) + self.validate_names(model_run_names, model_names, output_names) results_dict = self._store.get_results( model_run_names, - sec_model_names[0], + model_names[0], output_names, timesteps, decisions, @@ -244,11 +197,11 @@ def get_units(self, output_name: str): Parameters ---------- - output_name: the name of the output + output_name: str Returns ------- - str the units of the output + str """ return self._output_units[output_name] diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index ad34b5974..57b3d8368 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -15,8 +15,9 @@ to database """ import itertools +import logging +import os from copy import deepcopy -from logging import getLogger from operator import itemgetter from typing import Dict, List, Optional @@ -24,7 +25,8 @@ from smif.data_layer import DataArray from smif.data_layer.abstract_data_store import DataStore from smif.data_layer.abstract_metadata_store import MetadataStore -from smif.data_layer.file import CSVDataStore, ParquetDataStore +from smif.data_layer.file import (CSVDataStore, FileMetadataStore, + ParquetDataStore, YamlConfigStore) from smif.data_layer.validate import (validate_sos_model_config, validate_sos_model_format) from smif.exception import SmifDataNotFoundError @@ -32,7 +34,8 @@ class Store(): - """Common interface to data store, composed of config, metadata and data store implementations. + """Common interface to data store, composed of config, metadata and data store + implementations. Parameters ---------- @@ -42,13 +45,50 @@ class Store(): """ def __init__(self, config_store, metadata_store: MetadataStore, data_store: DataStore, model_base_folder="."): - self.logger = getLogger(__name__) + self.logger = logging.getLogger(__name__) self.config_store = config_store self.metadata_store = metadata_store self.data_store = data_store # base folder for any relative paths to models self.model_base_folder = str(model_base_folder) + @classmethod + def from_dict(cls, config): + """Create Store from configuration dict + """ + + try: + interface = config['interface'] + except KeyError: + logging.warning('No interface provided for Results(). Assuming local_csv') + interface = 'local_csv' + + try: + directory = config['dir'] + except KeyError: + logging.warning("No directory provided for Results(). Assuming '.'") + directory = '.' + + # Check that the directory is valid + if not os.path.isdir(directory): + raise ValueError('Expected {} to be a valid directory'.format(directory)) + + if interface == 'local_csv': + data_store = CSVDataStore(directory) + elif interface == 'local_parquet': + data_store = ParquetDataStore(directory) + else: + raise ValueError( + 'Unsupported interface "{}". 
Supply local_csv or local_parquet'.format( + interface)) + + return cls( + config_store=YamlConfigStore(directory), + metadata_store=FileMetadataStore(directory), + data_store=data_store, + model_base_folder=directory + ) + # # CONFIG # @@ -896,8 +936,8 @@ def canonical_available_results(self, model_run_name): canonical_list = [] - for t, d, sec_model_name, output_name in available_results: - canonical_list.append((t, 0, sec_model_name, output_name)) + for t, d, model_name, output_name in available_results: + canonical_list.append((t, 0, model_name, output_name)) # Return as a set to remove duplicates return set(canonical_list) @@ -923,7 +963,7 @@ def canonical_expected_results(self, model_run_name): """ # Model results are returned as a tuple - # (timestep, decision_it, sec_model_name, output_name) + # (timestep, decision_it, model_name, output_name) # so we first build the full list of expected results tuples. expected_results = [] @@ -937,13 +977,13 @@ def canonical_expected_results(self, model_run_name): sos_config = self.read_sos_model(sos_model_name) # For each sector model, get the outputs and create the tuples - for sec_model_name in sos_config['sector_models']: + for model_name in sos_config['sector_models']: - sec_model_config = self.read_model(sec_model_name) - outputs = sec_model_config['outputs'] + model_config = self.read_model(model_name) + outputs = model_config['outputs'] for output, t in itertools.product(outputs, timesteps): - expected_results.append((t, 0, sec_model_name, output['name'])) + expected_results.append((t, 0, model_name, output['name'])) # Return as a set to remove duplicates return set(expected_results) @@ -967,12 +1007,12 @@ def canonical_missing_results(self, model_run_name): return self.canonical_expected_results( model_run_name) - self.canonical_available_results(model_run_name) - def _get_result_darray_internal(self, model_run_name, sec_model_name, output_name, - t_d_tuples): + def _get_result_darray_internal(self, model_run_name, model_name, output_name, + time_decision_tuples): """Internal implementation for `get_result_darray`, after the unique list of (timestep, decision) tuples has been generated and validated. - This method gets the spec for the output defined by the model_run_name, sec_model_name + This method gets the spec for the output defined by the model_run_name, model_name and output_name and expands the spec to include an additional dimension for the list of tuples. 
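For orientation, a minimal sketch of pointing Results at an on-disk project via the configuration dict accepted by this new from_dict classmethod; the path is a placeholder and must be an existing smif project directory, otherwise construction raises an error:

    from smif.data_layer import Results

    # 'interface' may be 'local_csv' (the default) or 'local_parquet';
    # 'dir' defaults to '.'; the path below is a placeholder
    results = Results(store={'interface': 'local_csv', 'dir': '/path/to/project'})
    print(results.list_model_runs())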
@@ -983,9 +1023,9 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam Parameters ---------- model_run_name : str - sec_model_name : str + model_name : str output_name : str - t_d_tuples : list of unique (timestep, decision) tuples + time_decision_tuples : list of unique (timestep, decision) tuples Returns ------- @@ -994,9 +1034,9 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam # Get the output spec given the name of the sector model and output output_spec = None - sec_model = self.read_model(sec_model_name) + model = self.read_model(model_name) - for output in sec_model['outputs']: + for output in model['outputs']: # Ignore if the output name doesn't match if output_name != output['name']: @@ -1009,8 +1049,8 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam # Read the results for each (timestep, decision) tuple and stack them list_of_numpy_arrays = [] - for t, d in t_d_tuples: - d_array = self.read_results(model_run_name, sec_model_name, output_spec, t, d) + for t, d in time_decision_tuples: + d_array = self.read_results(model_run_name, model_name, output_spec, t, d) list_of_numpy_arrays.append(d_array.data) stacked_data = np.vstack(list_of_numpy_arrays) @@ -1019,15 +1059,15 @@ def _get_result_darray_internal(self, model_run_name, sec_model_name, output_nam # Add new dimensions to the data spec output_dict = output_spec.as_dict() output_dict['dims'].append('timestep_decision') - output_dict['coords']['timestep_decision'] = t_d_tuples + output_dict['coords']['timestep_decision'] = time_decision_tuples output_spec = Spec.from_dict(output_dict) # Create a new DataArray from the modified spec and stacked data return DataArray(output_spec, np.reshape(data, output_spec.shape)) - def get_result_darray(self, model_run_name, sec_model_name, output_name, timesteps=None, - decision_iteration=None, t_d_tuples=None): + def get_result_darray(self, model_run_name, model_name, output_name, timesteps=None, + decision_iterations=None, time_decision_tuples=None): """Return data for multiple timesteps and decision iterations for a given output from a given sector model in a specific model run. 
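To make the (timestep, decision) filtering in the rewritten body below concrete, a stand-alone sketch using made-up available results (all names and years are illustrative only):

    import itertools

    # Stand-ins for the (timestep, decision, model, output) tuples returned
    # by available_results()
    available = [
        (2010, 0, 'energy', 'demand'),
        (2015, 0, 'energy', 'demand'),
        (2015, 1, 'energy', 'demand'),
        (2020, 1, 'energy', 'demand'),
    ]

    # Filter by timesteps only (any decision iteration)
    timesteps = [2015, 2020]
    by_timestep = [(t, d) for t, d, m, out in available
                   if m == 'energy' and out == 'demand' and t in timesteps]
    assert sorted(by_timestep) == [(2015, 0), (2015, 1), (2020, 1)]

    # Filter by the Cartesian product of timesteps and decision iterations
    decisions = [0, 1]
    wanted = set(itertools.product(timesteps, decisions))
    by_product = [(t, d) for t, d, m, out in available
                  if m == 'energy' and out == 'demand' and (t, d) in wanted]
    assert sorted(by_product) == [(2015, 0), (2015, 1), (2020, 1)]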
@@ -1058,60 +1098,65 @@ def get_result_darray(self, model_run_name, sec_model_name, output_name, timeste Parameters ---------- model_run_name : str - sec_model_name : str + model_name : str output_name : str timesteps : optional list of timesteps - decision_iteration : optional list of decision iterations - t_d_tuples : optional list of unique (timestep, decision) tuples + decision_iterations : optional list of decision iterations + time_decision_tuples : optional list of unique (timestep, decision) tuples Returns ------- DataArray with expanded spec and the data requested """ - - # If a list of (t,d) tuples is supplied, disallow specifying timesteps or decision - # iterations - if t_d_tuples: - assert (not timesteps and not decision_iteration) - available = self.available_results(model_run_name) # Build up the necessary list of tuples - if not timesteps and not decision_iteration and not t_d_tuples: - list_of_tuples = [(t, d) for t, d, sec, out in available if - sec == sec_model_name and out == output_name] + if not timesteps and not decision_iterations and not time_decision_tuples: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name + ] - elif timesteps and not decision_iteration and not t_d_tuples: - list_of_tuples = [(t, d) for t, d, sec, out in available if - sec == sec_model_name and out == output_name and t in timesteps] + elif timesteps and not decision_iterations and not time_decision_tuples: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and t in timesteps + ] - elif decision_iteration and not timesteps and not t_d_tuples: - list_of_tuples = [(t, d) for t, d, sec, out in available if - sec == sec_model_name and out == output_name and - d in decision_iteration] + elif decision_iterations and not timesteps and not time_decision_tuples: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and d in decision_iterations + ] - elif t_d_tuples and not timesteps and not decision_iteration: - list_of_tuples = [(t, d) for t, d, sec, out in available if - sec == sec_model_name and out == output_name and ( - t, d) in t_d_tuples] + elif time_decision_tuples and not timesteps and not decision_iterations: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and (t, d) in time_decision_tuples + ] - elif timesteps and decision_iteration and not t_d_tuples: - t_d = list(itertools.product(timesteps, decision_iteration)) - list_of_tuples = [(t, d) for t, d, sec, out in available if - sec == sec_model_name and out == output_name and (t, d) in t_d] + elif timesteps and decision_iterations and not time_decision_tuples: + t_d = list(itertools.product(timesteps, decision_iterations)) + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and (t, d) in t_d + ] else: - assert False, "It should not have been possible to reach this line of code." + msg = "Expected either timesteps, or decisions, or (timestep, decision) " + \ + "tuples, or timesteps and decisions, or none of the above." + raise ValueError(msg) - assert (len(list_of_tuples) > 0), "None of the requested data is available." 
+ if not list_of_tuples: + raise SmifDataNotFoundError("None of the requested data is available.") return self._get_result_darray_internal( - model_run_name, sec_model_name, output_name, sorted(list_of_tuples) + model_run_name, model_name, output_name, sorted(list_of_tuples) ) def get_results(self, model_run_names: list, - sec_model_name: str, + model_name: str, output_names: list, timesteps: list = None, decisions: list = None, @@ -1122,26 +1167,33 @@ def get_results(self, Parameters ---------- - model_run_names: list the requested model run names - sec_model_name: the requested sector model name - output_names: list the requested output names (output specs must all match) - timesteps: list the requested timesteps - decisions: list the requested decision iterations - time_decision_tuples: list a list of requested (timestep, decision) tuples + model_run_names: list[str] + the requested model run names + model_name: str + the requested sector model name + output_names: list[str] + the requested output names (output specs must all match) + timesteps: list[int] + the requested timesteps + decisions: list[int] + the requested decision iterations + time_decision_tuples: list[tuple] + a list of requested (timestep, decision) tuples Returns ------- - Nested dictionary of DataArray objects, keyed on model run name and output name. - Returned DataArrays include one extra (timestep, decision_iteration) dimension. + dict + Nested dictionary of DataArray objects, keyed on model run name and output name. + Returned DataArrays include one extra (timestep, decision_iteration) dimension. """ # List the available output names and verify requested outputs match - outputs = self.read_model(sec_model_name)['outputs'] + outputs = self.read_model(model_name)['outputs'] available_outputs = [output['name'] for output in outputs] for output_name in output_names: assert output_name in available_outputs, \ - '{} is not an output of sector model {}.'.format(output_name, sec_model_name) + '{} is not an output of sector model {}.'.format(output_name, model_name) # The spec for each requested output must be the same. 
We check they have the same # coordinates @@ -1153,13 +1205,13 @@ def get_results(self, raise ValueError('Different outputs must have the same coordinates') # Now actually obtain the requested results - results_dict = dict() + results_dict = dict() # type: Dict for model_run_name in model_run_names: results_dict[model_run_name] = dict() for output_name in output_names: results_dict[model_run_name][output_name] = self.get_result_darray( model_run_name, - sec_model_name, + model_name, output_name, timesteps, decisions, From ae917af12915ae637f4960c2a0e6c0bbfbbcea0a Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 3 May 2019 12:34:57 +0100 Subject: [PATCH 55/61] Fix typo in error message --- src/smif/data_layer/file/file_data_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smif/data_layer/file/file_data_store.py b/src/smif/data_layer/file/file_data_store.py index 6fda502d8..ba0069127 100644 --- a/src/smif/data_layer/file/file_data_store.py +++ b/src/smif/data_layer/file/file_data_store.py @@ -42,7 +42,7 @@ def __init__(self, base_folder): dirname = os.path.join(self.data_folder, folder) # ensure each directory exists if not os.path.exists(dirname): - msg = "Expected data folder at '{}' but it does does not exist" + msg = "Expected data folder at '{}' but it does not exist" abs_path = os.path.abspath(dirname) raise SmifDataNotFoundError(msg.format(abs_path)) self.data_folders[folder] = dirname From ab97a0951293931985bb079163b17bb5f047443d Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Fri, 3 May 2019 12:35:47 +0100 Subject: [PATCH 56/61] Add test fixture for results --- tests/data_layer/test_results.py | 243 +++++++++++++++++++++---------- 1 file changed, 165 insertions(+), 78 deletions(-) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index 94fa23c8a..c3bcf44cb 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -3,38 +3,103 @@ import os +import numpy as np +import pandas as pd from pytest import fixture, raises -from smif.data_layer import Results +from smif.data_layer import DataArray, Results from smif.exception import SmifDataNotFoundError +from smif.metadata import Spec @fixture -def results(empty_store, model_run): +def results(empty_store): """Results fixture """ - empty_store.write_model_run(model_run) return Results(store=empty_store) @fixture -def results_with_results(empty_store, model_run, sample_results): +def results_with_results(empty_store): """Results fixture with a model run and fictional results """ - empty_store.write_model_run(model_run) - - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2010, 0) - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2015, 0) - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2020, 0) - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2015, 1) - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2020, 1) - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2015, 2) - empty_store.write_results(sample_results, 'unique_model_run_name', 'a_model', 2020, 2) - - empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2010, 0) - empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2015, 0) - empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2020, 0) - empty_store.write_results(sample_results, 
'unique_model_run_name', 'b_model', 2025, 0) - empty_store.write_results(sample_results, 'unique_model_run_name', 'b_model', 2030, 0) + empty_store.write_dimension({ + 'name': 'sample_dim', + 'elements': [ {'name': 'a'}, {'name': 'b'} ] + }) + sample_output = { + 'name': 'sample_output', + 'dtype': 'float', + 'dims': ['sample_dim'], + 'coords': { 'sample_dim': [ {'name': 'a'}, {'name': 'b'} ] }, + 'unit': 'm' + } + empty_store.write_model({ + 'name': 'a_model', + 'description': "Sample model", + 'classname': 'DoesNotExist', + 'path': '/dev/null', + 'inputs': [], + 'outputs': [sample_output], + 'parameters': [], + 'interventions': [], + 'initial_conditions': [] + }) + empty_store.write_model({ + 'name': 'b_model', + 'description': "Second sample model", + 'classname': 'DoesNotExist', + 'path': '/dev/null', + 'inputs': [], + 'outputs': [sample_output], + 'parameters': [], + 'interventions': [], + 'initial_conditions': [] + }) + empty_store.write_sos_model({ + 'name': 'a_sos_model', + 'description': 'Sample SoS', + 'sector_models': ['a_model', 'b_model'], + 'scenarios': [], + 'scenario_dependencies': [], + 'model_dependencies': [], + 'narratives': [] + }) + empty_store.write_model_run({ + 'name': 'model_run_1', + 'description': 'Sample model run', + 'timesteps': [2010, 2015, 2020, 2025, 2030], + 'sos_model': 'a_sos_model', + 'scenarios': {}, + 'strategies': [], + 'narratives': {} + }) + empty_store.write_model_run({ + 'name': 'model_run_2', + 'description': 'Sample model run', + 'timesteps': [2010, 2015, 2020, 2025, 2030], + 'sos_model': 'a_sos_model', + 'scenarios': {}, + 'strategies': [], + 'narratives': {} + }) + + spec = Spec.from_dict(sample_output) + data = np.zeros((2,), dtype=float) + sample_results = DataArray(spec, data) + + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2010, 0) + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 0) + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 0) + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 1) + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 1) + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 2) + empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 2) + + empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2010, 0) + empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2015, 0) + empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2020, 0) + empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2025, 0) + empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2030, 0) return Results(store=empty_store) @@ -44,14 +109,9 @@ class TestNoResults: def test_exceptions(self, empty_store): # No arguments is not allowed - with raises(AssertionError) as e: + with raises(TypeError) as e: Results() - assert 'either a details dict or a store' in str(e.value) - - # Both arguments is also not allowed - with raises(AssertionError) as e: - Results(details_dict={'some': 'dict'}, store=empty_store) - assert 'either a details dict or a store' in str(e.value) + assert "missing 1 required positional argument: 'store'" in str(e) # Check that constructing with just a store works fine Results(store=empty_store) @@ -59,101 +119,107 @@ def test_exceptions(self, empty_store): # Check that valid configurations do work (but expect a SmifDataNotFoundError # because the store creation will fall over with 
raises(SmifDataNotFoundError) as e: - Results(details_dict={'interface': 'local_csv', 'dir': '.'}) - assert 'Expected configuration folder' in str(e.value) + Results(store={'interface': 'local_csv', 'dir': '.'}) + assert 'Expected data folder' in str(e) with raises(SmifDataNotFoundError) as e: - Results(details_dict={'interface': 'local_parquet', 'dir': '.'}) - assert 'Expected configuration folder' in str(e.value) + Results(store={'interface': 'local_parquet', 'dir': '.'}) + assert 'Expected data folder' in str(e) # Interface left blank will default to local_csv with raises(SmifDataNotFoundError) as e: - Results(details_dict={'dir': '.'}) - assert 'Expected configuration folder' in str(e.value) + Results(store={'dir': '.'}) + assert 'Expected data folder' in str(e) # Dir left blank will default to '.' with raises(SmifDataNotFoundError) as e: - Results(details_dict={'interface': 'local_parquet'}) - assert 'Expected configuration folder' in str(e.value) + Results(store={'interface': 'local_parquet'}) + assert 'Expected data folder' in str(e) # Invalid interface will raise a ValueError with raises(ValueError) as e: - Results(details_dict={'interface': 'invalid', 'dir': '.'}) - assert 'Unsupported interface "invalid"' in str(e.value) + Results(store={'interface': 'invalid', 'dir': '.'}) + assert 'Unsupported interface "invalid"' in str(e) # Invalid directory will raise a ValueError with raises(ValueError) as e: invalid_dir = os.path.join(os.path.dirname(__file__), 'does', 'not', 'exist') - Results(details_dict={'interface': 'local_csv', 'dir': invalid_dir}) - assert 'to be a valid directory' in str(e.value) + Results(store={'interface': 'local_csv', 'dir': invalid_dir}) + assert 'to be a valid directory' in str(e) - def test_list_model_runs(self, empty_store, model_run): + def test_list_model_runs(self, results_with_results): + assert results_with_results.list_model_runs() == ['model_run_1', 'model_run_2'] + def test_list_no_model_runs(self, results): # Should be no model runs in an empty Results() - results = Results(store=empty_store) assert results.list_model_runs() == [] - model_run_a = model_run.copy() - model_run_a['name'] = 'a_model_run' - - model_run_b = model_run.copy() - model_run_b['name'] = 'b_model_run' - - empty_store.write_model_run(model_run_a) - empty_store.write_model_run(model_run_b) - - assert results.list_model_runs() == ['a_model_run', 'b_model_run'] - - def test_available_results(self, results): - - available = results.available_results('unique_model_run_name') - - assert available['model_run'] == 'unique_model_run_name' - assert available['sos_model'] == 'energy' - assert available['sector_models'] == dict() + def test_available_results(self, results_with_results): + available = results_with_results.available_results('model_run_1') + + assert available['model_run'] == 'model_run_1' + assert available['sos_model'] == 'a_sos_model' + assert available['sector_models'] == { + 'a_model': { + 'outputs': { + 'sample_output': { + 0: [2010, 2015, 2020], + 1: [2015, 2020], + 2: [2015, 2020] + } + } + }, + 'b_model': { + 'outputs': { + 'sample_output': { + 0: [2010, 2015, 2020, 2025, 2030] + } + } + } + } class TestSomeResults: def test_available_results(self, results_with_results): - available = results_with_results.available_results('unique_model_run_name') + available = results_with_results.available_results('model_run_1') - assert available['model_run'] == 'unique_model_run_name' - assert available['sos_model'] == 'energy' + assert available['model_run'] == 'model_run_1' + 
assert available['sos_model'] == 'a_sos_model' sec_models = available['sector_models'] assert sorted(sec_models.keys()) == ['a_model', 'b_model'] # Check a_model outputs are correct outputs_a = sec_models['a_model']['outputs'] - assert sorted(outputs_a.keys()) == ['energy_use'] + assert sorted(outputs_a.keys()) == ['sample_output'] output_answer_a = {0: [2010, 2015, 2020], 1: [2015, 2020], 2: [2015, 2020]} - assert outputs_a['energy_use'] == output_answer_a + assert outputs_a['sample_output'] == output_answer_a # Check b_model outputs are correct outputs_b = sec_models['b_model']['outputs'] - assert sorted(outputs_b.keys()) == ['energy_use'] + assert sorted(outputs_b.keys()) == ['sample_output'] output_answer_b = {0: [2010, 2015, 2020, 2025, 2030]} - assert outputs_b['energy_use'] == output_answer_b + assert outputs_b['sample_output'] == output_answer_b def test_read_validate_names(self, results_with_results): # Passing anything other than one sector model or output is current not implemented with raises(NotImplementedError) as e: results_with_results.read( - model_run_names=['one', 'two'], - sec_model_names=[], - output_names=['one'] + model_run_names=['model_run_1', 'model_run_2'], + model_names=[], + output_names=['sample_output'] ) assert 'requires exactly one sector model' in str(e.value) with raises(NotImplementedError) as e: results_with_results.read( - model_run_names=['one', 'two'], - sec_model_names=['one', 'two'], + model_run_names=['model_run_1', 'model_run_2'], + model_names=['a_model', 'b_model'], output_names=['one'] ) assert 'requires exactly one sector model' in str(e.value) @@ -161,21 +227,42 @@ def test_read_validate_names(self, results_with_results): with raises(ValueError) as e: results_with_results.read( model_run_names=[], - sec_model_names=['one'], - output_names=['one'] + model_names=['a_model'], + output_names=['sample_output'] ) assert 'requires at least one sector model name' in str(e.value) with raises(ValueError) as e: results_with_results.read( - model_run_names=['one'], - sec_model_names=['one'], + model_run_names=['model_run_1'], + model_names=['a_model'], output_names=[] ) assert 'requires at least one output name' in str(e.value) - def test_read(self): + + def test_read(self, results_with_results): # This is difficult to test without fixtures defining an entire canonical project. - # See smif issue #304 (https://github.com/nismod/smif/issues/304). For now, we rely - # on tests of the underling get_results() method on the Store. - pass + # See smif issue #304 (https://github.com/nismod/smif/issues/304). 
+ + # should pass validation + results_data = results_with_results.read( + model_run_names=['model_run_1'], + model_names=['a_model'], + output_names=['sample_output'] + ) + print(results_data) + expected = pd.DataFrame({ + 'model_run': 'model_run_1', + 'timestep': [ + 2010, 2015, 2015, 2015, 2020, 2020, 2020, + 2010, 2015, 2015, 2015, 2020, 2020, 2020], + 'decision': [ + 0, 0, 1, 2, 0, 1, 2, + 0, 0, 1, 2, 0, 1, 2], + 'sample_dim': [ + 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'b', 'b', 'b', 'b', 'b', 'b', 'b'], + 'sample_output': 0.0, + }) + pd.testing.assert_frame_equal(results_data, expected) From 0fe05f8a2f126fc02285c1445e0a5676c825a340 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 3 May 2019 15:24:05 +0100 Subject: [PATCH 57/61] #359 Change dict to OrderedDict to ensure ordered Pandas dataframe --- tests/data_layer/test_results.py | 33 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index c3bcf44cb..f64c9b348 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -2,6 +2,7 @@ """ import os +from collections import OrderedDict import numpy as np import pandas as pd @@ -24,13 +25,13 @@ def results_with_results(empty_store): """ empty_store.write_dimension({ 'name': 'sample_dim', - 'elements': [ {'name': 'a'}, {'name': 'b'} ] + 'elements': [{'name': 'a'}, {'name': 'b'}] }) sample_output = { 'name': 'sample_output', 'dtype': 'float', 'dims': ['sample_dim'], - 'coords': { 'sample_dim': [ {'name': 'a'}, {'name': 'b'} ] }, + 'coords': {'sample_dim': [{'name': 'a'}, {'name': 'b'}]}, 'unit': 'm' } empty_store.write_model({ @@ -240,7 +241,6 @@ def test_read_validate_names(self, results_with_results): ) assert 'requires at least one output name' in str(e.value) - def test_read(self, results_with_results): # This is difficult to test without fixtures defining an entire canonical project. # See smif issue #304 (https://github.com/nismod/smif/issues/304). 
@@ -251,18 +251,17 @@ def test_read(self, results_with_results): model_names=['a_model'], output_names=['sample_output'] ) - print(results_data) - expected = pd.DataFrame({ - 'model_run': 'model_run_1', - 'timestep': [ - 2010, 2015, 2015, 2015, 2020, 2020, 2020, - 2010, 2015, 2015, 2015, 2020, 2020, 2020], - 'decision': [ - 0, 0, 1, 2, 0, 1, 2, - 0, 0, 1, 2, 0, 1, 2], - 'sample_dim': [ - 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'b', 'b', 'b', 'b', 'b', 'b', 'b'], - 'sample_output': 0.0, - }) + + expected = pd.DataFrame( + OrderedDict([ + ('model_run', 'model_run_1'), + ('timestep', [2010, 2015, 2015, 2015, 2020, 2020, 2020, + 2010, 2015, 2015, 2015, 2020, 2020, 2020]), + ('decision', [0, 0, 1, 2, 0, 1, 2, 0, 0, 1, 2, 0, 1, 2]), + ('sample_dim', ['a', 'a', 'a', 'a', 'a', 'a', 'a', + 'b', 'b', 'b', 'b', 'b', 'b', 'b']), + ('sample_output', 0.0), + ]) + ) + pd.testing.assert_frame_equal(results_data, expected) From 3c8bfbf7fff989b5217ecbae76158312fe6b1bef Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 3 May 2019 16:25:08 +0100 Subject: [PATCH 58/61] #359 Differentiate between Results instance with or without actual results --- tests/data_layer/test_results.py | 79 ++++++++++++++------------------ 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index f64c9b348..c64319911 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -13,14 +13,7 @@ @fixture -def results(empty_store): - """Results fixture - """ - return Results(store=empty_store) - - -@fixture -def results_with_results(empty_store): +def results_no_results(empty_store): """Results fixture with a model run and fictional results """ empty_store.write_dimension({ @@ -84,25 +77,39 @@ def results_with_results(empty_store): 'narratives': {} }) + return Results(store=empty_store) + + +@fixture +def results_with_results(results_no_results): + + sample_output = { + 'name': 'sample_output', + 'dtype': 'float', + 'dims': ['sample_dim'], + 'coords': {'sample_dim': [{'name': 'a'}, {'name': 'b'}]}, + 'unit': 'm' + } + spec = Spec.from_dict(sample_output) data = np.zeros((2,), dtype=float) sample_results = DataArray(spec, data) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2010, 0) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 0) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 0) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 1) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 1) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 2) - empty_store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 2) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2010, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 1) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 1) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 2) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 2) - empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2010, 0) - empty_store.write_results(sample_results, 
'model_run_1', 'b_model', 2015, 0) - empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2020, 0) - empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2025, 0) - empty_store.write_results(sample_results, 'model_run_1', 'b_model', 2030, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2010, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2015, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2020, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2025, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2030, 0) - return Results(store=empty_store) + return results_no_results class TestNoResults: @@ -148,36 +155,20 @@ def test_exceptions(self, empty_store): Results(store={'interface': 'local_csv', 'dir': invalid_dir}) assert 'to be a valid directory' in str(e) - def test_list_model_runs(self, results_with_results): - assert results_with_results.list_model_runs() == ['model_run_1', 'model_run_2'] + def test_list_model_runs(self, results_no_results): + assert results_no_results.list_model_runs() == ['model_run_1', 'model_run_2'] - def test_list_no_model_runs(self, results): + def test_list_no_model_runs(self, empty_store): # Should be no model runs in an empty Results() + results = Results(store=empty_store) assert results.list_model_runs() == [] - def test_available_results(self, results_with_results): - available = results_with_results.available_results('model_run_1') + def test_available_results(self, results_no_results): + available = results_no_results.available_results('model_run_1') assert available['model_run'] == 'model_run_1' assert available['sos_model'] == 'a_sos_model' - assert available['sector_models'] == { - 'a_model': { - 'outputs': { - 'sample_output': { - 0: [2010, 2015, 2020], - 1: [2015, 2020], - 2: [2015, 2020] - } - } - }, - 'b_model': { - 'outputs': { - 'sample_output': { - 0: [2010, 2015, 2020, 2025, 2030] - } - } - } - } + assert available['sector_models'] == {} class TestSomeResults: From db7cf82bc85ccefc662c124e49c18237d3ec5e70 Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Fri, 3 May 2019 17:06:04 +0100 Subject: [PATCH 59/61] #359 Add coverage for multiple model runs --- tests/data_layer/test_results.py | 44 +++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py index c64319911..a93e1867b 100644 --- a/tests/data_layer/test_results.py +++ b/tests/data_layer/test_results.py @@ -109,6 +109,12 @@ def results_with_results(results_no_results): results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2025, 0) results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2030, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2010, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2015, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2020, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2025, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2030, 0) + return results_no_results @@ -197,6 +203,21 @@ def test_available_results(self, results_with_results): output_answer_b = {0: [2010, 2015, 2020, 2025, 2030]} assert 
outputs_b['sample_output'] == output_answer_b + available = results_with_results.available_results('model_run_2') + + assert available['model_run'] == 'model_run_2' + assert available['sos_model'] == 'a_sos_model' + + sec_models = available['sector_models'] + assert sorted(sec_models.keys()) == ['b_model'] + + # Check a_model outputs are correct + outputs = sec_models['b_model']['outputs'] + assert sorted(outputs_a.keys()) == ['sample_output'] + + output_answer = {0: [2010, 2015, 2020, 2025, 2030]} + assert outputs['sample_output'] == output_answer + def test_read_validate_names(self, results_with_results): # Passing anything other than one sector model or output is current not implemented @@ -233,10 +254,8 @@ def test_read_validate_names(self, results_with_results): assert 'requires at least one output name' in str(e.value) def test_read(self, results_with_results): - # This is difficult to test without fixtures defining an entire canonical project. - # See smif issue #304 (https://github.com/nismod/smif/issues/304). - # should pass validation + # Read one model run and one output results_data = results_with_results.read( model_run_names=['model_run_1'], model_names=['a_model'], @@ -256,3 +275,22 @@ def test_read(self, results_with_results): ) pd.testing.assert_frame_equal(results_data, expected) + + # Read two model runs and one output + results_data = results_with_results.read( + model_run_names=['model_run_1', 'model_run_2'], + model_names=['b_model'], + output_names=['sample_output'] + ) + + expected = pd.DataFrame( + OrderedDict([ + ('model_run', ['model_run_1'] * 10 + ['model_run_2'] * 10), + ('timestep', [2010, 2015, 2020, 2025, 2030] * 4), + ('decision', 0), + ('sample_dim', ['a'] * 5 + ['b'] * 5 + ['a'] * 5 + ['b'] * 5), + ('sample_output', 0.0), + ]) + ) + + pd.testing.assert_frame_equal(results_data, expected) From b94bce984f25ec157da5c9434fdb3685b49cff1e Mon Sep 17 00:00:00 2001 From: Fergus Cooper Date: Tue, 7 May 2019 11:21:16 +0100 Subject: [PATCH 60/61] #359 Change to OrderedDict for reproducibility between 3.5 and 3.6 --- src/smif/data_layer/store.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 57b3d8368..daee7c629 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -17,6 +17,7 @@ import itertools import logging import os +from collections import OrderedDict from copy import deepcopy from operator import itemgetter from typing import Dict, List, Optional @@ -1205,9 +1206,9 @@ def get_results(self, raise ValueError('Different outputs must have the same coordinates') # Now actually obtain the requested results - results_dict = dict() # type: Dict + results_dict = OrderedDict() # type: OrderedDict for model_run_name in model_run_names: - results_dict[model_run_name] = dict() + results_dict[model_run_name] = OrderedDict() for output_name in output_names: results_dict[model_run_name][output_name] = self.get_result_darray( model_run_name, From 54a7aa98d06655ede17d9d0eb7e3a55eee61c4b2 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 7 May 2019 16:01:49 +0100 Subject: [PATCH 61/61] Add test example reading multi-timestep scenario data Towards #370 --- tests/data_layer/test_data_store.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/data_layer/test_data_store.py b/tests/data_layer/test_data_store.py index 60627c8a0..55c25cbbb 100644 --- a/tests/data_layer/test_data_store.py +++ 
b/tests/data_layer/test_data_store.py @@ -1,5 +1,7 @@ """Test all DataStore implementations """ +from copy import deepcopy + import numpy as np from pytest import fixture, mark, param, raises from smif.data_layer.data_array import DataArray @@ -37,7 +39,7 @@ class TestDataArray(): """Read and write DataArray """ def test_read_write_data_array(self, handler, scenario): - data = np.array([0, 1], dtype=float) + data = np.array([0, 1], dtype='float') spec = Spec.from_dict(scenario['provides'][0]) da = DataArray(spec, data) @@ -46,6 +48,29 @@ def test_read_write_data_array(self, handler, scenario): actual = handler.read_scenario_variant_data('mortality.csv', spec, 2010) assert actual == da + np.testing.assert_array_equal(actual.as_ndarray(), da.as_ndarray()) + + def test_read_write_data_array_all(self, handler, scenario): + spec = Spec.from_dict(deepcopy(scenario['provides'][0])) + + spec_with_t = scenario['provides'][0] + spec_with_t['dims'].insert(0, 'timestep') + spec_with_t['coords']['timestep'] = [2010, 2015] + spec_with_t = Spec.from_dict(spec_with_t) + da = DataArray(spec_with_t, np.array([[0, 1], [2, 3]], dtype='float')) + + handler.write_scenario_variant_data('mortality.csv', da) + actual = handler.read_scenario_variant_data('mortality.csv', spec_with_t) + expected = np.array([[0, 1], [2, 3]], dtype='float') + np.testing.assert_array_equal(actual.as_ndarray(), expected) + + da_2010 = handler.read_scenario_variant_data('mortality.csv', spec, 2010) + expected = np.array([0, 1], dtype='float') + np.testing.assert_array_equal(da_2010.as_ndarray(), expected) + + da_2015 = handler.read_scenario_variant_data('mortality.csv', spec, 2015) + expected = np.array([2, 3], dtype='float') + np.testing.assert_array_equal(da_2015.as_ndarray(), expected) def test_read_zero_d_from_timeseries(self, handler): """Read a single value