more tests for custom snpp; better checks for single values; update doc

#37
nismod · Jun 6, 2019 · 46bb42c · 46bb42c
1 parent e30dea9
commit 46bb42c
Show file tree

Hide file tree

Showing 6 changed files with 45 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -8,7 +8,29 @@
 
 # ukpopulation: UK Demographic Projections
 
-> ## Latest news: 1.1 release
+> ## Latest news: 1.2 release
+> - adds support for custom subnational population projections
+> ### Custom SNPP Data
+> An externally generated SNPP dataset (e.g. from [simim](https://github.com/nismod/simim)) can be registered with the `ukpopulation` package and used as if it were the standard ONS/StatsWales/NRScotland/NISRA projection:
+> ```python3
+> >>> import ukpopulation.customsnppdata as CustomSNPPData
+> >>> customdata = pd.read_csv("custom_snpp.csv")
+> >>> customdata.head()
+>   GEOGRAPHY_CODE  GENDER  C_AGE  OBS_VALUE  PROJECTED_YEAR_NAME
+> 0      E06000005       1      0      603.0                 2018
+> 1      E06000005       1      1      600.0                 2018
+> 2      E06000005       1      2      624.0                 2018
+> 3      E06000005       1      3      636.0                 2018
+> 4      E06000005       1      4      661.0                 2018
+> >>> CustomSNPPData.register_custom_projection("custom_snpp", customdata, "cache_directory")
+> Writing custom SNPP custom_snpp to cache_directory/ukpopulation_custom_snpp_custom_snpp.csv
+> >>> CustomSNPPData.list_custom_projections("cache_directory")
+> ['custom_snpp']
+> >>>
+> ```
+> The external dataset must follow the format/column name conventions as above, but can also contain extra data if required for other use. The `GENDER` column should only take the values 1 (male) or 2 (female); the `C_AGE` column should contain the range 0-90 inclusive (90 meaning 90 or over).
+
+> ## 1.1 release
 > - adds UK household projections
 > - initial support for custom SNPP variants
 > - better consistency across the MYE/NPP/SNPP APIs (breaks backwards compatibility)

diff --git a/tests/test_all.py b/tests/test_all.py
@@ -252,7 +252,14 @@ def test_snpp_custom_projection(self):
     self.assertEqual(len(agg), 2)
     self.assertAlmostEqual(agg.OBS_VALUE.sum(), 30760.0, 5) # remember this is population under 46
 
-
+    # test extrapolagg is equivalent to extrapolate + external agg
+    years = range(custom.max_year()-1, custom.max_year() + 2)
+    ext = utils.aggregate(custom.extrapolate(self.npp, "E06000001", years), ["GENDER", "C_AGE"])
+    extagg = custom.extrapolagg(["GENDER", "C_AGE"], self.npp, "E06000001", years)
+    self.assertTrue(ext.equals(extagg))
+    self.assertEqual(len(ext.GEOGRAPHY_CODE.unique()), 1)
+    self.assertEqual(ext.GEOGRAPHY_CODE.unique()[0], "E06000001")
+    self.assertAlmostEqual(ext.OBS_VALUE.sum(), 279841.6197443956, 5)
 
   # test datasets have consistent ranges
   def test_consistency(self):

diff --git a/ukpopulation/customsnppdata.py b/ukpopulation/customsnppdata.py
@@ -67,13 +67,13 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
 
     if years is None:
       years=range(self.min_year(), self.max_year()+1)
-    if isinstance(years, int):
+    if np.isscalar(years):
       years = [years]
 
-    if isinstance(ages, int):
+    if np.isscalar(ages):
       ages = [ages]
 
-    if isinstance(genders, int):
+    if np.isscalar(genders):
       genders = [genders]
 
     # check for any codes requested that weren't present
@@ -111,16 +111,16 @@ def extrapolate(self, npp, geog_codes, year_range):
     for country in geog_codes:
       if not geog_codes[country]: continue
 
-      max_year = self.max_year()
-      last_year = self.filter(geog_codes[country], max_year)
+      maxyear = self.max_year()
+      last_year = self.filter(geog_codes[country], maxyear)
 
-      (in_range, ex_range) = utils.split_range(year_range, max_year)
+      (in_range, ex_range) = utils.split_range(year_range, maxyear)
       # years that don't need to be extrapolated
       all_years = self.filter(geog_codes[country], in_range) if in_range else pd.DataFrame()
 
       for year in ex_range:
         data = last_year.copy()
-        scaling = npp.year_ratio("ppp", country, max_year, year)
+        scaling = npp.year_ratio("ppp", country, maxyear, year)
         data = data.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]], on=["GENDER", "C_AGE"])
         data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y
         data.PROJECTED_YEAR_NAME = year

diff --git a/ukpopulation/myedata.py b/ukpopulation/myedata.py
@@ -2,6 +2,7 @@
 MYEData - wrapper around Mid-Year Estimate data by LAD, SYoA and gender
 """
 
+import numpy as np
 import pandas as pd
 import ukcensusapi.Nomisweb as Api
 import ukpopulation.utils as utils
@@ -47,9 +48,9 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
     # ensure array inputs
     if isinstance(geog_codes, str):
       geog_codes = [geog_codes]
-    if isinstance(ages, int):
+    if np.isscalar(ages):
       ages = [ages]
-    if isinstance(genders, int):
+    if np.isscalar(genders):
       genders = [genders]
 
     result = pd.DataFrame()

diff --git a/ukpopulation/nppdata.py b/ukpopulation/nppdata.py
@@ -92,10 +92,10 @@ def detail(self, variant_name, geog, years=None, ages=range(0,91), genders=[1,2]
     """
     Return a subset of the raw data
     """
-    if isinstance(ages, int):
+    if np.isscalar(ages):
       ages = [ages]
 
-    if isinstance(genders, int):
+    if np.isscalar(genders):
       genders = [genders]
 
     if not variant_name in NPPData.VARIANTS:

diff --git a/ukpopulation/snppdata.py b/ukpopulation/snppdata.py
@@ -68,10 +68,10 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
     if isinstance(geog_codes, str):
       geog_codes = [geog_codes]
 
-    if isinstance(ages, int):
+    if np.isscalar(ages):
       ages = [ages]
 
-    if isinstance(genders, int):
+    if np.isscalar(genders):
       genders = [genders]
 
     countries = utils.country(geog_codes)