From 46bb42ce8985946e5cd7643b3c3de104b7e4a24c Mon Sep 17 00:00:00 2001 From: virgesmith Date: Thu, 6 Jun 2019 10:47:32 +0100 Subject: [PATCH] more tests for custom snpp; better checks for single values; update doc #37 --- README.md | 24 +++++++++++++++++++++++- tests/test_all.py | 9 ++++++++- ukpopulation/customsnppdata.py | 14 +++++++------- ukpopulation/myedata.py | 5 +++-- ukpopulation/nppdata.py | 4 ++-- ukpopulation/snppdata.py | 4 ++-- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index c0dfa6a..7f4190e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,29 @@ # ukpopulation: UK Demographic Projections -> ## Latest news: 1.1 release +> ## Latest news: 1.2 release +> - adds support for custom subnational population projections +> ### Custom SNPP Data +> An externally generated SNPP dataset (from e.g. [simim](https://github.com/nismod/simim)) can be registered with the `ukpopulation` package and used as if it were the standard ONS/StatsWales/NRScotland/NISRA projection: +> ```python3 +> >>> import ukpopulation.customsnppdata as CustomSNPPData +> >>> customdata = pd.read_csv("custom_snpp.csv") +> >>> customdata.head() +> GEOGRAPHY_CODE GENDER C_AGE OBS_VALUE PROJECTED_YEAR_NAME +> 0 E06000005 1 0 603.0 2018 +> 1 E06000005 1 1 600.0 2018 +> 2 E06000005 1 2 624.0 2018 +> 3 E06000005 1 3 636.0 2018 +> 4 E06000005 1 4 661.0 2018 +> >>> CustomSNPPData.register_custom_projection("custom_snpp", customdata, "cache_directory") +> Writing custom SNPP custom_snpp to cache/ukpopulation_custom_snpp_custom_snpp.csv +> >>> CustomSNPPData.list_custom_projections("cache_directory") +> ['custom_snpp'] +> >>> +> ``` +> The external dataset must follow the format/column name conventions as above, but can also contain extra data if required for other use. The `GENDER` column should only take the values 1 (male) or 2 (female); the `C_AGE` column should contain the range 0-90 inclusive (90 meaning 90 or over). 
+ +> ## 1.1 release > - adds UK household projections > - initial support for custom SNPP variants > - better consistency across the MYE/NPP/SNPP APIs (breaks backwards compatibility) diff --git a/tests/test_all.py b/tests/test_all.py index 79c2c42..95402c6 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -252,7 +252,14 @@ def test_snpp_custom_projection(self): self.assertEqual(len(agg), 2) self.assertAlmostEqual(agg.OBS_VALUE.sum(), 30760.0, 5) # remember this is population under 46 - + # test extrapolagg is equivalent to extrapolate + external agg + years = range(custom.max_year()-1, custom.max_year() + 2) + ext = utils.aggregate(custom.extrapolate(self.npp, "E06000001", years), ["GENDER", "C_AGE"]) + extagg = custom.extrapolagg(["GENDER", "C_AGE"], self.npp, "E06000001", years) + self.assertTrue(ext.equals(extagg)) + self.assertEqual(len(ext.GEOGRAPHY_CODE.unique()), 1) + self.assertEqual(ext.GEOGRAPHY_CODE.unique()[0], "E06000001") + self.assertAlmostEqual(ext.OBS_VALUE.sum(), 279841.6197443956, 5) # test datasets have consistent ranges def test_consistency(self): diff --git a/ukpopulation/customsnppdata.py b/ukpopulation/customsnppdata.py index 08119b2..d209a19 100644 --- a/ukpopulation/customsnppdata.py +++ b/ukpopulation/customsnppdata.py @@ -67,13 +67,13 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]): if years is None: years=range(self.min_year(), self.max_year()+1) - if isinstance(years, int): + if np.isscalar(years): years = [years] - if isinstance(ages, int): + if np.isscalar(ages): ages = [ages] - if isinstance(genders, int): + if np.isscalar(genders): genders = [genders] # check for any codes requested that werent present @@ -111,16 +111,16 @@ def extrapolate(self, npp, geog_codes, year_range): for country in geog_codes: if not geog_codes[country]: continue - max_year = self.max_year() - last_year = self.filter(geog_codes[country], max_year) + maxyear = self.max_year() + last_year = 
self.filter(geog_codes[country], maxyear) - (in_range, ex_range) = utils.split_range(year_range, max_year) + (in_range, ex_range) = utils.split_range(year_range, maxyear) # years that dont need to be extrapolated all_years = self.filter(geog_codes[country], in_range) if in_range else pd.DataFrame() for year in ex_range: data = last_year.copy() - scaling = npp.year_ratio("ppp", country, max_year, year) + scaling = npp.year_ratio("ppp", country, maxyear, year) data = data.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]], on=["GENDER", "C_AGE"]) data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y data.PROJECTED_YEAR_NAME = year diff --git a/ukpopulation/myedata.py b/ukpopulation/myedata.py index 0334b4f..40ad2b5 100644 --- a/ukpopulation/myedata.py +++ b/ukpopulation/myedata.py @@ -2,6 +2,7 @@ MYEData - wrapper around Mid-Year Estimate data by LAD, SYoA and gender """ +import numpy as np import pandas as pd import ukcensusapi.Nomisweb as Api import ukpopulation.utils as utils @@ -47,9 +48,9 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]): # ensure array inputs if isinstance(geog_codes, str): geog_codes = [geog_codes] - if isinstance(ages, int): + if np.isscalar(ages): ages = [ages] - if isinstance(genders, int): + if np.isscalar(genders): genders = [genders] result = pd.DataFrame() diff --git a/ukpopulation/nppdata.py b/ukpopulation/nppdata.py index 0a1f55d..8f8cdbd 100644 --- a/ukpopulation/nppdata.py +++ b/ukpopulation/nppdata.py @@ -92,10 +92,10 @@ def detail(self, variant_name, geog, years=None, ages=range(0,91), genders=[1,2] """ Return a subset of the raw data """ - if isinstance(ages, int): + if np.isscalar(ages): ages = [ages] - if isinstance(genders, int): + if np.isscalar(genders): genders = [genders] if not variant_name in NPPData.VARIANTS: diff --git a/ukpopulation/snppdata.py b/ukpopulation/snppdata.py index c42d4d5..76dcb18 100644 --- a/ukpopulation/snppdata.py +++ b/ukpopulation/snppdata.py @@ -68,10 +68,10 @@ def 
filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]): if isinstance(geog_codes, str): geog_codes = [geog_codes] - if isinstance(ages, int): + if np.isscalar(ages): ages = [ages] - if isinstance(genders, int): + if np.isscalar(genders): genders = [genders] countries = utils.country(geog_codes)