Skip to content

Commit

Permalink
more tests for custom snpp; better checks for single values; update doc
Browse files Browse the repository at this point in the history
  • Loading branch information
virgesmith committed Jun 6, 2019
1 parent e30dea9 commit 46bb42c
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 15 deletions.
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,29 @@

# ukpopulation: UK Demographic Projections

> ## Latest news: 1.1 release
> ## Latest news: 1.2 release
> - adds support for custom subnational population projections
> ### Custom SNPP Data
> An externally generated SNPP dataset (from e.g. [simim](https://github.com/nismod/simim)) can be registered with the `ukpopulation` package and used as if it were the standard ONS/StatsWales/NRScotland/NISRA projection:
> ```python3
> >>> import pandas as pd
> >>> import ukpopulation.customsnppdata as CustomSNPPData
> >>> customdata = pd.read_csv("custom_snpp.csv")
> >>> customdata.head()
> GEOGRAPHY_CODE GENDER C_AGE OBS_VALUE PROJECTED_YEAR_NAME
> 0 E06000005 1 0 603.0 2018
> 1 E06000005 1 1 600.0 2018
> 2 E06000005 1 2 624.0 2018
> 3 E06000005 1 3 636.0 2018
> 4 E06000005 1 4 661.0 2018
> >>> CustomSNPPData.register_custom_projection("custom_snpp", customdata, "cache_directory")
> Writing custom SNPP custom_snpp to cache_directory/ukpopulation_custom_snpp_custom_snpp.csv
> >>> CustomSNPPData.list_custom_projections("cache_directory")
> ['custom_snpp']
> >>>
> ```
> The external dataset must follow the format/column name conventions as above, but can also contain extra data if required for other use. The `GENDER` column should only take the values 1 (male) or 2 (female); the `C_AGE` column should contain the range 0-90 inclusive (90 meaning 90 or over).
> ## 1.1 release
> - adds UK household projections
> - initial support for custom SNPP variants
> - better consistency across the MYE/NPP/SNPP APIs (breaks backwards compatibility)
Expand Down
9 changes: 8 additions & 1 deletion tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,14 @@ def test_snpp_custom_projection(self):
self.assertEqual(len(agg), 2)
self.assertAlmostEqual(agg.OBS_VALUE.sum(), 30760.0, 5) # remember this is population under 46


# test extrapolagg is equivalent to extrapolate + external agg
years = range(custom.max_year()-1, custom.max_year() + 2)
ext = utils.aggregate(custom.extrapolate(self.npp, "E06000001", years), ["GENDER", "C_AGE"])
extagg = custom.extrapolagg(["GENDER", "C_AGE"], self.npp, "E06000001", years)
self.assertTrue(ext.equals(extagg))
self.assertEqual(len(ext.GEOGRAPHY_CODE.unique()), 1)
self.assertEqual(ext.GEOGRAPHY_CODE.unique()[0], "E06000001")
self.assertAlmostEqual(ext.OBS_VALUE.sum(), 279841.6197443956, 5)

# test datasets have consistent ranges
def test_consistency(self):
Expand Down
14 changes: 7 additions & 7 deletions ukpopulation/customsnppdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,13 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):

if years is None:
years=range(self.min_year(), self.max_year()+1)
if isinstance(years, int):
if np.isscalar(years):
years = [years]

if isinstance(ages, int):
if np.isscalar(ages):
ages = [ages]

if isinstance(genders, int):
if np.isscalar(genders):
genders = [genders]

# check for any requested codes that weren't present
Expand Down Expand Up @@ -111,16 +111,16 @@ def extrapolate(self, npp, geog_codes, year_range):
for country in geog_codes:
if not geog_codes[country]: continue

max_year = self.max_year()
last_year = self.filter(geog_codes[country], max_year)
maxyear = self.max_year()
last_year = self.filter(geog_codes[country], maxyear)

(in_range, ex_range) = utils.split_range(year_range, max_year)
(in_range, ex_range) = utils.split_range(year_range, maxyear)
# years that don't need to be extrapolated
all_years = self.filter(geog_codes[country], in_range) if in_range else pd.DataFrame()

for year in ex_range:
data = last_year.copy()
scaling = npp.year_ratio("ppp", country, max_year, year)
scaling = npp.year_ratio("ppp", country, maxyear, year)
data = data.merge(scaling[["GENDER", "C_AGE", "OBS_VALUE"]], on=["GENDER", "C_AGE"])
data["OBS_VALUE"] = data.OBS_VALUE_x * data.OBS_VALUE_y
data.PROJECTED_YEAR_NAME = year
Expand Down
5 changes: 3 additions & 2 deletions ukpopulation/myedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
MYEData - wrapper around Mid-Year Estimate data by LAD, SYoA and gender
"""

import numpy as np
import pandas as pd
import ukcensusapi.Nomisweb as Api
import ukpopulation.utils as utils
Expand Down Expand Up @@ -47,9 +48,9 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
# ensure array inputs
if isinstance(geog_codes, str):
geog_codes = [geog_codes]
if isinstance(ages, int):
if np.isscalar(ages):
ages = [ages]
if isinstance(genders, int):
if np.isscalar(genders):
genders = [genders]

result = pd.DataFrame()
Expand Down
4 changes: 2 additions & 2 deletions ukpopulation/nppdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ def detail(self, variant_name, geog, years=None, ages=range(0,91), genders=[1,2]
"""
Return a subset of the raw data
"""
if isinstance(ages, int):
if np.isscalar(ages):
ages = [ages]

if isinstance(genders, int):
if np.isscalar(genders):
genders = [genders]

if not variant_name in NPPData.VARIANTS:
Expand Down
4 changes: 2 additions & 2 deletions ukpopulation/snppdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ def filter(self, geog_codes, years=None, ages=range(0,91), genders=[1,2]):
if isinstance(geog_codes, str):
geog_codes = [geog_codes]

if isinstance(ages, int):
if np.isscalar(ages):
ages = [ages]

if isinstance(genders, int):
if np.isscalar(genders):
genders = [genders]

countries = utils.country(geog_codes)
Expand Down

0 comments on commit 46bb42c

Please sign in to comment.