Skip to content

Commit

Permalink
improve code coverage & add description to report
Browse files Browse the repository at this point in the history
  • Loading branch information
merlos committed Oct 30, 2024
1 parent 00d392a commit 42e0cb1
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 21 deletions.
22 changes: 20 additions & 2 deletions primero-api/integration_tests/test_primero_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import os
import pytest
from pandas import DataFrame

from primero_api import PrimeroAPI
from primero_api import PrimeroAPI, Report

# Load from environment variables
PRIMERO_USER = os.getenv('PRIMERO_USER', 'primero')
Expand Down Expand Up @@ -33,8 +32,14 @@ def test_get_cases(primero_api):
cases = primero_api.get_cases()
assert cases is not None
assert type(cases) is DataFrame

def test_get_cases_anonymized_false(primero_api):
    """Requesting cases with anonymization switched off still yields a DataFrame."""
    raw_cases_frame = primero_api.get_cases(anonymized=False)
    assert raw_cases_frame is not None
    assert type(raw_cases_frame) is DataFrame



def test_get_incidents_raw(primero_api):
incidents = primero_api.get_incidents_raw()
assert incidents is not None
Expand All @@ -49,7 +54,20 @@ def test_get_incidents(primero_api):
def test_get_reports(primero_api):
    """get_reports() must return a plain dict (and never None)."""
    fetched_reports = primero_api.get_reports()
    assert fetched_reports is not None
    assert type(fetched_reports) is dict


def test_get_report(primero_api):
    """Fetch report 1 and check the types of the Report object and its public members."""
    report = primero_api.get_report(1)
    assert report is not None
    # isinstance() is the idiomatic type check (it also accepts subclasses),
    # unlike comparing type objects with ==.
    assert isinstance(report, Report)
    assert report.id == 1
    assert isinstance(report.name, str)
    assert isinstance(report.description, str)
    assert isinstance(report.slug, str)
    assert isinstance(report.to_pandas(), DataFrame)
    assert isinstance(report.labels(), dict)

def test_get_version(primero_api):
version = primero_api.get_server_version()
Expand Down
24 changes: 12 additions & 12 deletions primero-api/primero_api/primero_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,20 @@ def set_non_pii_cols(self, col_names=[]):
"""
self.non_pii_cols = col_names

def _anonymize_list(self, list_with_pii, additional_non_pii_cols:List = None):
def _anonymize_list(self, list_with_pii, additional_data:List = None):
"""
Anonymizes a list of dictionaries by removing personally identifiable information (PII).
Args:
list_with_pii (list): A list of dictionaries containing PII.
additional_non_pii_cols (List, optional): A list of additional non-PII columns to retain in the anonymized dictionaries. Defaults to None.
additional_data (List, optional): A list of additional non-PII columns to retain in the anonymized dictionaries. Defaults to None.
Returns:
list: A list of dictionaries with PII removed.
"""
anonymized_list = []
for dict_item in list_with_pii:
# remove pii cols
anonymized_item = self._extract_non_pii(dict_item,
additional_non_pii_cols=additional_non_pii_cols)
additional_data=additional_data)
anonymized_list.append(anonymized_item)
return anonymized_list

Expand Down Expand Up @@ -210,7 +210,7 @@ def _call_paginated_api(self, url: str):
page += 1
return data

def _extract_non_pii(self, data_dict, additional_non_pii_cols: List = None):
def _extract_non_pii(self, data_dict, additional_data: List = None):
"""
Removes personally identifiable information (PII) from a dictionary by keeping only self.non_pii_cols.
Expand All @@ -224,8 +224,8 @@ def _extract_non_pii(self, data_dict, additional_non_pii_cols: List = None):
dict: The dictionary with PII and additional columns removed.
"""
non_pii_cols = self.non_pii_cols.copy()
if additional_non_pii_cols:
non_pii_cols.extend(additional_non_pii_cols)
if additional_data:
non_pii_cols.extend(additional_data)

for key in list(data_dict.keys()):
if key not in non_pii_cols:
Expand All @@ -245,11 +245,11 @@ def get_cases_raw(self):
url = self.api_url + 'cases'
return self._call_paginated_api(url)

def get_cases(self, anonymized=True, additional_non_pii_cols:List=None):
def get_cases(self, anonymized=True, additional_data:List=None):
"""
Fetches case data from the Primero API.
anonymized: if True, removes personally identifiable information (PII) from the case data before returning it.
additional_non_pii_cols: Additional columns to whitelist from the case data. This is useful if you need any column that is not whitelisted by default.`
additional_data: Additional columns to whitelist from the case data. This is useful if you need any column that is not whitelisted by default.
See the property `self.non_pii_cols` for the default list of non-PII columns.
Expand All @@ -260,7 +260,7 @@ def get_cases(self, anonymized=True, additional_non_pii_cols:List=None):
"""
cases = self.get_cases_raw()
if anonymized:
anonymized_cases = self._anonymize_list(cases, additional_non_pii_cols=additional_non_pii_cols)
anonymized_cases = self._anonymize_list(cases, additional_data=additional_data)
return pd.DataFrame(anonymized_cases)
# otherwise return the raw data
return pd.DataFrame(cases)
Expand All @@ -275,14 +275,14 @@ def get_incidents_raw(self):
url = self.api_url + 'incidents'
return self._call_paginated_api(url)

def get_incidents(self, anonymized = True, additional_non_pii_cols:List=None):
def get_incidents(self, anonymized = True, additional_data:List=None):
"""
Retrieve incidents data, by default anonymized.
Parameters:
-----------
anonymized : bool, optional
If True, the incidents data will be anonymized. Default is True.
additional_non_pii_cols : List, optional
additional_data : List, optional
A list of additional non-PII (Personally Identifiable Information) columns to include in the anonymized data. Default is None.
Returns:
--------
Expand All @@ -292,7 +292,7 @@ def get_incidents(self, anonymized = True, additional_non_pii_cols:List=None):

incidents = self.get_incidents_raw()
if anonymized:
anonymized_incidents = self._anonymize_list(incidents, additional_non_pii_cols=additional_non_pii_cols)
anonymized_incidents = self._anonymize_list(incidents, additional_data=additional_data)
return pd.DataFrame(anonymized_incidents)
return pd.DataFrame(incidents)

Expand Down
3 changes: 2 additions & 1 deletion primero-api/primero_api/report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from .report_processors import process_report, report_name, report_slug, get_report_labels
from .report_processors import process_report, report_name, report_slug, get_report_labels, report_description
from .logger import logger

class Report:
Expand All @@ -24,6 +24,7 @@ def __init__(self, report_data_dict, lang='en'):

self.slug = report_slug(report_data_dict, lang)
self.name = report_name(report_data_dict, lang)
self.description = report_description(report_data_dict, lang)

def __str__(self):
return f'Report {self.id} ({self.name})'
Expand Down
26 changes: 24 additions & 2 deletions primero-api/primero_api/report_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,26 @@
# Utility tools for processing reports
#

def report_description(report, lang='en'):
    '''
    Returns the description for the report in the given language.

    If the report does not have a description (the key is missing, or its
    value is not a dictionary of translations, e.g. null), it returns ''
    If the language is not in the description, it returns the english description
    If the english description is not available either, it returns ''

    Args:
        report (dict): The report data; 'description' is expected to map
            language codes to the translated text.
        lang (str): Language code of the wanted translation. Defaults to 'en'.
    Returns:
        The description stored for `lang`, falling back to 'en', then to ''.
    '''
    descriptions = report.get('description')
    # A missing or null description (or any non-dict value) has no
    # translations to look up, so treat it as "no description".
    if not isinstance(descriptions, dict):
        return ''
    if lang in descriptions:
        return descriptions[lang]
    # Requested language is unavailable: fall back to english, then to ''.
    return descriptions.get('en', '')



def report_name(report, lang='en'):
'''
Returns the name for the report in the given language.
Expand Down Expand Up @@ -58,7 +78,9 @@ def search_dict(d):

def get_report_labels(report, lang='en'):
'''
Returns the labels for the report in the format
Returns the labels for the report (the data contained in option_labels in the original API response) in the format
label[id] = display_text
Some reports may not contain labels; in that case it returns an empty dictionary.
'''
all_labels=find_key_in_dict(report, 'option_labels')
Expand Down Expand Up @@ -104,7 +126,7 @@ def process_report(report, lang='en'):
# return empty dataframe if there is no report data
return pd.DataFrame()

labels = get_report_labels(report)
labels = get_report_labels(report, lang)
# Example of report_data
#
# report_data: {
Expand Down
39 changes: 35 additions & 4 deletions primero-api/tests/test_primero_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_extract_non_pii_custom_cols(primero_api):
'custom_non_pii': 'custom_value' # Additional non-PII
}

result = primero_api._extract_non_pii(record_with_custom_non_pii, additional_non_pii_cols=['custom_non_pii'])
result = primero_api._extract_non_pii(record_with_custom_non_pii, additional_data=['custom_non_pii'])
assert result == result_record_with_custom_non_pii

def test_extract_non_pii_custom_nonexistent_cols(primero_api):
Expand All @@ -90,8 +90,8 @@ def test_extract_non_pii_custom_nonexistent_cols(primero_api):
result_record_with_custom_nonexistent_pii = {
'enabled': 'non_pii_value', # Non-PII
}
# the additional_non_pii_cols do not exist and still does not break
result = primero_api._extract_non_pii(record_with_custom_non_pii, additional_non_pii_cols=['nonexistent_col'])
# the additional_data columns do not exist in the record, and the call still does not break
result = primero_api._extract_non_pii(record_with_custom_non_pii, additional_data=['nonexistent_col'])
assert result == result_record_with_custom_nonexistent_pii

def test_extract_non_pii_with_modified_non_pii_cols(primero_api):
Expand All @@ -111,4 +111,35 @@ def test_extract_non_pii_with_modified_non_pii_cols(primero_api):
'address_current': '123 Main St', # PII
}
result = primero_api._extract_non_pii(record_with_pii)
assert result == result_record_without_pii
assert result == result_record_without_pii


def test_get_cases_raw(primero_api):
    """get_cases_raw() returns the records served by the mocked cases endpoint."""
    # Payload the mocked endpoint answers with (two records, one page)
    mocked_payload = {
        'data': [{'id': 1}, {'id': 2}],
        'metadata': {'page': 1, 'total': 2, 'per': 2},
    }

    with requests_mock.Mocker() as mocker:
        mocker.get('http://test.api/cases', json=mocked_payload)
        raw_cases = primero_api.get_cases_raw()

    assert len(raw_cases) == 2
    assert raw_cases == [{'id': 1}, {'id': 2}]

# Get incidents
def test_get_incidents_raw(primero_api):
    """get_incidents_raw() returns the records served by the mocked incidents endpoint."""
    # Payload the mocked endpoint answers with (two records, one page)
    mocked_payload = {
        'data': [{'id': 1}, {'id': 2}],
        'metadata': {'page': 1, 'total': 2, 'per': 2},
    }

    with requests_mock.Mocker() as mocker:
        mocker.get('http://test.api/incidents', json=mocked_payload)
        raw_incidents = primero_api.get_incidents_raw()

    assert len(raw_incidents) == 2
    assert raw_incidents == [{'id': 1}, {'id': 2}]

0 comments on commit 42e0cb1

Please sign in to comment.