From 912e0b970f19c97f40db68d3aba21cc7c22f0b92 Mon Sep 17 00:00:00 2001 From: Tilly Woodfield <22456167+tillywoodfield@users.noreply.github.com> Date: Wed, 17 Jul 2024 15:24:44 +0300 Subject: [PATCH] Add dataset and prefix to activity_objects --- iatidata/__init__.py | 56 ++++++++------------------------------------ 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/iatidata/__init__.py b/iatidata/__init__.py index 505b120..8a9cd27 100644 --- a/iatidata/__init__.py +++ b/iatidata/__init__.py @@ -479,6 +479,8 @@ def create_rows( object["_link"] = f'{id}{"." if object_key else ""}{object_key}' object["_link_activity"] = str(id) + object["dataset"] = dataset + object["prefix"] = prefix if object_type != "activity": object["iatiidentifier"] = activity.get("iati-identifier") reporting_org = activity.get("reporting-org", {}) or {} @@ -498,8 +500,6 @@ def create_rows( row = dict( id=id, - dataset=dataset, - prefix=prefix, object_key=object_key, parent_keys=json.dumps(parent_keys), object_type=object_type, @@ -525,8 +525,6 @@ def activity_objects() -> None: DROP TABLE IF EXISTS _activity_objects; CREATE TABLE _activity_objects( id bigint, - dataset TEXT, - prefix TEXT, object_key TEXT, parent_keys JSONB, object_type TEXT, @@ -644,6 +642,10 @@ def schema_analysis(): ) else: docs = f"Foreign key to {key[6:]} tables `_link` field" + elif key == "dataset": + order, docs = 0, "The name of the dataset this row came from." + elif key == "prefix": + order, docs = 0, "" elif key == "iatiidentifier": order, docs = 1, "A globally unique identifier for the activity." elif key == "reportingorg_ref" and object_type != "activity": @@ -682,42 +684,6 @@ def schema_analysis(): ) ) - results = connection.execute( - text( - """ - SELECT - object_type, - COUNT(prefix) AS count_prefix, - COUNT(dataset) AS count_dataset - FROM _activity_objects - GROUP BY object_type - """ - ) - ) - for row in results: - connection.execute( - insert(fields_table).values( - [ - { - "table_name": row.object_type, - "field": "prefix", - "type": "string", - "count": row.count_prefix, - "docs": "", - "field_order": -1, - }, - { - "table_name": row.object_type, - "field": "dataset", - "type": "string", - "count": row.count_dataset, - "docs": "The dataset which this row was pulled from.", - "field_order": -1, - }, - ], - ), - ) - connection.execute( text( """ @@ -785,8 +751,6 @@ def postgres_tables(drop_release_objects=False): """ SELECT table_name, field, type FROM _fields - WHERE field != 'prefix' - AND field != 'dataset' ORDER BY table_name, field_order, field """ ) @@ -798,10 +762,10 @@ def postgres_tables(drop_release_objects=False): for object_type, object_detail in object_details.items(): field_sql, as_sql = create_field_sql(object_detail) table_sql = f""" - SELECT dataset, prefix, {field_sql} - FROM _activity_objects, jsonb_to_record(object) AS x({as_sql}) - WHERE object_type = :object_type - """ + SELECT {field_sql} + FROM _activity_objects, jsonb_to_record(object) AS x({as_sql}) + WHERE object_type = :object_type + """ create_table(object_type, table_sql, object_type=object_type) logger.info("Creating table: metadata")