From fe002055a56858215ef826694a8847d165bc3aee Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 8 May 2024 13:55:48 -0400 Subject: [PATCH 1/2] :bug: Don't use default limit=100 in target ID query --- .../kids_first_dataservice.py | 59 ++++++++++++++----- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py b/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py index edd093fe..c56934b2 100644 --- a/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py +++ b/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py @@ -77,7 +77,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -123,9 +125,9 @@ def query_target_ids(cls, host, key_components): if kfid: return [kfid] else: - return list( - yield_kfids(host, cls.api_path, drop_none(key_components)) - ) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -168,7 +170,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -264,7 +268,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -325,7 +331,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -392,6 +400,7 @@ def query_target_ids(cls, host, key_components): # We no longer want multiple participant outcome entries. # Patch whatever is latest for compatibility with existing dataservice # entries. + key_components["limit"] = 2 pes = sorted( yield_entities(host, cls.api_path, key_components), key=lambda e: e.get("age_at_event_days", 0), @@ -444,7 +453,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -535,7 +546,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -615,7 +628,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -658,7 +673,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -760,7 +777,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -806,7 +825,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -852,7 +873,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -898,7 +921,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): @@ -944,7 +969,9 @@ def get_key_components(cls, record, get_target_id_from_record): @classmethod def query_target_ids(cls, host, key_components): - return list(yield_kfids(host, cls.api_path, drop_none(key_components))) + filters = drop_none(key_components) + filters["limit"] = 2 + return list(yield_kfids(host, cls.api_path, filters)) @classmethod def build_entity(cls, record, get_target_id_from_record): From 50f2b5b3f239a89b4b2a6d74c7e55a488b37b797 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 8 May 2024 15:46:21 -0400 Subject: [PATCH 2/2] :zap: Rm study_id filter for gf lookup to improve query perf --- kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py b/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py index c56934b2..40a1dcb3 100644 --- a/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py +++ b/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py @@ -540,7 +540,7 @@ class GenomicFile: def get_key_components(cls, record, get_target_id_from_record): # FIXME: Temporary until KFDRC file hashes are reliably stable return { - "study_id": get_target_id_from_record(Study, record), + # "study_id": get_target_id_from_record(Study, record), "external_id": not_none(record[CONCEPT.GENOMIC_FILE.ID]), }