diff --git a/kf_lib_data_ingest/common/concept_schema.py b/kf_lib_data_ingest/common/concept_schema.py index 4046de59..3370489e 100644 --- a/kf_lib_data_ingest/common/concept_schema.py +++ b/kf_lib_data_ingest/common/concept_schema.py @@ -146,12 +146,16 @@ class EVENT_AGE(QuantityMixin): pass class BIOSPECIMEN_GROUP(PropertyMixin): + pass + + class SAMPLE(PropertyMixin): TISSUE_TYPE = None NCIT_TISSUE_TYPE_ID = None ANATOMY_SITE = None NCIT_ANATOMY_SITE_ID = None UBERON_ANATOMY_SITE_ID = None COMPOSITION = None + TUMOR_DESCRIPTOR = None EVENT_ID = None EVENT_AGE_DAYS = None @@ -161,36 +165,22 @@ class EVENT_AGE(QuantityMixin): class VOLUME(QuantityMixin): pass + SPATIAL_DESCRIPTOR = None + SHIPMENT_ORIGIN = None + SHIPMENT_DATE = None VOLUME_UL = None SAMPLE_PROCUREMENT = None PRESERVATION_METHOD = None - class BIOSPECIMEN(PropertyMixin): - TISSUE_TYPE = None - NCIT_TISSUE_TYPE_ID = None - ANATOMY_SITE = None - NCIT_ANATOMY_SITE_ID = None - UBERON_ANATOMY_SITE_ID = None - TUMOR_DESCRIPTOR = None - COMPOSITION = None - EVENT_AGE_DAYS = None - - class EVENT_AGE(QuantityMixin): - pass - + class BIOSPECIMEN(SAMPLE): class QUANTITY(QuantityMixin): pass class CONCENTRATION(QuantityMixin): pass - SPATIAL_DESCRIPTOR = None - SHIPMENT_ORIGIN = None - SHIPMENT_DATE = None ANALYTE = None CONCENTRATION_MG_PER_ML = None - VOLUME_UL = None - SAMPLE_PROCUREMENT = None DBGAP_STYLE_CONSENT_CODE = None CONSENT_SHORT_NAME = None @@ -264,14 +254,18 @@ def compile_schema(): property_path = [] property_paths = set() - _set_cls_attrs(CONCEPT, None, property_path, property_paths, include_root=False) + _set_cls_attrs( + CONCEPT, None, property_path, property_paths, include_root=False + ) return property_paths str_to_CONCEPT = {} -def _set_cls_attrs(node, prev_node, property_path, property_paths, include_root=False): +def _set_cls_attrs( + node, prev_node, property_path, property_paths, include_root=False +): """ Recursive method to traverse a class hierarchy and set class attributes equal to a string which represents a path in the hierarchy to reach the @@ -355,6 +349,7 @@ class C: CONCEPT.FAMILY_RELATIONSHIP.PERSON2, CONCEPT.BIOSPECIMEN_GROUP, CONCEPT.BIOSPECIMEN, + CONCEPT.SAMPLE, CONCEPT.DIAGNOSIS, CONCEPT.PHENOTYPE, CONCEPT.DIAGNOSIS, diff --git a/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py b/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py index 63e1aefd..f33f7100 100644 --- a/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py +++ b/kf_lib_data_ingest/target_api_plugins/kids_first_dataservice.py @@ -398,7 +398,9 @@ def submit(cls, host, body): class Sample: class_name = "sample" api_path = "samples" - target_id_concept = CONCEPT.BIOSPECIMEN_GROUP.TARGET_SERVICE_ID + target_id_concept = ( + CONCEPT.SAMPLE.TARGET_SERVICE_ID or CONCEPT.BIOSPECIMEN_GROUP.TARGET_SERVICE_ID + ) service_id_fields = {"kf_id", "participant_id"} @classmethod @@ -416,30 +418,44 @@ def query_target_ids(cls, host, key_components): def build_entity(cls, record, get_target_id_from_record): secondary_components = { "kf_id": get_target_id_from_record(cls, record), - "age_at_event_days": flexible_age( - record, - CONCEPT.BIOSPECIMEN_GROUP.EVENT_AGE_DAYS, - CONCEPT.BIOSPECIMEN_GROUP.EVENT_AGE, + "age_at_event_days": ( + flexible_age( + record, + CONCEPT.SAMPLE.EVENT_AGE_DAYS, + CONCEPT.SAMPLE.EVENT_AGE, + ) + or flexible_age( + record, + CONCEPT.BIOSPECIMEN.EVENT_AGE_DAYS, + CONCEPT.BIOSPECIMEN.EVENT_AGE, + ) ), - "anatomical_location": record.get(CONCEPT.BIOSPECIMEN_GROUP.ANATOMY_SITE), - "method_of_sample_procurement": record.get( - CONCEPT.BIOSPECIMEN_GROUP.SAMPLE_PROCUREMENT + "anatomical_location": ( + record.get(CONCEPT.SAMPLE.ANATOMY_SITE) + or record.get(CONCEPT.BIOSPECIMEN.ANATOMY_SITE) + ), + "method_of_sample_procurement": ( + record.get(CONCEPT.SAMPLE.SAMPLE_PROCUREMENT) + or record.get(CONCEPT.BIOSPECIMEN.SAMPLE_PROCUREMENT) ), "participant_id": not_none(get_target_id_from_record(Participant, record)), - "preservation_method": record.get( - CONCEPT.BIOSPECIMEN_GROUP.PRESERVATION_METHOD + "preservation_method": record.get(CONCEPT.SAMPLE.PRESERVATION_METHOD), + "sample_event_key": record.get(CONCEPT.SAMPLE.EVENT_ID), + "sample_type": ( + record.get(CONCEPT.SAMPLE.COMPOSITION) + or record.get(CONCEPT.BIOSPECIMEN.COMPOSITION) ), - "sample_event_key": record.get(CONCEPT.BIOSPECIMEN_GROUP.EVENT_ID), - "sample_type": record.get(CONCEPT.BIOSPECIMEN_GROUP.COMPOSITION), - "tissue_type": record.get(CONCEPT.BIOSPECIMEN_GROUP.TISSUE_TYPE), - "visible": record.get(CONCEPT.BIOSPECIMEN_GROUP.VISIBLE), - "visibility_comment": record.get( - CONCEPT.BIOSPECIMEN_GROUP.VISIBILITY_COMMENT + "tissue_type": ( + record.get(CONCEPT.SAMPLE.TISSUE_TYPE) + or record.get(CONCEPT.BIOSPECIMEN.TISSUE_TYPE) ), - "visibility_reason": record.get( - CONCEPT.BIOSPECIMEN_GROUP.VISIBILTIY_REASON + "visible": record.get(CONCEPT.SAMPLE.VISIBLE), + "visibility_comment": record.get(CONCEPT.SAMPLE.VISIBILITY_COMMENT), + "visibility_reason": record.get(CONCEPT.SAMPLE.VISIBILTIY_REASON), + "volume_ul": ( + record.get(CONCEPT.SAMPLE.VOLUME_UL) + or record.get(CONCEPT.BIOSPECIMEN.VOLUME_UL) ), - "volume_ul": record.get(CONCEPT.BIOSPECIMEN.VOLUME_UL), } return { **cls.get_key_components(record, get_target_id_from_record), @@ -473,65 +489,76 @@ def build_entity(cls, record, get_target_id_from_record): ), "participant_id": not_none(get_target_id_from_record(Participant, record)), "external_sample_id": ( - record.get(CONCEPT.BIOSPECIMEN_GROUP.ID) + record.get(CONCEPT.SAMPLE.ID) + or record.get(CONCEPT.BIOSPECIMEN_GROUP.ID) or not_none(record[CONCEPT.BIOSPECIMEN.ID]) ), "source_text_tissue_type": ( - record.get(CONCEPT.BIOSPECIMEN.TISSUE_TYPE) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.TISSUE_TYPE) + record.get(CONCEPT.SAMPLE.TISSUE_TYPE) + or record.get(CONCEPT.BIOSPECIMEN.TISSUE_TYPE) ), "composition": ( - record.get(CONCEPT.BIOSPECIMEN.COMPOSITION) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.COMPOSITION) + record.get(CONCEPT.SAMPLE.COMPOSITION) + or record.get(CONCEPT.BIOSPECIMEN.COMPOSITION) ), "source_text_anatomical_site": ( - record.get(CONCEPT.BIOSPECIMEN.ANATOMY_SITE) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.ANATOMY_SITE) + record.get(CONCEPT.SAMPLE.ANATOMY_SITE) + or record.get(CONCEPT.BIOSPECIMEN.ANATOMY_SITE) ), "age_at_event_days": ( flexible_age( record, - CONCEPT.BIOSPECIMEN.EVENT_AGE_DAYS, - CONCEPT.BIOSPECIMEN.EVENT_AGE, + CONCEPT.SAMPLE.EVENT_AGE_DAYS, + CONCEPT.SAMPLE.EVENT_AGE, ) or flexible_age( record, - CONCEPT.BIOSPECIMEN_GROUP.EVENT_AGE_DAYS, - CONCEPT.BIOSPECIMEN_GROUP.EVENT_AGE, + CONCEPT.BIOSPECIMEN.EVENT_AGE_DAYS, + CONCEPT.BIOSPECIMEN.EVENT_AGE, ) ), - "source_text_tumor_descriptor": record.get( - CONCEPT.BIOSPECIMEN.TUMOR_DESCRIPTOR + "source_text_tumor_descriptor": ( + record.get(CONCEPT.SAMPLE.TUMOR_DESCRIPTOR) + or record.get(CONCEPT.BIOSPECIMEN.TUMOR_DESCRIPTOR) ), "ncit_id_tissue_type": ( - record.get(CONCEPT.BIOSPECIMEN.NCIT_TISSUE_TYPE_ID) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.NCIT_TISSUE_TYPE_ID) + record.get(CONCEPT.SAMPLE.NCIT_TISSUE_TYPE_ID) + or record.get(CONCEPT.BIOSPECIMEN.NCIT_TISSUE_TYPE_ID) ), "ncit_id_anatomical_site": ( - record.get(CONCEPT.BIOSPECIMEN.NCIT_ANATOMY_SITE_ID) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.NCIT_ANATOMY_SITE_ID) + record.get(CONCEPT.SAMPLE.NCIT_ANATOMY_SITE_ID) + or record.get(CONCEPT.BIOSPECIMEN.NCIT_ANATOMY_SITE_ID) ), "uberon_id_anatomical_site": ( - record.get(CONCEPT.BIOSPECIMEN.UBERON_ANATOMY_SITE_ID) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.UBERON_ANATOMY_SITE_ID) + record.get(CONCEPT.SAMPLE.UBERON_ANATOMY_SITE_ID) + or record.get(CONCEPT.BIOSPECIMEN.UBERON_ANATOMY_SITE_ID) + ), + "spatial_descriptor": ( + record.get(CONCEPT.SAMPLE.SPATIAL_DESCRIPTOR) + or record.get(CONCEPT.BIOSPECIMEN.SPATIAL_DESCRIPTOR) + ), + "shipment_origin": ( + record.get(CONCEPT.SAMPLE.SHIPMENT_ORIGIN) + or record.get(CONCEPT.BIOSPECIMEN.SHIPMENT_ORIGIN) + ), + "shipment_date": ( + record.get(CONCEPT.SAMPLE.SHIPMENT_DATE) + or record.get(CONCEPT.BIOSPECIMEN.SHIPMENT_DATE) ), - "spatial_descriptor": record.get(CONCEPT.BIOSPECIMEN.SPATIAL_DESCRIPTOR), - "shipment_origin": record.get(CONCEPT.BIOSPECIMEN.SHIPMENT_ORIGIN), - "shipment_date": record.get(CONCEPT.BIOSPECIMEN.SHIPMENT_DATE), "analyte_type": record.get(CONCEPT.BIOSPECIMEN.ANALYTE), "concentration_mg_per_ml": record.get( CONCEPT.BIOSPECIMEN.CONCENTRATION_MG_PER_ML ), "volume_ul": ( - record.get(CONCEPT.BIOSPECIMEN.VOLUME_UL) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.VOLUME_UL) + record.get(CONCEPT.SAMPLE.VOLUME_UL) + or record.get(CONCEPT.BIOSPECIMEN.VOLUME_UL) ), "visible": record.get(CONCEPT.BIOSPECIMEN.VISIBLE), "visibility_comment": record.get(CONCEPT.BIOSPECIMEN.VISIBILITY_COMMENT), "visibility_reason": record.get(CONCEPT.BIOSPECIMEN.VISIBILTIY_REASON), "method_of_sample_procurement": ( - record.get(CONCEPT.BIOSPECIMEN.SAMPLE_PROCUREMENT) - or record.get(CONCEPT.BIOSPECIMEN_GROUP.SAMPLE_PROCUREMENT) + record.get(CONCEPT.SAMPLE.SAMPLE_PROCUREMENT) + or record.get(CONCEPT.BIOSPECIMEN.SAMPLE_PROCUREMENT) ), "dbgap_consent_code": record.get( CONCEPT.BIOSPECIMEN.DBGAP_STYLE_CONSENT_CODE