-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
13e398f
commit 36703c7
Showing
5 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
sample_id,aliquot_id,participant,tissue_type,composition,age_at_collection,analyte,parent_sample | ||
s001,a001,p01,tumor,tissue,32,DNA,na | ||
s001,a002,p01,tumor,tissue,32,DNA,na | ||
s001,a003,p01,tumor,tissue,32,DNA,na | ||
s001,a004,p01,tumor,tissue,32,RNA,na | ||
s002,a005,p01,normal,blood,42,DNA,na | ||
s002,a006,p01,normal,blood,42,DNA,na | ||
s003,a007,p01,tumor,tissue,32,RNA,s001 | ||
s003,a008,p01,tumor,tissue,32,DNA,s001 | ||
s004,na,p01,tumor,tissue,32,na,s003 | ||
s005,a009,p02,tumor,bone,56,DNA,na | ||
s006,a010,p02,normal,blood,58,DNA,na | ||
s007,na,p03,tumor,tissue,61,NA,na |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
""" | ||
Auto-generated extract config module. | ||
See documentation at | ||
https://kids-first.github.io/kf-lib-data-ingest/tutorial/extract.html for | ||
information on writing extract config files. | ||
""" | ||
|
||
from kf_lib_data_ingest.common import constants | ||
from kf_lib_data_ingest.etl.extract.operations import keep_map, value_map | ||
from kf_lib_data_ingest.common.concept_schema import CONCEPT | ||
|
||
# Location of the sample manifest CSV that this extract config reads.
source_data_url = "file://../data/sample_manifest.csv"


def _na_to_none(value):
    """Return None for the literal placeholder "na"; pass other values through."""
    if value == "na":
        return None
    return value


# Source tissue_type labels -> standard tissue type constants.
_TISSUE_TYPE_MAP = {
    "tumor": constants.SPECIMEN.TISSUE_TYPE.TUMOR,
    "normal": constants.SPECIMEN.TISSUE_TYPE.NORMAL,
}

# Source composition labels -> standard composition constants.
_COMPOSITION_MAP = {
    "tissue": constants.SPECIMEN.COMPOSITION.TISSUE,
    "blood": constants.SPECIMEN.COMPOSITION.BLOOD,
    "bone": constants.SPECIMEN.COMPOSITION.BONE,
}

# Source analyte labels -> standard analyte constants. Both spellings of the
# missing-value placeholder ("NA" and "na") are mapped to None.
_ANALYTE_MAP = {
    "DNA": constants.SEQUENCING.ANALYTE.DNA,
    "RNA": constants.SEQUENCING.ANALYTE.RNA,
    "NA": None,
    "na": None,
}

# One operation per manifest column, in manifest column order: keep_map
# copies a column as-is, value_map translates its values.
operations = [
    keep_map(out_col=CONCEPT.PARTICIPANT.ID, in_col="participant"),
    keep_map(out_col=CONCEPT.SAMPLE.ID, in_col="sample_id"),
    value_map(
        m=_na_to_none,
        in_col="aliquot_id",
        out_col=CONCEPT.BIOSPECIMEN.ID,
    ),
    value_map(
        m=_TISSUE_TYPE_MAP,
        in_col="tissue_type",
        out_col=CONCEPT.SAMPLE.TISSUE_TYPE,
    ),
    value_map(
        m=_COMPOSITION_MAP,
        in_col="composition",
        out_col=CONCEPT.SAMPLE.COMPOSITION,
    ),
    # NOTE(review): values are copied unchanged into an *_AGE_DAYS column;
    # confirm that age_at_collection in the manifest is expressed in days.
    keep_map(
        out_col=CONCEPT.SAMPLE.EVENT_AGE_DAYS,
        in_col="age_at_collection",
    ),
    value_map(
        m=_ANALYTE_MAP,
        in_col="analyte",
        out_col=CONCEPT.BIOSPECIMEN.ANALYTE,
    ),
]
25 changes: 25 additions & 0 deletions
25
tests/data/sample_registration/extract_configs/sample_relationship.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
""" | ||
Auto-generated extract config module. | ||
See documentation at | ||
https://kids-first.github.io/kf-lib-data-ingest/tutorial/extract.html for | ||
information on writing extract config files. | ||
""" | ||
|
||
from kf_lib_data_ingest.common import constants | ||
from kf_lib_data_ingest.etl.extract.operations import keep_map, value_map | ||
from kf_lib_data_ingest.common.concept_schema import CONCEPT | ||
|
||
# Location of the sample manifest CSV that this extract config reads.
source_data_url = "file://../data/sample_manifest.csv"


def _na_to_none(value):
    """Return None for the literal placeholder "na"; pass other values through."""
    if value == "na":
        return None
    return value


# Pair every sample with its parent sample; a parent of "na" becomes None.
# NOTE(review): the parent sample ID is written to the BIOSPECIMEN.ID column;
# confirm that this is the column the sample_relationship entity expects.
operations = [
    keep_map(out_col=CONCEPT.PARTICIPANT.ID, in_col="participant"),
    keep_map(out_col=CONCEPT.SAMPLE.ID, in_col="sample_id"),
    value_map(
        m=_na_to_none,
        in_col="parent_sample",
        out_col=CONCEPT.BIOSPECIMEN.ID,
    ),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
""" Ingest Package Config """ | ||
|
||
# The list of entities that will be loaded into the target service.
# Every name needs its own comma: Python silently joins adjacent string
# literals, so a missing comma would produce one "samplebiospecimen" entry
# instead of the two separate entities.
target_service_entities = ["sample", "biospecimen", "sample_relationship"]

# All paths are relative to the directory this file is in
extract_config_dir = "extract_configs"

# File that defines the transform function for this package
transform_function_path = "transform_module.py"

# Identifier of the study this ingest package belongs to
study = "SD_ME0WME0W"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
""" | ||
Auto-generated transform module | ||
Replace the contents of transform_function with your own code | ||
See documentation at | ||
https://kids-first.github.io/kf-lib-data-ingest/ for information on | ||
implementing transform_function. | ||
""" | ||
|
||
import os | ||
|
||
# Use these merge funcs, not pandas.merge | ||
from kf_lib_data_ingest.common.pandas_utils import outer_merge | ||
from kf_lib_data_ingest.common.concept_schema import CONCEPT | ||
from kf_lib_data_ingest.config import DEFAULT_KEY | ||
|
||
|
||
def transform_function(mapped_df_dict):
    """
    Build the transform output from the extracted DataFrames.

    :param mapped_df_dict: dict of DataFrames; this function reads the
        "sample_relationship.py" and "sample.py" entries
    :return: dict holding the "sample.py" frame unchanged under DEFAULT_KEY
        and, under "sample_relationship", the "sample_relationship.py" frame
        with every row that contains a missing value removed
    """
    # dropna() with its defaults discards any row that has at least one
    # missing value (presumably the rows with no parent sample -- confirm).
    relationships = mapped_df_dict["sample_relationship.py"].dropna()
    samples = mapped_df_dict["sample.py"]

    return {
        DEFAULT_KEY: samples,
        "sample_relationship": relationships,
    }