Skip to content

Commit

Permalink
✅ Add sample test
Browse files Browse the repository at this point in the history
  • Loading branch information
chris-s-friedman committed May 31, 2024
1 parent 13e398f commit 36703c7
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 0 deletions.
13 changes: 13 additions & 0 deletions tests/data/sample_registration/data/sample_manifest.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
sample_id,aliquot_id,participant,tissue_type,composition,age_at_collection,analyte,parent_sample
s001,a001,p01,tumor,tissue,32,DNA,na
s001,a002,p01,tumor,tissue,32,DNA,na
s001,a003,p01,tumor,tissue,32,DNA,na
s001,a004,p01,tumor,tissue,32,RNA,na
s002,a005,p01,normal,blood,42,DNA,na
s002,a006,p01,normal,blood,42,DNA,na
s003,a007,p01,tumor,tissue,32,RNA,s001
s003,a008,p01,tumor,tissue,32,DNA,s001
s004,na,p01,tumor,tissue,32,na,s003
s005,a009,p02,tumor,bone,56,DNA,na
s006,a010,p02,normal,blood,58,DNA,na
s007,na,p03,tumor,tissue,61,NA,na
53 changes: 53 additions & 0 deletions tests/data/sample_registration/extract_configs/sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Auto-generated extract config module.
See documentation at
https://kids-first.github.io/kf-lib-data-ingest/tutorial/extract.html for
information on writing extract config files.
"""

from kf_lib_data_ingest.common import constants
from kf_lib_data_ingest.etl.extract.operations import keep_map, value_map
from kf_lib_data_ingest.common.concept_schema import CONCEPT

# Source manifest for this extract config, given as a file:// URL.
source_data_url = "file://../data/sample_manifest.csv"


def _na_to_none(value):
    """Return None for the literal placeholder "na"; pass other values through."""
    return None if value == "na" else value


# Raw `tissue_type` values -> standard tissue type constants.
_TISSUE_TYPE_MAP = {
    "tumor": constants.SPECIMEN.TISSUE_TYPE.TUMOR,
    "normal": constants.SPECIMEN.TISSUE_TYPE.NORMAL,
}

# Raw `composition` values -> standard composition constants.
_COMPOSITION_MAP = {
    "tissue": constants.SPECIMEN.COMPOSITION.TISSUE,
    "blood": constants.SPECIMEN.COMPOSITION.BLOOD,
    "bone": constants.SPECIMEN.COMPOSITION.BONE,
}

# Raw `analyte` values -> standard analyte constants. Both spellings of the
# "not available" placeholder ("NA" and "na") are mapped to None.
_ANALYTE_MAP = {
    "DNA": constants.SEQUENCING.ANALYTE.DNA,
    "RNA": constants.SEQUENCING.ANALYTE.RNA,
    "NA": None,
    "na": None,
}

# Column-by-column mapping from the manifest's headers to concept attributes.
operations = [
    # Identifiers that are copied through unchanged.
    keep_map(in_col="participant", out_col=CONCEPT.PARTICIPANT.ID),
    keep_map(in_col="sample_id", out_col=CONCEPT.SAMPLE.ID),
    # Aliquot IDs: the "na" placeholder becomes None.
    value_map(
        in_col="aliquot_id",
        m=_na_to_none,
        out_col=CONCEPT.BIOSPECIMEN.ID,
    ),
    value_map(
        in_col="tissue_type",
        m=_TISSUE_TYPE_MAP,
        out_col=CONCEPT.SAMPLE.TISSUE_TYPE,
    ),
    value_map(
        in_col="composition",
        m=_COMPOSITION_MAP,
        out_col=CONCEPT.SAMPLE.COMPOSITION,
    ),
    keep_map(
        in_col="age_at_collection",
        out_col=CONCEPT.SAMPLE.EVENT_AGE_DAYS,
    ),
    value_map(
        in_col="analyte",
        m=_ANALYTE_MAP,
        out_col=CONCEPT.BIOSPECIMEN.ANALYTE,
    ),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Auto-generated extract config module.
See documentation at
https://kids-first.github.io/kf-lib-data-ingest/tutorial/extract.html for
information on writing extract config files.
"""

from kf_lib_data_ingest.common import constants
from kf_lib_data_ingest.etl.extract.operations import keep_map, value_map
from kf_lib_data_ingest.common.concept_schema import CONCEPT

# Source manifest for this extract config, given as a file:// URL.
source_data_url = "file://../data/sample_manifest.csv"


def _na_to_none(value):
    """Return None for the literal placeholder "na"; pass other values through."""
    return None if value == "na" else value


# Column-by-column mapping from the manifest's headers to concept attributes.
operations = [
    # Identifiers that are copied through unchanged.
    keep_map(in_col="participant", out_col=CONCEPT.PARTICIPANT.ID),
    keep_map(in_col="sample_id", out_col=CONCEPT.SAMPLE.ID),
    # Parent sample reference: the "na" placeholder becomes None.
    # NOTE(review): the parent sample ID is written to CONCEPT.BIOSPECIMEN.ID;
    # confirm this is the intended target concept for the parent reference.
    value_map(
        in_col="parent_sample",
        m=_na_to_none,
        out_col=CONCEPT.BIOSPECIMEN.ID,
    ),
]
11 changes: 11 additions & 0 deletions tests/data/sample_registration/ingest_package_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
""" Ingest Package Config """

# The list of entities that will be loaded into the target service.
# Every entry must be separated by a comma: Python silently concatenates
# adjacent string literals, so `"sample" "biospecimen"` would become the
# single (nonexistent) entity "samplebiospecimen".
target_service_entities = ["sample", "biospecimen", "sample_relationship"]

# All paths are relative to the directory this file is in
extract_config_dir = "extract_configs"

# Path to the module that provides the transform function
transform_function_path = "transform_module.py"

# Study identifier for this ingest package
study = "SD_ME0WME0W"
23 changes: 23 additions & 0 deletions tests/data/sample_registration/transform_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Auto-generated transform module
Replace the contents of transform_function with your own code
See documentation at
https://kids-first.github.io/kf-lib-data-ingest/ for information on
implementing transform_function.
"""

import os

# Use these merge funcs, not pandas.merge
from kf_lib_data_ingest.common.pandas_utils import outer_merge
from kf_lib_data_ingest.common.concept_schema import CONCEPT
from kf_lib_data_ingest.config import DEFAULT_KEY


def transform_function(mapped_df_dict):
    """
    Assemble the transform output from the extracted tables.

    :param mapped_df_dict: mapping keyed by extract config file name; the
        entries "sample_relationship.py" and "sample.py" are read here
        (presumably pandas DataFrames - confirm against the caller)
    :return: dict with the "sample.py" table stored under DEFAULT_KEY and
        the filtered relationship table stored under "sample_relationship"
    """
    # Discard relationship rows that have any missing value.
    relationships = mapped_df_dict["sample_relationship.py"].dropna()
    samples = mapped_df_dict["sample.py"]

    return {
        DEFAULT_KEY: samples,
        "sample_relationship": relationships,
    }

0 comments on commit 36703c7

Please sign in to comment.