Skip to content

Commit

Permalink
Merge pull request #556 from ptth222/protocol-types-refactor
Browse files: browse the repository at this point in the history
Protocol types refactor
  • Loading branch information
proccaserra authored Apr 19, 2024
2 parents abcb82c + 945241a commit a2446fa
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 66 deletions.
1 change: 1 addition & 0 deletions isatools/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
SYNONYMS = 'synonyms'
HEADER = 'header'

MATERIAL_LABELS = [
'Source Name',
Expand Down
63 changes: 40 additions & 23 deletions isatools/isatab/dump/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pandas import DataFrame
from numpy import nan

from isatools.constants import SYNONYMS
from isatools.constants import SYNONYMS, HEADER
from isatools.model import (
OntologyAnnotation,
Investigation,
Expand All @@ -21,8 +21,7 @@
get_pv_columns,
get_fv_columns,
get_characteristic_columns,
get_object_column_map,
get_column_header
get_object_column_map
)


Expand Down Expand Up @@ -241,7 +240,13 @@ def write_assay_table_files(inv_obj, output_dir, write_factor_values=False):

if not isinstance(inv_obj, Investigation):
raise NotImplementedError
protocol_types_dict = load_protocol_types_info()
yaml_dict = load_protocol_types_info()
protocol_types_dict = {}
for protocol, attributes in yaml_dict.items():
protocol_types_dict[protocol] = attributes
for synonym in attributes[SYNONYMS]:
protocol_types_dict[synonym] = attributes

for study_obj in inv_obj.studies:
for assay_obj in study_obj.assays:
a_graph = assay_obj.graph
Expand Down Expand Up @@ -296,17 +301,24 @@ def flatten(current_list):
columns += flatten(map(lambda x: get_pv_columns(olabel, x),
node.parameter_values))
if node.executes_protocol.protocol_type:
oname_label = get_column_header(
node.executes_protocol.protocol_type.term,
protocol_types_dict
)
if isinstance(node.executes_protocol.protocol_type, OntologyAnnotation):
protocol_type = node.executes_protocol.protocol_type.term.lower()
else:
protocol_type = node.executes_protocol.protocol_type.lower()

if protocol_type in protocol_types_dict and\
protocol_types_dict[protocol_type][HEADER]:
oname_label = protocol_types_dict[protocol_type][HEADER]
else:
oname_label = None

if oname_label is not None:
columns.append(oname_label)
elif node.executes_protocol.protocol_type.term.lower() \
in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
columns.extend(
["Hybridization Assay Name",
"Array Design REF"])

if node.executes_protocol.protocol_type.term.lower() in \
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
columns.append("Array Design REF")

columns += flatten(
map(lambda x: get_comment_column(olabel, x),
node.comments))
Expand Down Expand Up @@ -350,19 +362,24 @@ def pbar(x):
protocol_in_path_count += 1
df_dict[olabel][-1] = node.executes_protocol.name
if node.executes_protocol.protocol_type:
oname_label = get_column_header(
node.executes_protocol.protocol_type.term,
protocol_types_dict
)
if isinstance(node.executes_protocol.protocol_type, OntologyAnnotation):
protocol_type = node.executes_protocol.protocol_type.term.lower()
else:
protocol_type = node.executes_protocol.protocol_type.lower()

if protocol_type in protocol_types_dict and\
protocol_types_dict[protocol_type][HEADER]:
oname_label = protocol_types_dict[protocol_type][HEADER]
else:
oname_label = None

if oname_label is not None:
df_dict[oname_label][-1] = node.name

elif node.executes_protocol.protocol_type.term.lower() in \
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
df_dict["Hybridization Assay Name"][-1] = \
node.name
df_dict["Array Design REF"][-1] = \
node.array_design_ref
if node.executes_protocol.protocol_type.term.lower() in \
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
df_dict["Array Design REF"][-1] = node.array_design_ref

if node.date is not None:
df_dict[olabel + ".Date"][-1] = node.date
if node.performer is not None:
Expand Down
26 changes: 0 additions & 26 deletions isatools/isatab/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,32 +515,6 @@ def get_object_column_map(isatab_header, df_columns):
return object_column_map


def get_column_header(protocol_type_term, protocol_types_dict):
    """ Returns the name-column header associated with a protocol type term.

    The term is lower-cased once and looked up in the synonym lists stored
    under the ``SYNONYMS`` key of selected entries of ``protocol_types_dict``.
    Entries that are absent from the mapping, or whose synonym list is
    missing or empty, are treated as having no synonyms instead of raising.

    :param protocol_type_term: protocol type term as a string; matching is
        case-insensitive.
    :param protocol_types_dict: mapping of protocol type name to a mapping
        that holds a list of synonyms under ``SYNONYMS``.
    :return: the column header string, or None if the term matches none of
        the protocol types handled here.
    """
    term = protocol_type_term.lower()

    # "unknown protocol" takes precedence over any synonym match, so it is
    # resolved before the synonym lists are consulted.
    if term == "unknown protocol":
        return "Unknown Protocol Name"

    # Ordered (header, protocol types) pairs; the first header whose protocol
    # types list the term among their synonyms wins.
    headers_by_protocol_types = (
        ("Assay Name", (
            "nucleic acid sequencing",
            "phenotyping",
            "data acquisition",
        )),
        ("Scan Name", ("data collection",)),
        ("MS Assay Name", ("mass spectrometry",)),
        ("NMR Assay Name", ("nmr spectroscopy",)),
        ("Data Transformation Name", (
            "data transformation",
            "sequence analysis data transformation",
            "metabolite identification",
            "protein identification",
        )),
        ("Normalization Name", ("normalization",)),
    )

    for column_header, protocol_types in headers_by_protocol_types:
        for protocol_type in protocol_types:
            # A missing entry or an empty/None synonym list simply cannot
            # match; it must not abort the lookup of the remaining entries.
            attributes = protocol_types_dict.get(protocol_type) or {}
            if term in (attributes.get(SYNONYMS) or ()):
                return column_header
    return None


def get_value_columns(label, x):
""" Generates the appropriate columns based on the value of the object.
For example, if the object's .value value is an OntologyAnnotation,
Expand Down
12 changes: 0 additions & 12 deletions isatools/model/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,18 +307,6 @@ def from_assay_dict(self, process, technology_type):
self.name = process.get('name', '')
self.executes_protocol = indexes.get_protocol(process['executesProtocol']['@id'])
self.load_comments(process.get('comments', []))
allowed_protocol_type_terms = [
"nucleic acid sequencing",
"nmr spectroscopy",
"mass spectrometry",
"nucleic acid hybridization",
"data transformation",
"data normalization"
]
if self.executes_protocol.protocol_type.term in allowed_protocol_type_terms or (
self.executes_protocol.protocol_type.term == 'data collection'
and technology_type.term == 'DNA microarray'):
self.name = process['name']

# Inputs / Outputs
for io_data_target in ['inputs', 'outputs']:
Expand Down
4 changes: 4 additions & 0 deletions isatools/model/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections.abc import Iterable
from pprint import pprint
from yaml import load, FullLoader
from isatools.constants import SYNONYMS
from isatools.model.comments import Commentable
from isatools.model.ontology_annotation import OntologyAnnotation
from isatools.model.protocol_parameter import ProtocolParameter
Expand Down Expand Up @@ -283,3 +284,6 @@ def load_protocol_types_info() -> dict:
filepath = os.path.join(os.path.dirname(__file__), '..', 'resources', 'config', 'yaml', 'protocol-types.yml')
with open(filepath) as yaml_file:
return load(yaml_file, Loader=FullLoader)



12 changes: 8 additions & 4 deletions isatools/resources/config/yaml/protocol-types.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
sample collection:
header: Sample Name
header:
iri: http://purl.obolibrary.org/obo/OBI_0000659
synonyms:
- sample collection
- sampling
- aliquoting
extraction:
header: Extract Name
header:
iri: http://purl.obolibrary.org/obo/OBI_0302884
synonyms:
- extraction
- metabolite extraction
- intracellular metabolite extraction
- extracelluar metabolite extraction
labeling:
header: Labeled Extract Name
header:
iri: http://purl.obolibrary.org/obo/OBI_0600038
synonyms:
- labeling
Expand Down Expand Up @@ -83,4 +83,8 @@ metabolite identification:
protein identification:
header: Data Transformation Name
synonyms:
- protein identification
- protein identification
unknown protocol:
header: Unknown Protocol Name
synonyms:
- unknown protocol
2 changes: 1 addition & 1 deletion tests/model/test_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,4 @@ class TestFunctions(TestCase):
def test_load_protocol_types_info(self):
yaml_config = load_protocol_types_info()
self.assertTrue(isinstance(yaml_config, dict))
self.assertTrue(len(yaml_config.keys()) == 15)
self.assertEqual(len(yaml_config.keys()), 16)

0 comments on commit a2446fa

Please sign in to comment.