Skip to content

Commit

Permalink
First attempt.
Browse files Browse the repository at this point in the history
The problem with this approach is that some of the protocol types in the YAML file have "Sample Name" and "Extract Name" as their headers, which causes those headers to be added multiple times. It might be better to leave those headers empty. It might also be good to change headers to a list to get rid of the special case for "nucleic acid hybridization".
  • Loading branch information
ptth222 committed Mar 19, 2024
1 parent d7aa027 commit a5ca2f1
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 48 deletions.
1 change: 1 addition & 0 deletions isatools/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
SYNONYMS = 'synonyms'
HEADER = 'header'

MATERIAL_LABELS = [
'Source Name',
Expand Down
53 changes: 31 additions & 22 deletions isatools/isatab/dump/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pandas import DataFrame
from numpy import nan

from isatools.constants import SYNONYMS
from isatools.constants import SYNONYMS, HEADER
from isatools.model import (
OntologyAnnotation,
Investigation,
Expand All @@ -21,8 +21,7 @@
get_pv_columns,
get_fv_columns,
get_characteristic_columns,
get_object_column_map,
get_column_header
get_object_column_map
)


Expand Down Expand Up @@ -296,17 +295,23 @@ def flatten(current_list):
columns += flatten(map(lambda x: get_pv_columns(olabel, x),
node.parameter_values))
if node.executes_protocol.protocol_type:
oname_label = get_column_header(
node.executes_protocol.protocol_type.term,
protocol_types_dict
)
if isinstance(node.executes_protocol.protocol_type, OntologyAnnotation):
protocol_type = node.executes_protocol.protocol_type.term.lower()
else:
protocol_type = node.executes_protocol.protocol_type.lower()

if protocol_type in protocol_types_dict:
oname_label = protocol_types_dict[protocol_type][HEADER]
else:
oname_label = None

if oname_label is not None:
columns.append(oname_label)
elif node.executes_protocol.protocol_type.term.lower() \
in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
columns.extend(
["Hybridization Assay Name",
"Array Design REF"])

if node.executes_protocol.protocol_type.term.lower() in \
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
columns.append("Array Design REF")

columns += flatten(
map(lambda x: get_comment_column(olabel, x),
node.comments))
Expand Down Expand Up @@ -350,19 +355,23 @@ def pbar(x):
protocol_in_path_count += 1
df_dict[olabel][-1] = node.executes_protocol.name
if node.executes_protocol.protocol_type:
oname_label = get_column_header(
node.executes_protocol.protocol_type.term,
protocol_types_dict
)
if isinstance(node.executes_protocol.protocol_type, OntologyAnnotation):
protocol_type = node.executes_protocol.protocol_type.term.lower()
else:
protocol_type = node.executes_protocol.protocol_type.lower()

if protocol_type in protocol_types_dict:
oname_label = protocol_types_dict[protocol_type][HEADER]
else:
oname_label = None

if oname_label is not None:
df_dict[oname_label][-1] = node.name

elif node.executes_protocol.protocol_type.term.lower() in \
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
df_dict["Hybridization Assay Name"][-1] = \
node.name
df_dict["Array Design REF"][-1] = \
node.array_design_ref
if node.executes_protocol.protocol_type.term.lower() in \
protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
df_dict["Array Design REF"][-1] = node.array_design_ref

if node.date is not None:
df_dict[olabel + ".Date"][-1] = node.date
if node.performer is not None:
Expand Down
48 changes: 24 additions & 24 deletions isatools/isatab/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,30 +515,30 @@ def get_object_column_map(isatab_header, df_columns):
return object_column_map


def get_column_header(protocol_type_term, protocol_types_dict):
    """Return the name-column header associated with a protocol type term.

    The term is lower-cased and compared against the synonym collections
    stored under ``protocol_types_dict[<protocol type>][SYNONYMS]``. Groups
    of protocol types are checked in a fixed order and the first group
    whose pooled synonyms contain the term decides the header.

    :param protocol_type_term: protocol type term to look up (a string;
        it is lower-cased before any comparison).
    :param protocol_types_dict: mapping of protocol type name to a mapping
        that holds that type's synonyms under the ``SYNONYMS`` key.
    :return: the matching column header string, or ``None`` when the term
        matches no group and is not exactly ``"unknown protocol"``.
    :raises KeyError: propagated from the dictionary lookups when a
        protocol type that has to be consulted is absent.
    """
    term = protocol_type_term.lower()

    # Ordered (header, protocol types) pairs; the order is significant
    # because the first group containing the term wins.
    header_groups = (
        ("Assay Name", ("nucleic acid sequencing",
                        "phenotyping",
                        "data acquisition")),
        ("Scan Name", ("data collection",)),
        ("MS Assay Name", ("mass spectrometry",)),
        ("NMR Assay Name", ("nmr spectroscopy",)),
        ("Data Transformation Name", ("data transformation",
                                      "sequence analysis data transformation",
                                      "metabolite identification",
                                      "protein identification")),
        ("Normalization Name", ("normalization",)),
    )

    column_header = None
    for header, type_names in header_groups:
        # Pool the synonyms of every protocol type in the group before
        # testing membership, looking groups up only as they are reached.
        synonyms = protocol_types_dict[type_names[0]][SYNONYMS]
        for type_name in type_names[1:]:
            synonyms = synonyms + protocol_types_dict[type_name][SYNONYMS]
        if term in synonyms:
            column_header = header
            break

    # An exact "unknown protocol" term always takes precedence over any
    # header selected above.
    if term == "unknown protocol":
        column_header = "Unknown Protocol Name"
    return column_header
# def get_column_header(protocol_type_term, protocol_types_dict):
# column_header = None
# if protocol_type_term.lower() in \
# protocol_types_dict["nucleic acid sequencing"][SYNONYMS] \
# + protocol_types_dict["phenotyping"][SYNONYMS] \
# + protocol_types_dict["data acquisition"][SYNONYMS]:
# column_header = "Assay Name"
# elif protocol_type_term.lower() in protocol_types_dict["data collection"][SYNONYMS]:
# column_header = "Scan Name"
# elif protocol_type_term.lower() in protocol_types_dict["mass spectrometry"][SYNONYMS]:
# column_header = "MS Assay Name"
# elif protocol_type_term.lower() in protocol_types_dict["nmr spectroscopy"][SYNONYMS]:
# column_header = "NMR Assay Name"
# elif protocol_type_term.lower() in \
# protocol_types_dict["data transformation"][SYNONYMS] \
# + protocol_types_dict["sequence analysis data transformation"][SYNONYMS] \
# + protocol_types_dict["metabolite identification"][SYNONYMS] \
# + protocol_types_dict["protein identification"][SYNONYMS]:
# column_header = "Data Transformation Name"
# elif protocol_type_term.lower() in protocol_types_dict["normalization"][SYNONYMS]:
# column_header = "Normalization Name"
# if protocol_type_term.lower() == "unknown protocol":
# column_header = "Unknown Protocol Name"
# return column_header


def get_value_columns(label, x):
Expand Down
13 changes: 12 additions & 1 deletion isatools/model/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections.abc import Iterable
from pprint import pprint
from yaml import load, FullLoader
from isatools.constants import SYNONYMS
from isatools.model.comments import Commentable
from isatools.model.ontology_annotation import OntologyAnnotation
from isatools.model.protocol_parameter import ProtocolParameter
Expand Down Expand Up @@ -282,4 +283,14 @@ def load_protocol_types_info() -> dict:
"""
filepath = os.path.join(os.path.dirname(__file__), '..', 'resources', 'config', 'yaml', 'protocol-types.yml')
with open(filepath) as yaml_file:
return load(yaml_file, Loader=FullLoader)
yaml_dict = load(yaml_file, Loader=FullLoader)

protocol_types_dict = {}
for protocol, attributes in yaml_dict.items():
protocol_types_dict[protocol] = attributes
for synonym in attributes[SYNONYMS]:
protocol_types_dict[synonym] = attributes

return protocol_types_dict


6 changes: 5 additions & 1 deletion isatools/resources/config/yaml/protocol-types.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,8 @@ metabolite identification:
protein identification:
header: Data Transformation Name
synonyms:
- protein identification
- protein identification
unknown protocol:
header: Unknown Protocol Name
synonyms:
- unknown protocol

0 comments on commit a5ca2f1

Please sign in to comment.