First attempt.

The problem with this approach is that some of the protocol types in the YAML file have "Sample Name" and "Extract Name" as their headers, which causes those headers to be added multiple times. It might be better to leave those headers empty. It might also be good to change each header to a list, to get rid of the special case for "nucleic acid hybridization".
ISA-tools · Mar 19, 2024 · a5ca2f1 · a5ca2f1
1 parent d7aa027
commit a5ca2f1
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 48 deletions.
diff --git a/isatools/constants.py b/isatools/constants.py
@@ -1,4 +1,5 @@
 SYNONYMS = 'synonyms'
+HEADER = 'header'
 
 MATERIAL_LABELS = [
  'Source Name',

diff --git a/isatools/isatab/dump/write.py b/isatools/isatab/dump/write.py
@@ -3,7 +3,7 @@
 from pandas import DataFrame
 from numpy import nan
 
-from isatools.constants import SYNONYMS
+from isatools.constants import SYNONYMS, HEADER
 from isatools.model import (
  OntologyAnnotation,
  Investigation,
@@ -21,8 +21,7 @@
  get_pv_columns,
  get_fv_columns,
  get_characteristic_columns,
- get_object_column_map,
- get_column_header
+ get_object_column_map
 )
 
 
@@ -296,17 +295,23 @@ def flatten(current_list):
  columns += flatten(map(lambda x: get_pv_columns(olabel, x),
  node.parameter_values))
  if node.executes_protocol.protocol_type:
- oname_label = get_column_header(
- node.executes_protocol.protocol_type.term,
- protocol_types_dict
- )
+ if isinstance(node.executes_protocol.protocol_type, OntologyAnnotation):
+ protocol_type = node.executes_protocol.protocol_type.term.lower()
+ else:
+ protocol_type = node.executes_protocol.protocol_type.lower()
+
+ if protocol_type in protocol_types_dict:
+ oname_label = protocol_types_dict[protocol_type][HEADER]
+ else:
+ oname_label = None
+
  if oname_label is not None:
  columns.append(oname_label)
- elif node.executes_protocol.protocol_type.term.lower() \
-  in protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
- columns.extend(
- ["Hybridization Assay Name",
-  "Array Design REF"])
+
+ if node.executes_protocol.protocol_type.term.lower() in \
+  protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
+ columns.append("Array Design REF")
+
  columns += flatten(
  map(lambda x: get_comment_column(olabel, x),
  node.comments))
@@ -350,19 +355,23 @@ def pbar(x):
  protocol_in_path_count += 1
  df_dict[olabel][-1] = node.executes_protocol.name
  if node.executes_protocol.protocol_type:
- oname_label = get_column_header(
- node.executes_protocol.protocol_type.term,
- protocol_types_dict
- )
+ if isinstance(node.executes_protocol.protocol_type, OntologyAnnotation):
+ protocol_type = node.executes_protocol.protocol_type.term.lower()
+ else:
+ protocol_type = node.executes_protocol.protocol_type.lower()
+
+ if protocol_type in protocol_types_dict:
+ oname_label = protocol_types_dict[protocol_type][HEADER]
+ else:
+ oname_label = None
+
  if oname_label is not None:
  df_dict[oname_label][-1] = node.name
 
- elif node.executes_protocol.protocol_type.term.lower() in \
- protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
- df_dict["Hybridization Assay Name"][-1] = \
- node.name
- df_dict["Array Design REF"][-1] = \
- node.array_design_ref
+ if node.executes_protocol.protocol_type.term.lower() in \
+ protocol_types_dict["nucleic acid hybridization"][SYNONYMS]:
+ df_dict["Array Design REF"][-1] = node.array_design_ref
+
  if node.date is not None:
  df_dict[olabel + ".Date"][-1] = node.date
  if node.performer is not None:

diff --git a/isatools/isatab/utils.py b/isatools/isatab/utils.py
@@ -515,30 +515,30 @@ def get_object_column_map(isatab_header, df_columns):
  return object_column_map
 
 
-def get_column_header(protocol_type_term, protocol_types_dict):
- column_header = None
- if protocol_type_term.lower() in \
- protocol_types_dict["nucleic acid sequencing"][SYNONYMS] \
- + protocol_types_dict["phenotyping"][SYNONYMS] \
- + protocol_types_dict["data acquisition"][SYNONYMS]:
- column_header = "Assay Name"
- elif protocol_type_term.lower() in protocol_types_dict["data collection"][SYNONYMS]:
- column_header = "Scan Name"
- elif protocol_type_term.lower() in protocol_types_dict["mass spectrometry"][SYNONYMS]:
- column_header = "MS Assay Name"
- elif protocol_type_term.lower() in protocol_types_dict["nmr spectroscopy"][SYNONYMS]:
- column_header = "NMR Assay Name"
- elif protocol_type_term.lower() in \
- protocol_types_dict["data transformation"][SYNONYMS] \
- + protocol_types_dict["sequence analysis data transformation"][SYNONYMS] \
- + protocol_types_dict["metabolite identification"][SYNONYMS] \
- + protocol_types_dict["protein identification"][SYNONYMS]:
- column_header = "Data Transformation Name"
- elif protocol_type_term.lower() in protocol_types_dict["normalization"][SYNONYMS]:
- column_header = "Normalization Name"
- if protocol_type_term.lower() == "unknown protocol":
- column_header = "Unknown Protocol Name"
- return column_header
+# def get_column_header(protocol_type_term, protocol_types_dict):
+#  column_header = None
+#  if protocol_type_term.lower() in \
+#  protocol_types_dict["nucleic acid sequencing"][SYNONYMS] \
+#  + protocol_types_dict["phenotyping"][SYNONYMS] \
+#  + protocol_types_dict["data acquisition"][SYNONYMS]:
+#  column_header = "Assay Name"
+#  elif protocol_type_term.lower() in protocol_types_dict["data collection"][SYNONYMS]:
+#  column_header = "Scan Name"
+#  elif protocol_type_term.lower() in protocol_types_dict["mass spectrometry"][SYNONYMS]:
+#  column_header = "MS Assay Name"
+#  elif protocol_type_term.lower() in protocol_types_dict["nmr spectroscopy"][SYNONYMS]:
+#  column_header = "NMR Assay Name"
+#  elif protocol_type_term.lower() in \
+#  protocol_types_dict["data transformation"][SYNONYMS] \
+#  + protocol_types_dict["sequence analysis data transformation"][SYNONYMS] \
+#  + protocol_types_dict["metabolite identification"][SYNONYMS] \
+#  + protocol_types_dict["protein identification"][SYNONYMS]:
+#  column_header = "Data Transformation Name"
+#  elif protocol_type_term.lower() in protocol_types_dict["normalization"][SYNONYMS]:
+#  column_header = "Normalization Name"
+#  if protocol_type_term.lower() == "unknown protocol":
+#  column_header = "Unknown Protocol Name"
+#  return column_header
 
 
 def get_value_columns(label, x):

diff --git a/isatools/model/protocol.py b/isatools/model/protocol.py
@@ -2,6 +2,7 @@
 from collections.abc import Iterable
 from pprint import pprint
 from yaml import load, FullLoader
+from isatools.constants import SYNONYMS
 from isatools.model.comments import Commentable
 from isatools.model.ontology_annotation import OntologyAnnotation
 from isatools.model.protocol_parameter import ProtocolParameter
@@ -282,4 +283,14 @@ def load_protocol_types_info() -> dict:
  """
  filepath = os.path.join(os.path.dirname(__file__), '..', 'resources', 'config', 'yaml', 'protocol-types.yml')
  with open(filepath) as yaml_file:
- return load(yaml_file, Loader=FullLoader)
+ yaml_dict = load(yaml_file, Loader=FullLoader)
+
+ protocol_types_dict = {}
+ for protocol, attributes in yaml_dict.items():
+ protocol_types_dict[protocol] = attributes
+ for synonym in attributes[SYNONYMS]:
+ protocol_types_dict[synonym] = attributes
+
+ return protocol_types_dict
+
+
diff --git a/isatools/resources/config/yaml/protocol-types.yml b/isatools/resources/config/yaml/protocol-types.yml
@@ -83,4 +83,8 @@ metabolite identification:
 protein identification:
  header: Data Transformation Name
  synonyms:
- - protein identification
+ - protein identification
+unknown protocol:
+ header: Unknown Protocol Name
+ synonyms:
+ - unknown protocol