Merge branch 'issue-511' into extended-511

ISA-tools · Mar 18, 2024 · e1eda82 · e1eda82
2 parents 3793ed6 + 7d5f19f
commit e1eda82

Showing 5 changed files with 21 additions and 54 deletions.
diff --git a/isatools/isatab/validate/rules/core.py b/isatools/isatab/validate/rules/core.py
@@ -115,7 +115,7 @@ def __init__(self,
  rules_to_run: tuple = DEFAULT_INVESTIGATION_RULES):
  """ The ISA investigation validator class
 
- :param investigation_df_dict: a dictionnary of DataFrames and list of dataframes representing sthe investigation
+ :param investigation_df_dict: a dictionary of DataFrames and lists of DataFrames representing the investigation file
  :param dir_context: the directory of the investigation
  :param configs: directory of the XML config files
  :param available_rules: a customizable list of all available rules for investigation objects

diff --git a/isatools/isatab/validate/rules/rules_40xx.py b/isatools/isatab/validate/rules/rules_40xx.py
@@ -116,12 +116,14 @@ def check_measurement_technology_types(i_df_dict, configs):
  for i, assay_df in enumerate(i_df_dict['s_assays']):
  measurement_types = assay_df['Study Assay Measurement Type'].tolist()
  technology_types = assay_df['Study Assay Technology Type'].tolist()
+
  if len(measurement_types) == len(technology_types):
  for x, measurement_type in enumerate(measurement_types):
  lowered_mt = measurement_types[x].lower()
  lowered_tt = technology_types[x].lower()
  if (lowered_mt, lowered_tt) not in configs.keys():
- spl = "Measurement {}/technology {},STUDY.{}, STUDY ASSAY.{}"
+
+ spl = "Measurement {}/technology {}, STUDY.{}, STUDY ASSAY.{}"
  spl = spl.format(measurement_types[x], technology_types[x], i, x)
  error = ("(E) Could not load configuration for measurement type '{}' and technology type '{}' "
  "for STUDY.{}, STUDY ASSAY.{}'").format(measurement_types[x], technology_types[x], i, x)
@@ -284,29 +286,6 @@ def pairwise(iterable):
  validator.add_warning(message="Missing Protocol Value", supplemental=spl, code=1007)
  log.warning(spl)
  if cfg.get_isatab_configuration():
- # proto_ref_index = [i for i in table.columns if 'protocol ref' in i.lower()]
- # result = True
- # for each in proto_ref_index:
- # prots_found = set()
- # for cell in table[each]:
- # prots_found.add(cell)
- # if len(prots_found) > 1:
- # log.warning("(W) Multiple protocol references {} are found in {}".format(prots_found, each))
- # log.warning("(W) Only one protocol reference should be used in a Protocol REF column.")
- # result = False
- # if result:
- # field_headers = [i for i in table.columns
- # if i.lower().endswith(' name')
- # or i.lower().endswith(' data file')
- # or i.lower().endswith(' data matrix file')]
- # protos = [i for i in table.columns if i.lower() == 'protocol ref']
- # if len(protos) > 0:
- # last_proto_index = table.columns.get_loc(protos[len(protos) - 1])
- # else:
- # last_proto_index = -1
- # last_mat_or_dat_index = table.columns.get_loc(field_headers[len(field_headers) - 1])
- # if last_proto_index > last_mat_or_dat_index:
- # log.warning("(W) Protocol REF column without output in file '" + table.filename + "'")
  for left, right in pairwise(field_headers):
  cleft = None
  cright = None
@@ -327,32 +306,19 @@ def pairwise(iterable):
  for proto_name in proto_names:
  proto_type = proto_map.get(proto_name)
  if not proto_type and proto_name:
- spl = ("Could not find protocol type for protocol name '{}' in file '{}'").format(
- proto_name, table.filename)
+ spl = ("Could not find protocol type for protocol name '{}' in file '{}'" ).format(proto_name, table.filename)
  validator.add_warning(message="Missing Protocol Declaration", supplemental=spl, code=1007)
  log.warning("(W) {}".format(spl))
  else:
  fprotos.append(proto_type)
 
- # proto_name = table.iloc[0][header]
- # try:
- # proto_type = proto_map[proto_name]
- # fprotos.append(proto_type)
- # except KeyError:
- # spl = ("Could not find protocol type for protocol name '{}', trying to validate_rules against name "
- # "only").format(proto_name)
- # validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007)
- # log.warning("(W) {}".format(spl))
- # fprotos.append(proto_name)
  invalid_protos = set(cprotos) - set(fprotos)
  if len(invalid_protos) > 0:
  spl = ("Protocol(s) of type {} defined in the ISA-configuration expected as a between '{}' and "
  "'{}' but has not been found, in the file '{}'")
  spl = spl.format(str(list(invalid_protos)), cleft.header, cright.header, table.filename)
  validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007)
  log.warning("(W) {}".format(spl))
- result = False
- return result
 
 
 def load_table_checks(df, filename):

diff --git a/tests/isatab/test_isatab.py b/tests/isatab/test_isatab.py
@@ -28,6 +28,9 @@ def setUpModule():
  "git clone -b tests --single-branch git@github.com:ISA-tools/ISAdatasets {0}"
  .format(utils.DATA_DIR))
 
+def replace_windows_newlines(input_string):
+ return input_string.replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n')
+
 
 def replace_windows_newlines(input_string):
  return input_string.replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n')
@@ -445,7 +448,7 @@ def test_isatab_dump_source_sample_char_quant(self):
  s.process_sequence = [sample_collection_process]
  s.samples.append(sample1)
  i.studies = [s]
- actual = isatab.dumps(i)
+ actual = replace_windows_newlines(isatab.dumps(i))
  expected = """Source Name\tMaterial Type\tCharacteristics[organism]\tTerm Source REF\tTerm Accession Number\tCharacteristics[body weight]\tUnit\tTerm Source REF\tTerm Accession Number\tProtocol REF\tParameter Value[vessel]\tTerm Source REF\tTerm Accession Number\tParameter Value[storage temperature]\tUnit\tTerm Source REF\tTerm Accession Number\tSample Name\tCharacteristics[organism part]\tTerm Source REF\tTerm Accession Number\tCharacteristics[specimen mass]\tUnit\tTerm Source REF\tTerm Accession Number
 source1\tspecimen\tHuman\tNCBITAXON\thttp://purl.bioontology.org/ontology/STY/T016\t72\tkilogram\tUO\thttp://purl.obolibrary.org/obo/UO_0000009\tsample collection\teppendorf tube\tOBI\tpurl.org\t-20\tdegree Celsius\tUO\thttp://purl.obolibrary.org/obo/UO_0000027\tsample1\tliver\tUBERON\thttp://purl.obolibrary.org/obo/UBERON_0002107\t450.5\tmilligram\tUO\thttp://purl.obolibrary.org/obo/UO_0000022"""
  self.assertIn(expected, actual)
@@ -1269,7 +1272,8 @@ def test_source_protocol_ref_sample_protocol_ref_sample(self):
  i.studies = [s]
  expected = """Source Name\tProtocol REF\tSample Name\tProtocol REF\tSample Name
 source1\tsample collection\tsample1\taliquoting\taliquot1"""
- self.assertIn(expected, isatab.dumps(i).replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n'))
+ self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))
+
 
  def test_sample_protocol_ref_material_protocol_ref_data2(self):
  i = Investigation()
@@ -1727,6 +1731,7 @@ def test_isatab_preprocess_issue235(self):
  if """Protocol REF\tData Transformation Name""" in header:
  self.fail('Incorrectly inserted Protocol REF before '
  'Data Transformation Name')
+ os.remove(tmp.name)
 
  def test_isatab_factor_value_parsing_issue270(self):
  with open(os.path.join(self._tab_data_dir, 'issue270', 'i_matteo.txt'),

diff --git a/tests/isatab/validate/test_core.py b/tests/isatab/validate/test_core.py
@@ -19,6 +19,7 @@ def test_b_ii_s_3(self):
  r = validate(fp=data_file, config_dir=self.default_conf, origin="")
  self.assertEqual(len(r['warnings']), 2)
 
+
  def test_mtbls267(self):
  data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'MTBLS267-partial')
  with open(path.join(data_path, 'i_Investigation.txt'), 'r') as data_file:
@@ -84,6 +85,7 @@ def is_investigation(investigation_df):
  r = validate(data_file, rules=rules)
  self.assertEqual(len(r['warnings']), 2)
 
+
  rule = '12000'
  expected_error = {
  'message': 'Unknown/System Error',

diff --git a/tests/validators/test_validate_test_data.py b/tests/validators/test_validate_test_data.py
@@ -321,9 +321,6 @@ class TestIsaJsonCreateTestData(unittest.TestCase):
 
  def setUp(self):
  self._reporting_level = logging.ERROR
- # self.v2_create_schemas_path = os.path.join(
- # os.path.dirname(__file__), '../..', 'isatools', 'resources', 'schemas',
- # 'isa_model_version_2_0_schemas', 'create')
  self.v2_create_schemas_path = pathlib.Path(
  pathlib.Path(__file__).parents[0], '..', '..', 'isatools', 'resources', 'schemas',
  'isa_model_version_2_0_schemas', 'create')
@@ -334,13 +331,11 @@ def test_validate_testdata_sampleassayplan_json(self):
  with open(os.path.join(self.v2_create_schemas_path,
  'sample_assay_plan_schema.json')) as fp:
  sample_assay_plan_schema = json.load(fp)
- res_path = str(pathlib.Path("file://", self.v2_create_schemas_path,
- 'sample_assay_plan_schema.json'))
+
+ res_path = pathlib.Path("file://", self.v2_create_schemas_path,
+ 'sample_assay_plan_schema.json').as_uri()
  resolver = RefResolver(res_path, sample_assay_plan_schema)
- resolver = RefResolver('file://{}'.format(
- os.path.join(self.v2_create_schemas_path,
- 'sample_assay_plan_schema.json')),
- sample_assay_plan_schema)
+
  validator = Draft4Validator(sample_assay_plan_schema,
  resolver=resolver)
  validator.validate(json.load(test_case_fp))
@@ -368,10 +363,9 @@ def test_validate_testdata_treatment_sequence_json(self):
  with open(os.path.join(self.v2_create_schemas_path,
  'treatment_sequence_schema.json')) as fp:
  treatment_sequence_schema = json.load(fp)
- resolver = RefResolver('file://{}'.format(
- os.path.join(self.v2_create_schemas_path,
- 'treatment_sequence_schema.json')),
- treatment_sequence_schema)
+ res_path = pathlib.Path("file://", self.v2_create_schemas_path,
+ 'treatment_sequence_schema.json').as_uri()
+ resolver = RefResolver(res_path, treatment_sequence_schema)
  validator = Draft4Validator(treatment_sequence_schema,
  resolver=resolver)
  validator.validate(json.load(test_case_fp))