Skip to content

Commit

Permalink
Merge branch 'issue-511' into extended-511
Browse files Browse the repository at this point in the history
  • Loading branch information
proccaserra authored Mar 18, 2024
2 parents 3793ed6 + 7d5f19f commit e1eda82
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 54 deletions.
2 changes: 1 addition & 1 deletion isatools/isatab/validate/rules/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def __init__(self,
rules_to_run: tuple = DEFAULT_INVESTIGATION_RULES):
""" The ISA investigation validator class
:param investigation_df_dict: a dictionnary of DataFrames and list of dataframes representing sthe investigation
:param investigation_df_dict: a dictionary of DataFrames and lists of DataFrames representing the investigation file
:param dir_context: the directory of the investigation
:param configs: directory of the XML config files
:param available_rules: a customizable list of all available rules for investigation objects
Expand Down
42 changes: 4 additions & 38 deletions isatools/isatab/validate/rules/rules_40xx.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,14 @@ def check_measurement_technology_types(i_df_dict, configs):
for i, assay_df in enumerate(i_df_dict['s_assays']):
measurement_types = assay_df['Study Assay Measurement Type'].tolist()
technology_types = assay_df['Study Assay Technology Type'].tolist()

if len(measurement_types) == len(technology_types):
for x, measurement_type in enumerate(measurement_types):
lowered_mt = measurement_types[x].lower()
lowered_tt = technology_types[x].lower()
if (lowered_mt, lowered_tt) not in configs.keys():
spl = "Measurement {}/technology {},STUDY.{}, STUDY ASSAY.{}"

spl = "Measurement {}/technology {}, STUDY.{}, STUDY ASSAY.{}"
spl = spl.format(measurement_types[x], technology_types[x], i, x)
error = ("(E) Could not load configuration for measurement type '{}' and technology type '{}' "
"for STUDY.{}, STUDY ASSAY.{}'").format(measurement_types[x], technology_types[x], i, x)
Expand Down Expand Up @@ -284,29 +286,6 @@ def pairwise(iterable):
validator.add_warning(message="Missing Protocol Value", supplemental=spl, code=1007)
log.warning(spl)
if cfg.get_isatab_configuration():
# proto_ref_index = [i for i in table.columns if 'protocol ref' in i.lower()]
# result = True
# for each in proto_ref_index:
# prots_found = set()
# for cell in table[each]:
# prots_found.add(cell)
# if len(prots_found) > 1:
# log.warning("(W) Multiple protocol references {} are found in {}".format(prots_found, each))
# log.warning("(W) Only one protocol reference should be used in a Protocol REF column.")
# result = False
# if result:
# field_headers = [i for i in table.columns
# if i.lower().endswith(' name')
# or i.lower().endswith(' data file')
# or i.lower().endswith(' data matrix file')]
# protos = [i for i in table.columns if i.lower() == 'protocol ref']
# if len(protos) > 0:
# last_proto_index = table.columns.get_loc(protos[len(protos) - 1])
# else:
# last_proto_index = -1
# last_mat_or_dat_index = table.columns.get_loc(field_headers[len(field_headers) - 1])
# if last_proto_index > last_mat_or_dat_index:
# log.warning("(W) Protocol REF column without output in file '" + table.filename + "'")
for left, right in pairwise(field_headers):
cleft = None
cright = None
Expand All @@ -327,32 +306,19 @@ def pairwise(iterable):
for proto_name in proto_names:
proto_type = proto_map.get(proto_name)
if not proto_type and proto_name:
spl = ("Could not find protocol type for protocol name '{}' in file '{}'").format(
proto_name, table.filename)
spl = ("Could not find protocol type for protocol name '{}' in file '{}'" ).format(proto_name, table.filename)
validator.add_warning(message="Missing Protocol Declaration", supplemental=spl, code=1007)
log.warning("(W) {}".format(spl))
else:
fprotos.append(proto_type)

# proto_name = table.iloc[0][header]
# try:
# proto_type = proto_map[proto_name]
# fprotos.append(proto_type)
# except KeyError:
# spl = ("Could not find protocol type for protocol name '{}', trying to validate_rules against name "
# "only").format(proto_name)
# validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007)
# log.warning("(W) {}".format(spl))
# fprotos.append(proto_name)
invalid_protos = set(cprotos) - set(fprotos)
if len(invalid_protos) > 0:
spl = ("Protocol(s) of type {} defined in the ISA-configuration expected as a between '{}' and "
"'{}' but has not been found, in the file '{}'")
spl = spl.format(str(list(invalid_protos)), cleft.header, cright.header, table.filename)
validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007)
log.warning("(W) {}".format(spl))
result = False
return result


def load_table_checks(df, filename):
Expand Down
9 changes: 7 additions & 2 deletions tests/isatab/test_isatab.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def setUpModule():
"git clone -b tests --single-branch [email protected]:ISA-tools/ISAdatasets {0}"
.format(utils.DATA_DIR))

def replace_windows_newlines(input_string):
    r"""Return *input_string* with carriage-return line endings turned into ``\n``.

    The sequences ``\r\r\n``, ``\r\n`` and a lone ``\r`` are each replaced by a
    single ``\n``. The tests in this module apply it to the output of
    ``isatab.dumps`` before comparing against expected strings written with
    plain ``\n`` newlines.

    NOTE(review): this helper was defined twice in a row after the merge; the
    definitions were identical, so a single one is kept here.

    :param input_string: the text whose line endings should be normalised
    :return: a new string containing only ``\n`` line endings
    """
    normalized = input_string
    # Order matters: the longest sequence goes first so that '\r\r\n' collapses
    # to one newline instead of being split into two by the shorter patterns.
    for line_ending in ('\r\r\n', '\r\n', '\r'):
        normalized = normalized.replace(line_ending, '\n')
    return normalized
Expand Down Expand Up @@ -445,7 +448,7 @@ def test_isatab_dump_source_sample_char_quant(self):
s.process_sequence = [sample_collection_process]
s.samples.append(sample1)
i.studies = [s]
actual = isatab.dumps(i)
actual = replace_windows_newlines(isatab.dumps(i))
expected = """Source Name\tMaterial Type\tCharacteristics[organism]\tTerm Source REF\tTerm Accession Number\tCharacteristics[body weight]\tUnit\tTerm Source REF\tTerm Accession Number\tProtocol REF\tParameter Value[vessel]\tTerm Source REF\tTerm Accession Number\tParameter Value[storage temperature]\tUnit\tTerm Source REF\tTerm Accession Number\tSample Name\tCharacteristics[organism part]\tTerm Source REF\tTerm Accession Number\tCharacteristics[specimen mass]\tUnit\tTerm Source REF\tTerm Accession Number
source1\tspecimen\tHuman\tNCBITAXON\thttp://purl.bioontology.org/ontology/STY/T016\t72\tkilogram\tUO\thttp://purl.obolibrary.org/obo/UO_0000009\tsample collection\teppendorf tube\tOBI\tpurl.org\t-20\tdegree Celsius\tUO\thttp://purl.obolibrary.org/obo/UO_0000027\tsample1\tliver\tUBERON\thttp://purl.obolibrary.org/obo/UBERON_0002107\t450.5\tmilligram\tUO\thttp://purl.obolibrary.org/obo/UO_0000022"""
self.assertIn(expected, actual)
Expand Down Expand Up @@ -1269,7 +1272,8 @@ def test_source_protocol_ref_sample_protocol_ref_sample(self):
i.studies = [s]
expected = """Source Name\tProtocol REF\tSample Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1\taliquoting\taliquot1"""
self.assertIn(expected, isatab.dumps(i).replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n'))
self.assertIn(expected, replace_windows_newlines(isatab.dumps(i)))


def test_sample_protocol_ref_material_protocol_ref_data2(self):
i = Investigation()
Expand Down Expand Up @@ -1727,6 +1731,7 @@ def test_isatab_preprocess_issue235(self):
if """Protocol REF\tData Transformation Name""" in header:
self.fail('Incorrectly inserted Protocol REF before '
'Data Transformation Name')
os.remove(tmp.name)

def test_isatab_factor_value_parsing_issue270(self):
with open(os.path.join(self._tab_data_dir, 'issue270', 'i_matteo.txt'),
Expand Down
2 changes: 2 additions & 0 deletions tests/isatab/validate/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def test_b_ii_s_3(self):
r = validate(fp=data_file, config_dir=self.default_conf, origin="")
self.assertEqual(len(r['warnings']), 2)


def test_mtbls267(self):
data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'MTBLS267-partial')
with open(path.join(data_path, 'i_Investigation.txt'), 'r') as data_file:
Expand Down Expand Up @@ -84,6 +85,7 @@ def is_investigation(investigation_df):
r = validate(data_file, rules=rules)
self.assertEqual(len(r['warnings']), 2)


rule = '12000'
expected_error = {
'message': 'Unknown/System Error',
Expand Down
20 changes: 7 additions & 13 deletions tests/validators/test_validate_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,6 @@ class TestIsaJsonCreateTestData(unittest.TestCase):

def setUp(self):
self._reporting_level = logging.ERROR
# self.v2_create_schemas_path = os.path.join(
# os.path.dirname(__file__), '../..', 'isatools', 'resources', 'schemas',
# 'isa_model_version_2_0_schemas', 'create')
self.v2_create_schemas_path = pathlib.Path(
pathlib.Path(__file__).parents[0], '..', '..', 'isatools', 'resources', 'schemas',
'isa_model_version_2_0_schemas', 'create')
Expand All @@ -334,13 +331,11 @@ def test_validate_testdata_sampleassayplan_json(self):
with open(os.path.join(self.v2_create_schemas_path,
'sample_assay_plan_schema.json')) as fp:
sample_assay_plan_schema = json.load(fp)
res_path = str(pathlib.Path("file://", self.v2_create_schemas_path,
'sample_assay_plan_schema.json'))

res_path = pathlib.Path("file://", self.v2_create_schemas_path,
'sample_assay_plan_schema.json').as_uri()
resolver = RefResolver(res_path, sample_assay_plan_schema)
resolver = RefResolver('file://{}'.format(
os.path.join(self.v2_create_schemas_path,
'sample_assay_plan_schema.json')),
sample_assay_plan_schema)

validator = Draft4Validator(sample_assay_plan_schema,
resolver=resolver)
validator.validate(json.load(test_case_fp))
Expand Down Expand Up @@ -368,10 +363,9 @@ def test_validate_testdata_treatment_sequence_json(self):
with open(os.path.join(self.v2_create_schemas_path,
'treatment_sequence_schema.json')) as fp:
treatment_sequence_schema = json.load(fp)
resolver = RefResolver('file://{}'.format(
os.path.join(self.v2_create_schemas_path,
'treatment_sequence_schema.json')),
treatment_sequence_schema)
res_path = pathlib.Path("file://", self.v2_create_schemas_path,
'treatment_sequence_schema.json').as_uri()
resolver = RefResolver(res_path, treatment_sequence_schema)
validator = Draft4Validator(treatment_sequence_schema,
resolver=resolver)
validator.validate(json.load(test_case_fp))
Expand Down

0 comments on commit e1eda82

Please sign in to comment.