From e95a8de513a8f7979d951017b64f7ac3617de477 Mon Sep 17 00:00:00 2001 From: Travis Thompson Date: Mon, 5 Feb 2024 16:28:35 -0500 Subject: [PATCH 1/2] Fairly significant changes to check_protocol_fields I started editing this function because of the "Only one protocol reference should be used in a Protocol REF column." message(s), but I found some other issues to address as well. --- isatools/isatab/validate/rules/rules_40xx.py | 61 ++++++++------------ 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/isatools/isatab/validate/rules/rules_40xx.py b/isatools/isatab/validate/rules/rules_40xx.py index 84b87ace..9679923d 100644 --- a/isatools/isatab/validate/rules/rules_40xx.py +++ b/isatools/isatab/validate/rules/rules_40xx.py @@ -254,30 +254,22 @@ def pairwise(iterable): a, b = tee(iterable) next(b, None) return zip(a, b) - - proto_ref_index = [i for i in table.columns if 'protocol ref' in i.lower()] - result = True - for each in proto_ref_index: - prots_found = set() - for cell in table[each]: - prots_found.add(cell) - if len(prots_found) > 1: - log.warning("(W) Multiple protocol references {} are found in {}".format(prots_found, each)) - log.warning("(W) Only one protocol reference should be used in a Protocol REF column.") - result = False - if result: - field_headers = [i for i in table.columns - if i.lower().endswith(' name') - or i.lower().endswith(' data file') - or i.lower().endswith(' data matrix file')] - protos = [i for i in table.columns if i.lower() == 'protocol ref'] - if len(protos) > 0: - last_proto_index = table.columns.get_loc(protos[len(protos) - 1]) - else: - last_proto_index = -1 - last_mat_or_dat_index = table.columns.get_loc(field_headers[len(field_headers) - 1]) - if last_proto_index > last_mat_or_dat_index: - log.warning("(W) Protocol REF column without output in file '" + table.filename + "'") + + field_headers = [i for i in table.columns + if i.lower().endswith(' name') + or i.lower().endswith(' data file') + or i.lower().endswith(' data matrix file')] + protos = [i for i in table.columns if i.lower() == 'protocol ref'] + if len(protos) > 0: + last_proto_index = table.columns.get_loc(protos[len(protos) - 1]) + else: + last_proto_index = -1 + last_mat_or_dat_index = table.columns.get_loc(field_headers[len(field_headers) - 1]) + if last_proto_index > last_mat_or_dat_index: + spl = "Protocol REF column without output in file '" + table.filename + "'" + validator.add_warning(message="Missing Protocol Value", supplemental=spl, code=1007) + log.warning("(W) Protocol REF column is not followed by a material or data node in file '" + table.filename + "'") + if cfg.get_isatab_configuration(): for left, right in pairwise(field_headers): cleft = None cright = None @@ -294,16 +286,15 @@ def pairwise(iterable): fprotos_headers = [i for i in raw_headers if 'protocol ref' in i.lower()] fprotos = list() for header in fprotos_headers: - proto_name = table.iloc[0][header] - try: - proto_type = proto_map[proto_name] - fprotos.append(proto_type) - except KeyError: - spl = ("Could not find protocol type for protocol name '{}', trying to validate_rules against name " - "only").format(proto_name) - validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007) - log.warning("(W) {}".format(spl)) - fprotos.append(proto_name) + proto_names = list(table.loc[:, header].unique()) + for proto_name in proto_names: + proto_type = proto_map.get(proto_name) + if not proto_type and proto_name: + spl = ("Could not find protocol type for protocol name '{}' in file '{}'" ).format(proto_name, table.filename) + validator.add_warning(message="Missing Protocol Declaration", supplemental=spl, code=1007) + log.warning("(W) {}".format(spl)) + else: + fprotos.append(proto_type) invalid_protos = set(cprotos) - set(fprotos) if len(invalid_protos) > 0: spl = ("Protocol(s) of type {} defined in the ISA-configuration expected as a between '{}' and " @@ -311,8 +302,6 @@ def pairwise(iterable): spl = spl.format(str(list(invalid_protos)), cleft.header, cright.header, table.filename) validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007) log.warning("(W) {}".format(spl)) - result = False - return result def load_table_checks(df, filename): From 733b74ff4384b95fd973365c754e19654fb2e4be Mon Sep 17 00:00:00 2001 From: Travis Thompson Date: Mon, 5 Feb 2024 16:44:35 -0500 Subject: [PATCH 2/2] Missed a small change. --- isatools/isatab/validate/rules/rules_40xx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isatools/isatab/validate/rules/rules_40xx.py b/isatools/isatab/validate/rules/rules_40xx.py index 9679923d..bd223ce5 100644 --- a/isatools/isatab/validate/rules/rules_40xx.py +++ b/isatools/isatab/validate/rules/rules_40xx.py @@ -266,9 +266,9 @@ def pairwise(iterable): last_proto_index = -1 last_mat_or_dat_index = table.columns.get_loc(field_headers[len(field_headers) - 1]) if last_proto_index > last_mat_or_dat_index: - spl = "Protocol REF column without output in file '" + table.filename + "'" + spl = "(W) Protocol REF column is not followed by a material or data node in file '" + table.filename + "'" validator.add_warning(message="Missing Protocol Value", supplemental=spl, code=1007) - log.warning("(W) Protocol REF column is not followed by a material or data node in file '" + table.filename + "'") + log.warning(spl) if cfg.get_isatab_configuration(): for left, right in pairwise(field_headers): cleft = None