diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e5b70ece..a6ac6456 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,12 +20,28 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + - name: Install udunits + run: | + cd /opt + curl -O https://downloads.unidata.ucar.edu/udunits/2.2.28/udunits-2.2.28.tar.gz + tar -xzvf udunits-2.2.28.tar.gz + cd udunits-2.2.28 + ./configure + make all install + ln -sf /opt/lib/* $LD_LIBRARY_PATH + - name: Install exiftool + run: | + sudo apt install libimage-exiftool-perl -y - name: Install dependencies run: | python -m pip install --upgrade pip pip install flake8 black pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi + - name: Look for exiftool + run: | + which exiftool - name: Test with pytest run: | - python -m pytest -v tests/test_readers.py + export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml + python -m pytest -v tests/test_readers.py tests/test_images.py diff --git a/checksit/check.py b/checksit/check.py index 8ef96029..abf06b9e 100644 --- a/checksit/check.py +++ b/checksit/check.py @@ -10,13 +10,14 @@ from .cvs import vocabs, vocabs_prefix from .rules import rules, rules_prefix -from .readers import pp, badc_csv, cdl, yml +from .readers import pp, badc_csv, cdl, yml, image from .specs import SpecificationChecker from .utils import get_file_base, extension, UNDEFINED from .config import get_config from .make_specs import make_amof_specs AMOF_CONVENTIONS = ['"CF-1.6, NCAS-AMF-2.0.0"'] +IMAGE_EXTENSIONS = ["png", "jpg", "jpeg"] conf = get_config() @@ -229,12 +230,16 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None # tmpl = self.parse_file_header(template, auto_cache=auto_cache, verbose=verbose) - ### Check for AMOF netCDF file and gather specs ### - if template == "auto" and 
file_path.split('.')[-1] == 'nc': - # Look for AMOF Convention string in Conventions global attr, if it exists - if ':Conventions' in file_content.cdl: - conventions = file_content.cdl.split(':Conventions =')[1].split(';')[0].strip() - if "NCAS-AMOF" in conventions or "NCAS-GENERAL" in conventions or "NCAS-AMF" in conventions: + ### Check for NCAS data files and gather specs ### + # if template and specs are "default" values, check to see if + # file is an ncas file (assuming file name starts with instrument name) + if (template == "auto" and specs == None and + file_path.split("/")[-1].startswith("ncas-")): + # find appropriate specs depending on convention + if file_path.split(".")[-1] == "nc" and ":Conventions" in file_content.cdl: + conventions = file_content.cdl.split(":Conventions =")[1].split(";")[0].strip() + # NCAS-GENERAL file + if any(name in conventions for name in ["NCAS-GENERAL", "NCAS-AMF", "NCAS-AMOF"]): if verbose: print("\nNCAS-AMOF file detected, finding correct spec files") print("Finding correct AMOF version...") @@ -245,7 +250,7 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None # check specs exist for that version specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}") if not os.path.exists(specs_dir): - if verbose: print(f"Specs for version {version_number} not found, attempting download...") + if verbose: print(f"Specs for version NCAS-GENERAL-{version_number} not found, attempting download...") try: vocabs_dir = os.path.join(conf["settings"].get("vocabs_dir", "./checksit/vocabs"), f"AMF_CVs/{version_number}") cvs = urllib.request.urlopen(f"https://github.com/ncasuk/AMF_CVs/tree/v{version_number}/AMF_CVs") @@ -280,7 +285,6 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None sys.exit() except: raise - # get deployment mode and data product, to then get specs deployment_mode = file_content.cdl.split(':deployment_mode 
=')[1].split(';')[0].strip().strip('"') @@ -291,6 +295,30 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None # don't need to do template check template = "off" + # NCAS-RADAR (coming soon...) + # if "NCAS-Radar" in conventions + + elif (file_path.split(".")[-1].lower() in IMAGE_EXTENSIONS and + "XMP-photoshop:Instructions" in file_content.global_attrs.keys()): + conventions = file_content.global_attrs["XMP-photoshop:Instructions"] + if "National Centre for Atmospheric Science Image Metadata Standard" in file_content.global_attrs["XMP-photoshop:Instructions"].replace("\n"," "): + if verbose: + print("\nNCAS-IMAGE file detected, finding correct spec files") + print("Finding correct IMAGE version...") + version_number = conventions.replace("\n"," ").split("Metadata Standard ")[1].split(":")[0] + spec_folder = f"ncas-image-{version_number}" + if verbose: print(f" {version_number}") + specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}") + if not os.path.exists(specs_dir): + print(f"[ERROR] specs for NCAS-IMAGE {version_number} can not be found.") + print("Aborting...") + sys.exit() + product = file_path.split('/')[-1].split('_')[3] + product_spec = f"{spec_folder}/amof-{product}" + specs = [product_spec, f"{spec_folder}/amof-image-global-attrs"] + template = "off" + + if template == "off": tmpl = template @@ -404,6 +432,8 @@ def parse_file_header(self, file_path, auto_cache=False, verbose=False): reader = badc_csv elif ext in ("yml"): reader = yml + elif ext.lower() in IMAGE_EXTENSIONS: + reader = image else: raise Exception(f"No known reader for file with extension: {ext}") diff --git a/checksit/generic.py b/checksit/generic.py index 8fa6296e..421ebabd 100644 --- a/checksit/generic.py +++ b/checksit/generic.py @@ -99,7 +99,7 @@ def check_global_attrs(dct, defined_attrs=None, vocab_attrs=None, regex_attrs=No errors.append(f"[global-attributes:**************:{attr}]: No value defined for 
attribute '{attr}'.") else: errors.extend(vocabs.check(vocab_attrs[attr], dct["global_attributes"].get(attr), label=f"[global-attributes:******:{attr}]***")) - + for attr in regex_attrs: if attr not in dct['global_attributes']: errors.append( @@ -123,7 +123,9 @@ def check_global_attrs(dct, defined_attrs=None, vocab_attrs=None, regex_attrs=No elif is_undefined(dct['global_attributes'].get(attr)): errors.append(f"[global-attributes:**************:{attr}]: No value defined for attribute '{attr}'.") else: - errors.extend(rules.check(rules_attrs[attr], dct['global_attributes'].get(attr), label=f"[global-attributes:******:{attr}]***")) + rules_check_output = rules.check(rules_attrs[attr], dct['global_attributes'].get(attr), context=dct['inpt'], label=f"[global-attributes:******:{attr}]***") + warnings.extend(rules_check_output[1]) + errors.extend(rules_check_output[0]) return errors, warnings diff --git a/checksit/readers/cdl.py b/checksit/readers/cdl.py index 1df5ce93..a040e409 100644 --- a/checksit/readers/cdl.py +++ b/checksit/readers/cdl.py @@ -2,6 +2,7 @@ import re import yaml import subprocess as sp +import sys from ..cvs import vocabs, vocabs_prefix @@ -40,7 +41,8 @@ def _parse(self, inpt): for s in self.CDL_SPLITTERS: if s not in cdl_lines: - raise Exception(f"Invalid file or CDL contents provided: '{inpt[:100]}...'") + print(f"Please check your command - invalid file or CDL contents provided: '{inpt[:100]}...'") + sys.exit(1) sections = self._get_sections(cdl_lines, split_patterns=self.CDL_SPLITTERS, start_at=1) @@ -188,7 +190,8 @@ def to_yaml(self): def to_dict(self): return {"dimensions": self.dimensions, "variables": self.variables, - "global_attributes": self.global_attrs} + "global_attributes": self.global_attrs, + "inpt": self.inpt} def read(fpath, verbose=False): diff --git a/checksit/readers/image.py b/checksit/readers/image.py new file mode 100644 index 00000000..5052347e --- /dev/null +++ b/checksit/readers/image.py @@ -0,0 +1,60 @@ +import subprocess 
as sp +import yaml + +def get_output(cmd): + subp = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE) + return subp.stdout.read().decode("charmap"), subp.stderr.read().decode("charmap") + + +class ImageParser: + + def __init__(self, inpt, verbose=False): + self.inpt = inpt + self.verbose = verbose + self.base_exiftool_arguments = ["exiftool", "-G1", "-j", "-c", "%+.6f"] + self._find_exiftool() + self._parse(inpt) + + def _parse(self, inpt): + if self.verbose: print(f"[INFO] Parsing input: {inpt[:100]}...") + self.global_attrs = {} + exiftool_arguments = self.base_exiftool_arguments + [inpt] + exiftool_return_string = sp.check_output(exiftool_arguments) + raw_global_attrs = yaml.load(exiftool_return_string, Loader=yaml.SafeLoader)[0] + for tag_name in raw_global_attrs.keys(): + value_type = type(raw_global_attrs[tag_name]) + if value_type == list: + self.global_attrs[tag_name] = str(raw_global_attrs[tag_name][0]) + else: + self.global_attrs[tag_name] = str(raw_global_attrs[tag_name]) + + def _find_exiftool(self): + if self.verbose: print("[INFO] Searching for exiftool...") + which_output, which_error = get_output("which exiftool") + if which_error.startswith("which: no exiftool in"): + msg = ( + f"'exiftool' required to read image file metadata but cannot be found.\n" + f" Visit https://exiftool.org/ for information on 'exiftool'." 
+ ) + raise RuntimeError(msg) + else: + self.exiftool_location = which_output.strip() + if self.verbose: print(f"[INFO] Found exiftool at {self.exiftool_location}.") + + def _attrs_dict(self,content_lines): + attr_dict = {} + for line in content_lines: + if self.verbose: print(f"WORKING ON LINE: {line}") + key_0 = line.split("=",1)[0].strip() + key = key_0[1:] #removes first character - unwanted quotation marks + value = line.split("=",1)[1].strip() + attr_dict[key] = value + return attr_dict + + def to_dict(self): + return {"global_attributes": self.global_attrs, "inpt": self.inpt} + + +def read(fpath, verbose=False): + return ImageParser(fpath, verbose=verbose) + diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index 43a99355..99237cae 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -1,5 +1,8 @@ import os import re +from datetime import datetime +import requests +from urllib.request import urlopen from . import processors from ..config import get_config @@ -82,3 +85,171 @@ def string_of_length(value, context, extras=None, label=""): errors.append(f"{label} '{value}' must be exactly {min_length} characters") return errors + + +def validate_image_date_time(value, context, extras=None, label=""): + """ + A function to indifity if a date-time value is compatible with the NCAS image standard + """ + errors = [] + + try: + if value != datetime.strptime(value, "%Y:%m:%d %H:%M:%S").strftime("%Y:%m:%d %H:%M:%S") and value != datetime.strptime(value, "%Y:%m:%d #%H:%M:%S.%f").strftime("%Y:%m:%d %H:%M:%S.%f"): + errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s") + except ValueError: + errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s") + + return errors + + +def validate_orcid_ID(value, context, extras=None, label=""): + """ + A function to verify the format of an orcid ID + """ + orcid_string = 
"https://orcid.org/" # required format of start of the string + + errors = [] + + PI_orcid_digits = value[-19:] + PI_orcid_digits_only = PI_orcid_digits.replace("-", "") + + # Check that total the length is correct + if len(value) != 37: + errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX") + + # Check the start of the string (first 18 characters) + elif (value[0:18] != orcid_string or + + # Check that the "-" are in the correct places + value[22] != "-" or + value[27] != "-" or + value[32] != "-" or + + # Check that the last characters contain only "-" and digits + not PI_orcid_digits_only.isdigit): + + errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX") + + return errors + + +def list_of_names(value, context, extras=None, label=""): + """ + A function to verify the names of people when a list of names may be provided + """ + name_pattern = r'(.)+, (.)+ ?((.)+|((.)\.))' # The format names should be written in + character_name_pattern = r'[A-Za-z_À-ÿ\-\'\ \.\,]+' + + warnings = [] + + if type(value) == list: + for i in value: + if not re.fullmatch(name_pattern, i): + warnings.append(f"{label} '{value}' should be of the format , or , where appropriate") + if not re.fullmatch(character_name_pattern, i): + warnings.append(f"{label} '{value}' - please use characters A-Z, a-z, À-ÿ where appropriate") + + if type(value) == str: + if not re.fullmatch(name_pattern, value): + warnings.append(f"{label} '{value}' should be of the format , or , where appropriate") + if not re.fullmatch(character_name_pattern, value): + warnings.append(f"{label} '{value}' - please use characters A-Z, a-z, À-ÿ where appropriate") + + return warnings + + +def headline(value, context, extras=None, label=""): + """ + A function to verify the format of the Headline + """ + warnings = [] + + if len(value) > 150: + warnings.append(f"{label} '{value}' should contain no more than one sentence") + + if 
value.count(".") >= 2: + warnings.append(f"{label} '{value}' should contain no more than one sentence") + + if not value[0].isupper(): + warnings.append(f"{label} '{value}' should start with a capital letter") + + if len(value) < 10: + warnings.append(f"{label} '{value}' should be at least 10 characters") + + return warnings + + +def title_check(value, context, extras=None, label=""): + """ + A function to check if the title matches the system filename + """ + errors = [] + + if value != os.path.basename(context) : + errors.append(f"{label} '{value}' must match the name of the file") + + return errors + + +def url_checker(value, context, extras=None, label=""): + """ + A function to check if the url exists + """ + warnings = [] + + try: url=urlopen(value) + except: + warnings.append(f"{label} '{value}' is not a reachable url") + else: + if url.getcode() != 200: # (200 means it exists and is up and reachable) + warnings.append(f"{label} '{value}' is not a reachable url") + finally: + return warnings + + +def relation_url_checker(value, context, extras=None, label=""): + """ + A function to check if Relation field is in the correct format, and that the url exists + """ + errors = [] + + if " " not in value: + errors.append(f"{label} '{value}' should contain a space before the url") + else: + relation_url = value.partition(" ")[2] # extract only the url part of the relation string + if url_checker(relation_url, context, extras, label) != []: + errors.append(url_checker(relation_url, context, extras, label)) # check the url exists using the url_checker() function defined above + + return errors + + +def latitude(value, context, extras=None, label=""): + """ + A function to check if the latitude is within -90 and +90 + """ + errors = [] + + latitude = re.findall(r'[0-9]+', value) + int_latitude = int(latitude[0]) + dec_latitude = int(latitude[1]) + + if int_latitude > 90 or (int_latitude == 90 and dec_latitude > 0): + errors.append(f"{label} '{value}' must be within -90 
and +90 ") + + return errors + + +def longitude(value, context, extras=None, label=""): + """ + A function to check if the longitude is within -180 and +180 + """ + errors = [] + + longitude = re.findall(r'[0-9]+', value) + int_longitude = int(longitude[0]) + dec_longitude = int(longitude[1]) + + if int_longitude > 180 or (int_longitude == 180 and dec_longitude > 0): + errors.append(f"{label} '{value}' must be within -180 and +180 ") + + return errors diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index 12513b1f..e92f1336 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -28,7 +28,17 @@ def __init__(self): "datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?", "datetime-or-na": r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?)|" + _NOT_APPLICABLE_RULES, - "number": r"-?\d+(\.\d+)?" + "number": r"-?\d+(\.\d+)?", + "location": r'(.)+(\,\ )(.)+', + "latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}', + "longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}', + "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(.)+_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', + "title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', + "name-format": r'(.)+, (.)+ ?((.)+|((.)\.))', + "name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+', + "altitude-image-warning": r'-?\d+\sm', # should be integers only for images + "altitude-image": r'-?\d+(\.\d+)?\sm', + "ncas-email": r'[^@\s]+@ncas.ac.uk' } def _map_type_rule(self, type_rule): @@ -48,42 +58,70 @@ def check(self, rule_lookup, value, context=None, label=""): # Return a list of errors - empty list if no errors errors = [] + warnings = [] rule_lookup = re.sub(f"^{rules_prefix}:", "", rule_lookup) - if rule_lookup.startswith("rule-func:"): - rule_comps = 
rule_lookup.split(":") - rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_")) - extras = rule_comps[2:] - errors.extend(rule_func(value, context, extras, label=label)) + rule_lookup_list = rule_lookup.split(", ") - elif rule_lookup.startswith("type-rule"): - type_rule = rule_lookup.split(":")[1] + for i in rule_lookup_list: - if not isinstance(value, self._map_type_rule(type_rule)): - errors.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.") - - elif rule_lookup.startswith("regex:"): - pattern = ':'.join(rule_lookup.split(":")[1:]) # in case pattern has colons in it, e.g. a URL - if not re.match(f"^{pattern}$", value): - errors.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") + if i.startswith("rule-func:"): + rule_comps = i.split(":") + rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_")) + extras = rule_comps[2:] + errors.extend(rule_func(value, context, extras, label=label)) - elif rule_lookup.startswith("regex-rule:"): - regex_rule = rule_lookup.split(":", 1)[1] + elif i.startswith("rule-func-warning:"): + rule_comps = i.split(":") + rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_")) + extras = rule_comps[2:] + warnings.extend(rule_func(value, context, extras, label=label)) - if regex_rule in self.static_regex_rules: - pattern = self.static_regex_rules[regex_rule] + elif i.startswith("type-rule"): + type_rule = i.split(":")[1] - if not re.match("^" + pattern + "$", value): - errors.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") + if not isinstance(value, self._map_type_rule(type_rule)): + errors.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.") + + elif i.startswith("regex-warning:"): + pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. 
a URL + if not re.match(f"^{pattern}$", value): + warnings.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") + elif i.startswith("regex:"): + pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL + if not re.match(f"^{pattern}$", value): + errors.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") + + elif i.startswith("regex-rule-warning:"): + regex_rule = i.split(":", 1)[1] + + if regex_rule in self.static_regex_rules: + pattern = self.static_regex_rules[regex_rule] + + if not re.match("^" + pattern + "$", value): + warnings.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") + + else: + raise Exception(f"Rule not found with rule ID: {rule_lookup}.") + + elif i.startswith("regex-rule:"): + regex_rule = i.split(":", 1)[1] + + if regex_rule in self.static_regex_rules: + pattern = self.static_regex_rules[regex_rule] + + if not re.match("^" + pattern + "$", value): + errors.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") + + else: + raise Exception(f"Rule not found with rule ID: {rule_lookup}.") + else: raise Exception(f"Rule not found with rule ID: {rule_lookup}.") - else: - raise Exception(f"Rule not found with rule ID: {rule_lookup}.") - - return errors + return errors, warnings rules = Rules() diff --git a/checksit/specs.py b/checksit/specs.py index 614ea628..562e80aa 100644 --- a/checksit/specs.py +++ b/checksit/specs.py @@ -6,7 +6,6 @@ from .config import get_config - conf = get_config() specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), "groups") diff --git a/requirements.txt b/requirements.txt index af11ae6a..da22c4ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pyyaml cf-python netcdf4 pandas +requests diff --git a/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml new file mode 100644 index 
00000000..12c41ace --- /dev/null +++ b/specs/groups/ncas-image-v1.0/amof-image-global-attrs.yml @@ -0,0 +1,26 @@ +required-global-attrs: + func: checksit.generic.check_global_attrs + params: + rules_attrs: + # Version of the standard being followed + XMP-photoshop:Instructions: regex:Metadata\sfollow\sthe\sNational\sCentre\sfor\sAtmospheric\sScience\sImage\sMetadata\sStandard\sv1.0:\shttps://doi.org/10.5281/zenodo.6368295 + # Description of what is shown in the image + XMP-photoshop:Headline: rule-func-warning:headline + XMP-dc:Description: rule-func:string-of-length:10+ + # Date-time details + XMP-xmp:CreateDate: rule-func:validate-image-date-time + XMP-xmp:MetadataDate: rule-func:validate-image-date-time + # Licensing and acknowledgement details + XMP-dc:Rights: regex-warning:Use\sof\sthis\simage\sis\scovered\sby\sthe\s\(UK\)\sOpen\sGovernment\sLicence,\shttp://www.nationalarchives.gov.uk/doc/open-government-licence/ + XMP-xmpRights:WebStatement: regex-warning:http://www.nationalarchives.gov.uk/doc/open-government-licence/ + XMP-photoshop:Credit: regex-warning:National\sCentre\sfor\sAtmospheric\sScience\s\(NCAS\) + # Source of the image + XMP-dc:Title: regex-rule:title, regex-rule-warning:title-data-product, rule-func:title_check + # Instrument Scientist’s Details + XMP-dc:Creator: rule-func-warning:list_of_names + XMP-iptcCore:CreatorWorkEmail: regex-rule-warning:ncas-email, regex-rule:valid-email + XMP-iptcExt:CreatorIdentifier: rule-func:validate_orcid_ID, rule-func-warning:url_checker + # Principal Investigator’s Details + XMP-iptcExt:ContributorName: regex-rule-warning:name-format, regex-rule-warning:name-characters + XMP-iptcExt:ContributorIdentifier: rule-func:validate_orcid_ID, rule-func-warning:url_checker + XMP-iptcExt:ContributorRole: regex:Project\sPrincipal\sInvestigator \ No newline at end of file diff --git a/specs/groups/ncas-image-v1.0/amof-photo.yml b/specs/groups/ncas-image-v1.0/amof-photo.yml new file mode 100644 index 00000000..de31d3e2 --- 
/dev/null +++ b/specs/groups/ncas-image-v1.0/amof-photo.yml @@ -0,0 +1,12 @@ +required-global-attrs: + func: checksit.generic.check_global_attrs + params: + rules_attrs: + # Location of a camera or an instrument (photo) + XMP-iptcExt:LocationCreatedLocationName: regex-rule-warning:location + XMP-iptcExt:LocationCreatedGPSAltitude: regex-rule-warning:altitude-image-warning, regex-rule:altitude-image + XMP-iptcExt:LocationCreatedGPSLatitude: regex-rule:latitude-image, rule-func:latitude + XMP-iptcExt:LocationCreatedGPSLongitude: regex-rule:longitude-image, rule-func:longitude + # (Relation for photos) + XMP-dc:Relation: regex-warning:(((i|I)sPartOf\shttps://catalogue.ceda.ac.uk/uuid/)|((i|I)sFormatOf\shttps://catalogue.ceda.ac.uk/uuid/))[a-z0-9]{32} + XMP-dc:Relation: rule-func-warning:relation_url_checker diff --git a/specs/groups/ncas-image-v1.0/amof-plot.yml b/specs/groups/ncas-image-v1.0/amof-plot.yml new file mode 100644 index 00000000..935f5fff --- /dev/null +++ b/specs/groups/ncas-image-v1.0/amof-plot.yml @@ -0,0 +1,15 @@ +required-global-attrs: + func: checksit.generic.check_global_attrs + params: + rules_attrs: + # Location of a camera or an instrument (plots) + XMP-iptcExt:LocationShownLocationName: regex-rule-warning:location + XMP-iptcExt:LocationShownGPSAltitude: regex-rule-warning:altitude-image-warning, regex-rule:altitude-image + XMP-iptcExt:LocationShownGPSLatitude: regex-rule:latitude-image, rule-func:latitude + XMP-iptcExt:LocationShownGPSLongitude: regex-rule:longitude-image, rule-func:longitude + # (For plots only) + XMP-iptcExt:TemporalCoverageFrom: rule-func:validate-image-date-time + XMP-iptcExt:TemporalCoverageTo: rule-func:validate-image-date-time + # (Relation for plots) + XMP-dc:Relation: regex-warning:((i|I)sBasedOn\shttps://catalogue.ceda.ac.uk/uuid/)[a-z0-9]{32} + XMP-dc:Relation: rule-func-warning:relation_url_checker diff --git a/tests/test_images.py b/tests/test_images.py new file mode 100644 index 00000000..5c41f5d4 --- /dev/null 
+++ b/tests/test_images.py @@ -0,0 +1,80 @@ +import pytest +from click.testing import CliRunner +from checksit import cli + + +# photos from ncas named instruments +@pytest.mark.parametrize( + "photo, error_level, number_errors", + [ + ("ncas-cam-9_cao_20160510-134927_photo_v1.0.jpg", "NONE", 0), + ("ncas-cam-9_cao_20160510-134927_photo_test-2_v1.0.jpg", "ERROR", 1), + ("ncas-cam-9_cao_20160510-134927_photo_test-1_v1.0.jpg", "WARNING", 3), + ("ncas-cam-9_cao_20160510-134927_photo_test-3_v1.0.jpg", "ERROR", 1), + ("ncas-cam-9_cao_20160510-134927_photo_test-4_v1.0.jpg", "ERROR", 1), + ("ncas-cam-9_cao_20160510-134927_photo_test-5_v1.0.jpg", "ERROR", 2), + ("ncas-cam-9_cao_20160510-134927_photo_test-6_v1.0.jpg", "ERROR", 3), + ("ncas-cam-9_cao_20160510-134927_photo_test-7_v1.0.jpg", "ERROR", 1), + ], +) +def test_ncas_photo_checks(photo, error_level, number_errors): + runner = CliRunner() + photo_file = f"tests/test_images/{photo}" + result = runner.invoke(cli.check, ["-p", "-l", "compact", photo_file]) + level_found, errors_found = [i.strip() for i in result.output.split("|")[2:4]] + errors_found = int(errors_found) + assert error_level == level_found + assert number_errors == errors_found + + +# plots from non-ncas named instruments +@pytest.mark.parametrize( + "plot, error_level, number_errors", + [ + ("nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png", "ERROR", 1), + ("radar-mst_capel-dewi_20230706_m300.png", "ERROR", 1), + ("nerc-mstrf-met-sensors_capel-dewi_20230101_campbell-sci.png", "ERROR", 1), + ("nerc-mstrf-met-sensors_capel-dewi_20160906_campbell-sci.png", "ERROR", 20), + ("wind-sensors_frongoch_20090203.png", "ERROR", 13), + ("nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind_test-1_v1.0.png", "WARNING", 2), + ("nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-2_v1.0.png", "NONE", 0), + ("nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-3_v1.0.png", "ERROR", 1), + 
("nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-4_v1.0.png", "ERROR", 1), + ] +) +def test_other_plot_checks(plot, error_level, number_errors): + runner = CliRunner() + specs = "ncas-image-v1.0/amof-image-global-attrs,ncas-image-v1.0/amof-plot" + result = runner.invoke(cli.check, ["-p", "-l", "compact", "-t", "off", "--specs", specs, f"tests/test_images/{plot}"]) + level_found, errors_found = [i.strip() for i in result.output.split("|")[2:4]] + errors_found = int(errors_found) + assert error_level == level_found + assert number_errors == errors_found + + +# check error messages +@pytest.mark.parametrize( + "plot, error_message", + [ + ("nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png", + ( + "[global-attributes:******:XMP-dc:Title]*** Value 'nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png'" + " does not match regex rule: 'title'." + ), + ), + ("nerc-mstrf-met-sensors_capel-dewi_20160906_campbell-sci.png", + ( + "[global-attributes:**************:XMP-photoshop:Instructions]: " + "Attribute 'XMP-photoshop:Instructions' does not exist." 
+ ), + ), + ] +) +def test_check_errors(plot, error_message): + runner = CliRunner() + specs = "ncas-image-v1.0/amof-image-global-attrs,ncas-image-v1.0/amof-plot" + result = runner.invoke(cli.check, ["-p", "-l", "compact", "-t", "off", "--specs", specs, f"tests/test_images/{plot}"]) + message_found = result.output.split("|")[4].strip() + assert error_message == message_found + + diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-1_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-1_v1.0.jpg new file mode 100644 index 00000000..b9f33c9c Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-1_v1.0.jpg differ diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-2_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-2_v1.0.jpg new file mode 100644 index 00000000..b9f33c9c Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-2_v1.0.jpg differ diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-3_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-3_v1.0.jpg new file mode 100644 index 00000000..2823c4d9 Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-3_v1.0.jpg differ diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-4_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-4_v1.0.jpg new file mode 100644 index 00000000..4a80f1f3 Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-4_v1.0.jpg differ diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-5_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-5_v1.0.jpg new file mode 100644 index 00000000..abea190e Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-5_v1.0.jpg differ diff --git 
a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-6_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-6_v1.0.jpg new file mode 100644 index 00000000..4981f2eb Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-6_v1.0.jpg differ diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-7_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-7_v1.0.jpg new file mode 100644 index 00000000..256b5f55 Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_test-7_v1.0.jpg differ diff --git a/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_v1.0.jpg b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_v1.0.jpg new file mode 100644 index 00000000..1774119d Binary files /dev/null and b/tests/test_images/ncas-cam-9_cao_20160510-134927_photo_v1.0.jpg differ diff --git a/tests/test_images/nerc-mstrf-met-sensors_capel-dewi_20160906_campbell-sci.png b/tests/test_images/nerc-mstrf-met-sensors_capel-dewi_20160906_campbell-sci.png new file mode 100644 index 00000000..966b8890 Binary files /dev/null and b/tests/test_images/nerc-mstrf-met-sensors_capel-dewi_20160906_campbell-sci.png differ diff --git a/tests/test_images/nerc-mstrf-met-sensors_capel-dewi_20230101_campbell-sci.png b/tests/test_images/nerc-mstrf-met-sensors_capel-dewi_20230101_campbell-sci.png new file mode 100644 index 00000000..d585fdb9 Binary files /dev/null and b/tests/test_images/nerc-mstrf-met-sensors_capel-dewi_20230101_campbell-sci.png differ diff --git a/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-2_v1.0.png b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-2_v1.0.png new file mode 100644 index 00000000..c832367b Binary files /dev/null and b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-2_v1.0.png differ diff --git 
a/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-3_v1.0.png b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-3_v1.0.png new file mode 100644 index 00000000..8e7f5222 Binary files /dev/null and b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-3_v1.0.png differ diff --git a/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-4_v1.0.png b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-4_v1.0.png new file mode 100644 index 00000000..10ea004d Binary files /dev/null and b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_plot_st300_wind_test-4_v1.0.png differ diff --git a/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png new file mode 100644 index 00000000..cb68444f Binary files /dev/null and b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind.png differ diff --git a/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind_test-1_v1.0.png b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind_test-1_v1.0.png new file mode 100644 index 00000000..3e307d12 Binary files /dev/null and b/tests/test_images/nerc-mstrf-radar-mst_capel-dewi_20230809_st300_wind_test-1_v1.0.png differ diff --git a/tests/test_images/radar-mst_capel-dewi_20230706_m300.png b/tests/test_images/radar-mst_capel-dewi_20230706_m300.png new file mode 100644 index 00000000..225e9fe0 Binary files /dev/null and b/tests/test_images/radar-mst_capel-dewi_20230706_m300.png differ diff --git a/tests/test_images/wind-sensors_frongoch_20090203.png b/tests/test_images/wind-sensors_frongoch_20090203.png new file mode 100644 index 00000000..62025c26 Binary files /dev/null and b/tests/test_images/wind-sensors_frongoch_20090203.png differ diff --git a/tests/test_rules.py b/tests/test_rules.py index 
6faab9b7..3d45c10e 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -13,49 +13,49 @@ def test_match_file_name(): assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0 def _test_type(_type, value): - return len(r.check(f"type-rule:{_type}", value)) + return r.check(f"type-rule:{_type}", value) def test_type_rules(): tt = _test_type _type = "number" for value in 3.4, -4: - assert tt(_type, value) == 0 + assert tt(_type, value) == ([], []) for value in "3", "3.4", ["hi"]: - assert tt(_type, value) == 1 + assert tt(_type, value) != ([], []) _type = "float" for value in [3.4]: - assert tt(_type, value) == 0 + assert tt(_type, value) == ([], []) for value in "3", 3, ["hi"]: - assert tt(_type, value) == 1 + assert tt(_type, value) != ([], []) _type = "integer" for value in [3]: - assert tt(_type, value) == 0 + assert tt(_type, value) == ([], []) for value in "3", 3.5, ["hi"]: - assert tt(_type, value) == 1 + assert tt(_type, value) != ([], []) _type = "string" for value in "3", "hi": - assert tt(_type, value) == 0 + assert tt(_type, value) == ([], []) for value in 3, 4.5, ["hi"]: - assert tt(_type, value) == 1 + assert tt(_type, value) != ([], []) def test_regex_rules(): rule = "regex-rule:integer" - assert r.check(rule, "-1") == [] - assert r.check(rule, "500") == [] - assert r.check(rule, "1.3") != [] + assert r.check(rule, "-1") == ([], []) + assert r.check(rule, "500") == ([], []) + assert r.check(rule, "1.3") != ([], []) rule = "regex-rule:valid-email" - assert r.check(rule, "freda.bloggs@amail.com") == [] - assert r.check(rule, "@amail.com") != [] - assert r.check(rule, "freda.bloggs@") != [] + assert r.check(rule, "freda.bloggs@amail.com") == ([], []) + assert r.check(rule, "@amail.com") != ([], []) + assert r.check(rule, "freda.bloggs@") != ([], []) #TODO: Add checks for all the published rules #TODO: Add checks for some regular expressions to check they are executed correctly