Skip to content

Commit

Permalink
Merge pull request #42 from cedadev/additional_tests
Browse files Browse the repository at this point in the history
Additional tests
  • Loading branch information
joshua-hampton authored Feb 22, 2024
2 parents 28b32af + b05a001 commit f68c7fa
Show file tree
Hide file tree
Showing 18 changed files with 1,207 additions and 101 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,17 @@ jobs:
./configure
make all install
ln -sf /opt/lib/* $LD_LIBRARY_PATH
- name: Install exiftool
- name: Install exiftool and netcdf
run: |
sudo apt install libimage-exiftool-perl -y
sudo apt-get install -y netcdf-bin
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 black pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
- name: Look for exiftool
run: |
which exiftool
- name: Test with pytest
run: |
export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
python -m pytest -v tests/test_readers.py tests/test_images.py
python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py tests/test_cvs.py
4 changes: 2 additions & 2 deletions checksit/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ def check_var(dct, variable, defined_attrs, skip_spellcheck=False):
attr_value = np.array(attr_value, dtype=np.int8)
if not np.all(dct["variables"][variable].get(attr_key) == attr_value):
errors.append(
f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition {attr_value}, "
f"not {dct['variables'][variable].get(attr_key) if skip_spellcheck else ''}."
f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition '{attr_value}', "
f"not '{dct['variables'][variable].get(attr_key)}'."
)
#elif attr_key == 'flag_meanings':
# print(attr_value)
Expand Down
47 changes: 30 additions & 17 deletions checksit/rules/rule_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def match_file_name(value, context, extras=None, label=""):

def match_one_of(value, context, extras=None, label=""):
"""
Matches only one of...
value matches one of options defined in extras
default rule splitter is '|' and defined in checksit.ini file
"""
options = [x.strip() for x in extras[0].split(rule_splitter)]
errors = []
Expand All @@ -55,7 +56,7 @@ def match_one_of(value, context, extras=None, label=""):

def match_one_or_more_of(value, context, extras=None, label=""):
"""
Matches one of more of...
String value or list value must match one or more of the options given in extras
"""
def as_set(x, sep): return set([i.strip() for i in x.split(sep)])
options = as_set(extras[0], rule_splitter)
Expand Down Expand Up @@ -93,10 +94,15 @@ def validate_image_date_time(value, context, extras=None, label=""):
"""
errors = []

try:
if value != datetime.strptime(value, "%Y:%m:%d %H:%M:%S").strftime("%Y:%m:%d %H:%M:%S") and value != datetime.strptime(value, "%Y:%m:%d #%H:%M:%S.%f").strftime("%Y:%m:%d %H:%M:%S.%f"):
errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s")
except ValueError:
match = False
for f in ["%Y:%m:%d %H:%M:%S", "%Y:%m:%d %H:%M:%S.%f"]:
if match == False:
try:
match = (value == datetime.strptime(value, f).strftime(f))
except ValueError:
pass

if not match:
errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s")

return errors
Expand Down Expand Up @@ -125,8 +131,11 @@ def validate_orcid_ID(value, context, extras=None, label=""):
value[27] != "-" or
value[32] != "-" or

# Check that the last characters contain only "-" and digits
not PI_orcid_digits_only.isdigit):
# Check that the last characters contain only "-" and digits (plus 'X' for last digit)
not (
PI_orcid_digits_only.isdigit() or (PI_orcid_digits_only[0:15].isdigit() and PI_orcid_digits_only[15] == "X")
)
):

errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX")

Expand Down Expand Up @@ -164,17 +173,21 @@ def headline(value, context, extras=None, label=""):
"""
warnings = []

if len(value) > 150:
warnings.append(f"{label} '{value}' should contain no more than one sentence")
if value == "":
warnings.append(f"{label} '{value}' should not be empty")

else:
if len(value) > 150:
warnings.append(f"{label} '{value}' should contain no more than one sentence")

if value.count(".") >= 2:
warnings.append(f"{label} '{value}' should contain no more than one sentence")
if value.count(".") >= 2:
warnings.append(f"{label} '{value}' should contain no more than one sentence")

if not value[0].isupper():
warnings.append(f"{label} '{value}' should start with a capital letter")
if not value[0].isupper():
warnings.append(f"{label} '{value}' should start with a capital letter")

if len(value) < 10:
warnings.append(f"{label} '{value}' should be at least 10 characters")
if len(value) < 10:
warnings.append(f"{label} '{value}' should be at least 10 characters")

return warnings

Expand Down Expand Up @@ -218,7 +231,7 @@ def relation_url_checker(value, context, extras=None, label=""):
else:
relation_url = value.partition(" ")[2] # extract only the url part of the relation string
if url_checker(relation_url, context, extras, label) != []:
errors.append(url_checker(relation_url, context, extras, label)) # check the url exists using the url_checker() function defined above
errors.extend(url_checker(relation_url, context, extras, label)) # check the url exists using the url_checker() function defined above

return errors

Expand Down
54 changes: 18 additions & 36 deletions checksit/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self):
"integer": r"-?\d+",
"valid-email": r"[^@\s]+@[^@\s]+\.[^\s@]+",
"valid-url": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?",
"valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+))|" + _NOT_APPLICABLE_RULES,
"valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?)|" + _NOT_APPLICABLE_RULES,
"match:vN.M": r"v\d\.\d",
"datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
"datetime-or-na":
Expand All @@ -32,9 +32,9 @@ def __init__(self):
"location": r'(.)+(\,\ )(.)+',
"latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}',
"longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}',
"title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(.)+_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
"name-format": r'(.)+, (.)+ ?((.)+|((.)\.))',
"name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))',
"name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+',
"altitude-image-warning": r'-?\d+\sm', # should be integers only for images
"altitude-image": r'-?\d+(\.\d+)?\sm',
Expand Down Expand Up @@ -66,60 +66,42 @@ def check(self, rule_lookup, value, context=None, label=""):

for i in rule_lookup_list:

if i.startswith("rule-func:"):
rule_comps = i.split(":")
rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_"))
extras = rule_comps[2:]
errors.extend(rule_func(value, context, extras, label=label))
if i.split(":")[0].endswith("-warning"):
output = warnings
else:
output = errors

elif i.startswith("rule-func-warning:"):
if i.startswith("rule-func"):
rule_comps = i.split(":")
rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_"))
extras = rule_comps[2:]
warnings.extend(rule_func(value, context, extras, label=label))
output.extend(rule_func(value, context, extras, label=label))

elif i.startswith("type-rule"):
type_rule = i.split(":")[1]

if not isinstance(value, self._map_type_rule(type_rule)):
errors.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.")

elif i.startswith("regex-warning:"):
pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL
if not re.match(f"^{pattern}$", value):
warnings.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")

elif i.startswith("regex:"):
pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL
if not re.match(f"^{pattern}$", value):
errors.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
output.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.")

elif i.startswith("regex-rule-warning:"):
elif i.startswith("regex-rule"):
regex_rule = i.split(":", 1)[1]

if regex_rule in self.static_regex_rules:
pattern = self.static_regex_rules[regex_rule]

if not re.match("^" + pattern + "$", value):
warnings.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")

else:
raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
raise Exception(f"Regex rule not found with rule ID: {i}.")

elif i.startswith("regex-rule:"):
regex_rule = i.split(":", 1)[1]

if regex_rule in self.static_regex_rules:
pattern = self.static_regex_rules[regex_rule]

if not re.match("^" + pattern + "$", value):
errors.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
elif i.startswith("regex"):
pattern = i.split(":", 1)[1] # in case pattern has colons in it, e.g. a URL
if not re.match(f"^{pattern}$", value):
output.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")

else:
raise Exception(f"Rule not found with rule ID: {rule_lookup}.")

else:
raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
raise Exception(f"Rule not found with rule ID: {i}.")

return errors, warnings

Expand Down
10 changes: 4 additions & 6 deletions checksit/specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,20 @@ def load_specs(spec_ids=None):
spec_files = [f"{specs_dir}/{spec_id}.yml" for spec_id in spec_ids] or \
glob.glob(f"{specs_dir}/*.yml")

return _parse_specs(spec_files)
return _parse_specs(spec_files)


def show_specs(spec_ids=None, verbose=False):

all_specs = load_specs(spec_ids)
spec_ids_names = tuple([(spec_id.split("/")[-1]) for spec_id in spec_ids])

if not spec_ids:
specs = all_specs
specs = all_specs.items()
else:
specs = [(spec_ids[spec_ids_names.index(spec_id)], spec) for (spec_id, spec) in all_specs.items() if spec_id in spec_ids_names]

print("Specifications:")
for spec_id, spec in specs:

print(f"\n{spec_id}:")
print(json.dumps(spec, indent=4).replace("\\\\", "\\"))

Expand Down Expand Up @@ -68,6 +66,6 @@ def run_checks(self, record, skip_spellcheck=False):
record, check_dict, skip_spellcheck=skip_spellcheck
)
errors.extend(check_errors)
warnings.extend(check_warnings)
warnings.extend(check_warnings)

return errors, warnings
10 changes: 10 additions & 0 deletions checksit/vocabs/tests/test_instruments.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"test_instruments": {
"inst1": {
"instrument_id": "inst1"
},
"inst2": {
"instrument_id": "inst2"
}
}
}
12 changes: 12 additions & 0 deletions checksit/vocabs/tests/test_platforms.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"test_platforms": {
"plat1": {
"platform_id": "plat1",
"description": "test platform 1"
},
"plat2": {
"platform_id": "plat2",
"description": "test platform 2"
}
}
}
6 changes: 6 additions & 0 deletions checksit/vocabs/tests/test_products.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"test_products": [
"prod1",
"prod2"
]
}
13 changes: 13 additions & 0 deletions specs/groups/tests/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
var-requires:
func: checksit.generic.check_var_attrs
params:
defined_attrs:
- long_name

required-global-attrs:
func: checksit.generic.check_dim_exists
params:
dimensions:
- time


38 changes: 19 additions & 19 deletions tests/test_cvs.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
from checksit.cvs import vocabs, vc
from checksit.cvs import vocabs
import pytest


lookups = {
'vocabs:ukcp18:variables:season_year':
{'dimensions': ['time'], 'units': '1', 'dtype': 'int', 'long_name': 'season_year'},
'vocabs:ukcp18:collection':
['land-cpm', 'land-derived', 'land-gcm', 'land-indices', 'land-prob', 'land-rcm', 'land-rcm-gwl', 'marine-sim'],
'vocabs:cf-netcdf:Conventions':
["CF-1.5", "CF-1.6"]
}


for lookup, exp_value in lookups.items():
value = vocabs.lookup(lookup)
assert exp_value == value


for lookup, exp_value in lookups.items():
value = vc._lookup(lookup)
assert exp_value == value
def test_lookup():
    """Check ``vocabs.lookup`` results against the test instruments vocab.

    The table pairs each lookup string with the value the lookup is expected
    to return; the final check expects a ``ValueError`` when ``__all__`` is
    followed by a second ``__all__``.
    """
    expected_by_lookup = [
        (
            '__vocabs__:tests/test_instruments:test_instruments',
            {'inst1': {"instrument_id": "inst1"}, "inst2": {"instrument_id": "inst2"}},
        ),
        (
            '__vocabs__:tests/test_instruments:test_instruments:__all__',
            ["inst1", "inst2"],
        ),
        (
            '__vocabs__:tests/test_instruments:test_instruments:inst1',
            {"instrument_id": "inst1"},
        ),
        (
            '__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id',
            ["inst1", "inst2"],
        ),
    ]
    for lookup_id, expected in expected_by_lookup:
        assert vocabs.lookup(lookup_id) == expected

    with pytest.raises(ValueError):
        vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__:__all__')

def test_check():
    """Check ``vocabs.check`` messages against the test vocab fixtures.

    Each case is ``(lookup, candidate value, expected messages)``; an empty
    list is the expected result for values the check accepts.
    """
    cases = [
        (
            '__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id',
            'inst1',
            [],
        ),
        (
            "__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id",
            "inst3",
            [
                "Test 'inst3' not in vocab options: ['inst1', 'inst2'] (using: '__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id')"
            ],
        ),
        (
            '__vocabs__:tests/test_platforms:test_platforms:plat1',
            {"platform_id": "plat1"},
            ["Test does not have attribute 'description'"],
        ),
        (
            '__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id',
            "plat1",
            [],
        ),
        (
            '__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id',
            "plat2",
            ["Test 'plat2' does not equal required vocab value: 'plat1' (using: '__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id')"],
        ),
    ]
    for lookup_id, candidate, expected_messages in cases:
        assert vocabs.check(lookup_id, candidate, label="Test") == expected_messages
Loading

0 comments on commit f68c7fa

Please sign in to comment.