Merge pull request #42 from cedadev/additional_tests

Additional tests
cedadev · Feb 22, 2024 · f68c7fa · f68c7fa
2 parents 28b32af + b05a001
commit f68c7fa
Show file tree

Hide file tree

Showing 18 changed files with 1,207 additions and 101 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -29,19 +29,17 @@ jobs:
         ./configure
         make all install
         ln -sf /opt/lib/* $LD_LIBRARY_PATH
-    - name: Install exiftool
+    - name: Install exiftool and netcdf
       run: |
         sudo apt install libimage-exiftool-perl -y
+        sudo apt-get install -y netcdf-bin
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install flake8 black pytest
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
-    - name: Look for exiftool
-      run: |
-        which exiftool
     - name: Test with pytest
       run: |
         export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
-        python -m pytest -v tests/test_readers.py tests/test_images.py
+        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py tests/test_cvs.py
diff --git a/checksit/generic.py b/checksit/generic.py
@@ -222,8 +222,8 @@ def check_var(dct, variable, defined_attrs, skip_spellcheck=False):
                     attr_value = np.array(attr_value, dtype=np.int8)
                     if not np.all(dct["variables"][variable].get(attr_key) == attr_value):
                         errors.append(
-                            f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition {attr_value}, "
-                            f"not {dct['variables'][variable].get(attr_key) if skip_spellcheck else ''}."
+                            f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition '{attr_value}', "
+                            f"not '{dct['variables'][variable].get(attr_key)}'."
                         )
                 #elif attr_key == 'flag_meanings':
                 #    print(attr_value)

diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py
@@ -42,7 +42,8 @@ def match_file_name(value, context, extras=None, label=""):
 
 def match_one_of(value, context, extras=None, label=""):
     """
-    Matches only one of...
+    value matches one of options defined in extras
+    default rule splitter is '|' and defined in checksit.ini file
     """
     options = [x.strip() for x in extras[0].split(rule_splitter)]
     errors = []
@@ -55,7 +56,7 @@ def match_one_of(value, context, extras=None, label=""):
 
 def match_one_or_more_of(value, context, extras=None, label=""):
     """
-    Matches one of more of...
+    String value or list value must match one or more of the options given in extras
     """
     def as_set(x, sep): return set([i.strip() for i in x.split(sep)])
     options = as_set(extras[0], rule_splitter)
@@ -93,10 +94,15 @@ def validate_image_date_time(value, context, extras=None, label=""):
     """
     errors = []
 
-    try:
-        if value != datetime.strptime(value, "%Y:%m:%d %H:%M:%S").strftime("%Y:%m:%d %H:%M:%S") and value != datetime.strptime(value, "%Y:%m:%d #%H:%M:%S.%f").strftime("%Y:%m:%d %H:%M:%S.%f"):
-            errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s")
-    except ValueError:
+    match = False
+    for f in ["%Y:%m:%d %H:%M:%S", "%Y:%m:%d %H:%M:%S.%f"]:
+        if match == False:
+            try:
+                match = (value == datetime.strptime(value, f).strftime(f))
+            except ValueError:
+                pass
+
+    if not match:
         errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s")
 
     return errors
@@ -125,8 +131,11 @@ def validate_orcid_ID(value, context, extras=None, label=""):
         value[27] != "-" or
         value[32] != "-" or
 
-        # Check that the last characters contain only "-" and digits
-        not PI_orcid_digits_only.isdigit):
+        # Check that the last characters contain only "-" and digits (plus 'X' for last digit)
+        not (
+            PI_orcid_digits_only.isdigit() or (PI_orcid_digits_only[0:15].isdigit() and PI_orcid_digits_only[15] == "X")
+        )
+    ):
 
         errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX")
 
@@ -164,17 +173,21 @@ def headline(value, context, extras=None, label=""):
     """
     warnings = []
 
-    if len(value) > 150:
-        warnings.append(f"{label} '{value}' should contain no more than one sentence")
+    if value == "":
+        warnings.append(f"{label} '{value}' should not be empty")
+
+    else:
+        if len(value) > 150:
+            warnings.append(f"{label} '{value}' should contain no more than one sentence")
 
-    if value.count(".") >= 2:
-        warnings.append(f"{label} '{value}' should contain no more than one sentence")
+        if value.count(".") >= 2:
+            warnings.append(f"{label} '{value}' should contain no more than one sentence")
 
-    if not value[0].isupper():
-        warnings.append(f"{label} '{value}' should start with a capital letter")
+        if not value[0].isupper():
+            warnings.append(f"{label} '{value}' should start with a capital letter")
 
-    if len(value) < 10:
-        warnings.append(f"{label} '{value}' should be at least 10 characters")
+        if len(value) < 10:
+            warnings.append(f"{label} '{value}' should be at least 10 characters")
 
     return warnings
 
@@ -218,7 +231,7 @@ def relation_url_checker(value, context, extras=None, label=""):
     else:
         relation_url = value.partition(" ")[2]        # extract only the url part of the relation string
         if url_checker(relation_url, context, extras, label) != []:
-            errors.append(url_checker(relation_url, context, extras, label))       # check the url exists using the url_checker() function defined above
+            errors.extend(url_checker(relation_url, context, extras, label))       # check the url exists using the url_checker() function defined above
 
     return errors
 

diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py
@@ -23,7 +23,7 @@ def __init__(self):
             "integer": r"-?\d+",
             "valid-email": r"[^@\s]+@[^@\s]+\.[^\s@]+",
             "valid-url": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?",
-            "valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+))|" + _NOT_APPLICABLE_RULES,
+            "valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?)|" + _NOT_APPLICABLE_RULES,
             "match:vN.M": r"v\d\.\d",
             "datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
             "datetime-or-na": 
@@ -32,9 +32,9 @@ def __init__(self):
             "location": r'(.)+(\,\ )(.)+',
             "latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}',
             "longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}',
-            "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(.)+_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
+            "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
             "title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
-            "name-format": r'(.)+, (.)+ ?((.)+|((.)\.))',
+            "name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))',
             "name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+',
             "altitude-image-warning": r'-?\d+\sm',    # should be integers only for images
             "altitude-image": r'-?\d+(\.\d+)?\sm',
@@ -66,60 +66,42 @@ def check(self, rule_lookup, value, context=None, label=""):
 
         for i in rule_lookup_list:
 
-            if i.startswith("rule-func:"):
-                rule_comps = i.split(":")
-                rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_"))
-                extras = rule_comps[2:]
-                errors.extend(rule_func(value, context, extras, label=label))
+            if i.split(":")[0].endswith("-warning"):
+                output = warnings
+            else:
+                output = errors
 
-            elif i.startswith("rule-func-warning:"):
+            if i.startswith("rule-func"):
                 rule_comps = i.split(":")
                 rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_"))
                 extras = rule_comps[2:]
-                warnings.extend(rule_func(value, context, extras, label=label))
+                output.extend(rule_func(value, context, extras, label=label))
 
             elif i.startswith("type-rule"):
                 type_rule = i.split(":")[1]
 
                 if not isinstance(value, self._map_type_rule(type_rule)):
-                    errors.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.")
-
-            elif i.startswith("regex-warning:"):
-                pattern = ':'.join(i.split(":")[1:])  # in case pattern has colons in it, e.g. a URL 
-                if not re.match(f"^{pattern}$", value):
-                    warnings.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
-
-            elif i.startswith("regex:"):
-                pattern = ':'.join(i.split(":")[1:])  # in case pattern has colons in it, e.g. a URL 
-                if not re.match(f"^{pattern}$", value):
-                    errors.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
+                    output.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.")
 
-            elif i.startswith("regex-rule-warning:"):
+            elif i.startswith("regex-rule"):
                 regex_rule = i.split(":", 1)[1]
 
                 if regex_rule in self.static_regex_rules:
                     pattern = self.static_regex_rules[regex_rule]
 
                     if not re.match("^" + pattern + "$", value):
-                        warnings.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
+                        output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
 
                 else:
-                    raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
+                    raise Exception(f"Regex rule not found with rule ID: {i}.")
 
-            elif i.startswith("regex-rule:"):
-                regex_rule = i.split(":", 1)[1]
-
-                if regex_rule in self.static_regex_rules:
-                    pattern = self.static_regex_rules[regex_rule]
-
-                    if not re.match("^" + pattern + "$", value):
-                        errors.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
+            elif i.startswith("regex"):
+                pattern = i.split(":", 1)[1]  # in case pattern has colons in it, e.g. a URL
+                if not re.match(f"^{pattern}$", value):
+                    output.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
 
-                else:
-                    raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
-
             else:
-                raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
+                raise Exception(f"Rule not found with rule ID: {i}.")
 
         return errors, warnings
 

diff --git a/checksit/specs.py b/checksit/specs.py
@@ -19,22 +19,20 @@ def load_specs(spec_ids=None):
     spec_files = [f"{specs_dir}/{spec_id}.yml" for spec_id in spec_ids] or \
                  glob.glob(f"{specs_dir}/*.yml")
 
-    return _parse_specs(spec_files) 
-     
+    return _parse_specs(spec_files)
+
 
 def show_specs(spec_ids=None, verbose=False):
-
     all_specs = load_specs(spec_ids)
     spec_ids_names = tuple([(spec_id.split("/")[-1]) for spec_id in spec_ids])
 
     if not spec_ids:
-        specs = all_specs
+        specs = all_specs.items()
     else:
         specs = [(spec_ids[spec_ids_names.index(spec_id)], spec) for (spec_id, spec) in all_specs.items() if spec_id in spec_ids_names]
 
     print("Specifications:")
     for spec_id, spec in specs:
-
         print(f"\n{spec_id}:")
         print(json.dumps(spec, indent=4).replace("\\\\", "\\"))
 
@@ -68,6 +66,6 @@ def run_checks(self, record, skip_spellcheck=False):
                                                record, check_dict, skip_spellcheck=skip_spellcheck
                                            )
             errors.extend(check_errors)
-            warnings.extend(check_warnings) 
+            warnings.extend(check_warnings)
 
         return errors, warnings
diff --git a/checksit/vocabs/tests/test_instruments.json b/checksit/vocabs/tests/test_instruments.json
@@ -0,0 +1,10 @@
+{
+    "test_instruments": {
+        "inst1": {
+            "instrument_id": "inst1"
+        },
+        "inst2": {
+            "instrument_id": "inst2"
+        }
+    }
+}
diff --git a/checksit/vocabs/tests/test_platforms.json b/checksit/vocabs/tests/test_platforms.json
@@ -0,0 +1,12 @@
+{
+    "test_platforms": {
+        "plat1": {
+            "platform_id": "plat1",
+            "description": "test platform 1"
+        },
+        "plat2": {
+            "platform_id": "plat2",
+            "description": "test platform 2"
+        }
+    }
+}
diff --git a/checksit/vocabs/tests/test_products.json b/checksit/vocabs/tests/test_products.json
@@ -0,0 +1,6 @@
+{
+    "test_products": [
+        "prod1",
+        "prod2"
+    ]
+}
diff --git a/specs/groups/tests/test.yml b/specs/groups/tests/test.yml
@@ -0,0 +1,13 @@
+var-requires:
+  func: checksit.generic.check_var_attrs
+  params:
+    defined_attrs: 
+      - long_name
+
+required-global-attrs:
+  func: checksit.generic.check_dim_exists
+  params:
+    dimensions:
+      - time
+
+
diff --git a/tests/test_cvs.py b/tests/test_cvs.py
@@ -1,22 +1,22 @@
-from checksit.cvs import vocabs, vc
+from checksit.cvs import vocabs
+import pytest
 
 
-lookups = {
-    'vocabs:ukcp18:variables:season_year':
-        {'dimensions': ['time'], 'units': '1', 'dtype': 'int', 'long_name': 'season_year'},
-    'vocabs:ukcp18:collection': 
-        ['land-cpm', 'land-derived', 'land-gcm', 'land-indices', 'land-prob', 'land-rcm', 'land-rcm-gwl', 'marine-sim'],
-    'vocabs:cf-netcdf:Conventions':
-        ["CF-1.5", "CF-1.6"]
-}
-
-
-for lookup, exp_value in lookups.items():
-    value = vocabs.lookup(lookup)
-    assert exp_value == value
-
-
-for lookup, exp_value in lookups.items():
-    value = vc._lookup(lookup)
-    assert exp_value == value
+def test_lookup():
+    assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments') == {'inst1': {"instrument_id": "inst1"}, "inst2": {"instrument_id": "inst2"}}
+    assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__') == ["inst1", "inst2"]
+    assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:inst1') == {"instrument_id": "inst1"}
+    assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id') == ["inst1", "inst2"]
+    with pytest.raises(ValueError):
+        vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__:__all__')
 
+def test_check():
+    assert vocabs.check('__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id', 'inst1', label = "Test") == []
+    assert vocabs.check(
+        "__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id", "inst3", label="Test",
+    ) == [
+        "Test 'inst3' not in vocab options: ['inst1', 'inst2'] (using: '__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id')"
+    ]
+    assert vocabs.check('__vocabs__:tests/test_platforms:test_platforms:plat1', {"platform_id": "plat1"}, label = "Test") == ["Test does not have attribute 'description'"]
+    assert vocabs.check('__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id', "plat1", label = "Test") == []
+    assert vocabs.check('__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id', "plat2", label = "Test") == ["Test 'plat2' does not equal required vocab value: 'plat1' (using: '__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id')"]