Skip to content

Commit

Permalink
Merge pull request #1062 from ioos/develop
Browse files Browse the repository at this point in the history
Final merge prior to 5.1.1 release
  • Loading branch information
benjwadams authored Apr 16, 2024
2 parents 03e279b + 6498863 commit da97a6d
Show file tree
Hide file tree
Showing 16 changed files with 102 additions and 44 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ repos:
- test_requirements.txt

- repo: https://github.com/psf/black
rev: 24.3.0
rev: 24.4.0
hooks:
- id: black
language_version: python3
Expand All @@ -31,7 +31,7 @@ repos:


- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.5
rev: v0.3.7
hooks:
- id: ruff

Expand Down
5 changes: 3 additions & 2 deletions compliance_checker/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ def __del__(self):
inadvertently mutated by other functions.
"""

cfutil.get_geophysical_variables.cache_clear()
cfutil.get_time_variables.cache_clear()
if cfutil is not None:
cfutil.get_geophysical_variables.cache_clear()
cfutil.get_time_variables.cache_clear()


class BaseNCCheck:
Expand Down
27 changes: 15 additions & 12 deletions compliance_checker/cf/cf_1_6.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,12 @@ def check_fill_value_equal_missing_value(self, ds):

return Result(
BaseCheck.MEDIUM,
(len(fails), total),
(total - len(fails), total),
self.section_titles["2.5"],
msgs=fails,
)

def check_valid_range_or_valid_min_max_present(self, ds):
def check_valid_range_and_valid_min_max_present(self, ds):
"""
The valid_range attribute must not be present if the valid_min
and/or valid_max attributes are present. This according to 2.5.1 Requirements.
Expand All @@ -443,19 +443,22 @@ def check_valid_range_or_valid_min_max_present(self, ds):
total = 0

for variable in ds.variables.values():
if hasattr(variable, "valid_max") and (
hasattr(variable, "valid_min") or hasattr(variable, "valid_range")
):
total = total + 1

fails.append(
f"For the variable {variable.name} the valid_range attribute must not be present "
"if the valid_min and/or valid_max attributes are present",
)
if hasattr(variable, "valid_max") or hasattr(variable, "valid_min"):
total += 1
# if there's also valid_range in addition to
# valid_min/valid_max, this is not compliant
if hasattr(variable, "valid_range"):
fails.append(
f"For the variable {variable.name} the valid_range attribute must not be present "
"if the valid_min and/or valid_max attributes are present",
)
# *Just* valid_range should be added to total as well
elif hasattr(variable, "valid_range"):
total += 1

return Result(
BaseCheck.MEDIUM,
(len(fails), total),
(total - len(fails), total),
self.section_titles["2.5"],
msgs=fails,
)
Expand Down
1 change: 0 additions & 1 deletion compliance_checker/cf/cf_1_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ def check_cell_boundaries_interval(self, ds):
reasoning,
)
ret_val.append(result)
print(ret_val)
return ret_val

def check_cell_measures(self, ds):
Expand Down
6 changes: 5 additions & 1 deletion compliance_checker/protocols/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def is_remote_netcdf(ds_str):
else:
content_type = head_req.headers.get("content-type")

if content_type is None:
return False

# if the Content-Type header returned was "application/x-netcdf",
# or a netCDF file (not OPeNDAP) we can open this into a Dataset
return content_type == "application/x-netcdf"
# Add support for application/x-netcdf;ver=4
return content_type.split(";")[0] == "application/x-netcdf"
19 changes: 14 additions & 5 deletions compliance_checker/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
from pathlib import Path
from urllib.parse import urlparse

import importlib_metadata
import requests
from lxml import etree as ET
from netCDF4 import Dataset
from owslib.sos import SensorObservationService
from owslib.swe.sensor.sml import SensorML
from packaging.version import parse
from pkg_resources import working_set

from compliance_checker import __version__, tempnc
from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value
Expand Down Expand Up @@ -73,8 +73,10 @@ def _get_generator_plugins(cls):
"""

if not hasattr(cls, "suite_generators"):
gens = working_set.iter_entry_points("compliance_checker.generators")
cls.suite_generators = [x.resolve() for x in gens]
gens = importlib_metadata.entry_points(
group="compliance_checker.generators",
)
cls.suite_generators = [x.load() for x in gens]

return cls.suite_generators

Expand Down Expand Up @@ -136,7 +138,9 @@ def load_all_available_checkers(cls):
Helper method to retrieve all sub checker classes derived from various
base classes.
"""
cls._load_checkers(working_set.iter_entry_points("compliance_checker.suites"))
cls._load_checkers(
importlib_metadata.entry_points(group="compliance_checker.suites"),
)

@classmethod
def _load_checkers(cls, checkers):
Expand All @@ -147,7 +151,7 @@ def _load_checkers(cls, checkers):

for c in checkers:
try:
check_obj = c.resolve()
check_obj = c.load()
if hasattr(check_obj, "_cc_spec") and hasattr(
check_obj,
"_cc_spec_version",
Expand Down Expand Up @@ -867,6 +871,11 @@ def load_remote_dataset(self, ds_str):
content_type = response.headers.get("content-type")
if content_type.split(";")[0] == "text/xml":
return self.process_doc(response.content)
elif content_type.split(";")[0] == "application/x-netcdf":
return Dataset(
urlparse(response.url).path,
memory=response.content,
)
else:
raise ValueError(
f"Unknown service with content-type: {content_type}",
Expand Down
10 changes: 9 additions & 1 deletion compliance_checker/tests/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import tempfile

from netCDF4 import Dataset
from netCDF4._netCDF4 import Dataset


class MockNetCDF(Dataset):
Expand All @@ -23,6 +23,14 @@ def __init__(self, filename=None):
persist=False,
)

# suppress usual dealloc routine to prevent caught exception messages
# from printing
def __dealloc__(self):
    """Deallocate the in-memory dataset quietly.

    The parent class's dealloc hook is invoked when available, but any
    AttributeError it raises (e.g. when the instance was never fully
    initialized and the attribute is absent) is swallowed so that no
    caught-exception message is printed during teardown.
    """
    try:
        # NOTE(review): __dealloc__ is presumably the netCDF4/Cython
        # teardown hook on the base Dataset — confirm against the
        # installed netCDF4 version.
        super().__dealloc__()
    except AttributeError:
        # Parent has no __dealloc__ attribute (or lookup failed);
        # nothing to clean up, so ignore.
        pass


class MockTimeSeries(MockNetCDF):
"""
Expand Down
9 changes: 5 additions & 4 deletions compliance_checker/tests/test_cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def test_check_fill_value_equal_missing_value(self):

assert result.msgs == expected_msgs

def test_check_valid_range_or_valid_min_max_present(self):
def test_check_valid_range_and_valid_min_max_present(self):
"""
2.5.1 Missing data, valid and actual range of data
Requirements:
Expand Down Expand Up @@ -426,7 +426,7 @@ def test_check_valid_range_or_valid_min_max_present(self):
dataset.variables["c"][1] = 2
dataset.variables["c"].setncattr("valid_range", [-10, 10])

result = self.cf.check_valid_range_or_valid_min_max_present(dataset)
result = self.cf.check_valid_range_and_valid_min_max_present(dataset)

# check if the test fails when when variable "a" is checked.
expected_msgs = [
Expand All @@ -436,7 +436,7 @@ def test_check_valid_range_or_valid_min_max_present(self):
]

assert result.msgs == expected_msgs
assert result.value[0] == result.value[1]
assert result.value[0] < result.value[1]

def test_check_fill_value_outside_valid_range(self):
"""
Expand Down Expand Up @@ -1794,7 +1794,8 @@ def test_64bit(self):
dataset = self.load_dataset(STATIC_FILES["ints64"])
suite = CheckSuite()
suite.checkers = {"cf": CF1_6Check}
suite.run(dataset, "cf")
# suite.run(dataset, "cf")
suite.run_all(dataset, ["cf"], skip_checks=["cf"])

def test_variable_feature_check(self):
# non-compliant dataset -- 1/1 fail
Expand Down
12 changes: 8 additions & 4 deletions compliance_checker/tests/test_cf_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ def get_results(self, check_results, checksuite):
], # must be specified to load this param at runtime, instead of at collection
)
def test_cf_integration(self, loaded_dataset, expected_messages, cs):
check_results = cs.run(loaded_dataset, [], "cf")
# check_results = cs.run(loaded_dataset, [], "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
scored, out_of, messages = self.get_results(check_results, cs)

assert scored < out_of
Expand All @@ -270,14 +271,16 @@ def test_cf_integration(self, loaded_dataset, expected_messages, cs):
indirect=["loaded_dataset"],
)
def test_no_incorrect_errors(self, cs, loaded_dataset, wrong_message):
check_results = cs.run(loaded_dataset, [], True, "cf")
# check_results = cs.run(loaded_dataset, [], True, "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
messages = self.get_results(check_results, cs)[-1]

assert wrong_message not in "".join(messages)

@pytest.mark.parametrize("loaded_dataset", ["fvcom"], indirect=True)
def test_fvcom(self, cs, loaded_dataset):
check_results = cs.run(loaded_dataset, [], True, "cf")
# check_results = cs.run(loaded_dataset, [], True, "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
scored, out_of, messages = self.get_results(check_results, cs)
assert scored < out_of

Expand Down Expand Up @@ -305,6 +308,7 @@ def test_ncei_templates(self, cs, loaded_dataset):
Tests some of the NCEI NetCDF templates, which usually should get a
perfect score.
"""
check_results = cs.run(loaded_dataset, [], "cf")
# check_results = cs.run(loaded_dataset, [], "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
scored, out_of, messages = self.get_results(check_results, cs)
assert scored < out_of
2 changes: 1 addition & 1 deletion compliance_checker/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def checker_1():
def checker_2():
    """Build a minimal stand-in checker carrying only the spec identifiers."""
    fake_checker = Namespace(
        _cc_spec="checker_2",
        _cc_spec_version="2.2",
    )
    return fake_checker

mock_checkers = [Namespace(resolve=checker_1), Namespace(resolve=checker_2)]
mock_checkers = [Namespace(load=checker_1), Namespace(load=checker_2)]
with pytest.warns(DeprecationWarning):
CheckSuite._load_checkers(mock_checkers)

Expand Down
11 changes: 9 additions & 2 deletions compliance_checker/tests/test_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ def test_hyrax():
"""
Tests that a connection can be made to Hyrax
"""
url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml"
# Returns: error 405
# url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml"
# More direct file
url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml.dap.nc4"
cs = CheckSuite()
ds = cs.load_dataset(url)
assert ds is not None
Expand All @@ -48,13 +51,17 @@ def test_thredds():
"""
Tests that a connection can be made to a remote THREDDS endpoint
"""
url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP"
# Returns: error 400
# url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP"
# Use a smaller dataset
url = "https://thredds.ucar.edu/thredds/ncss/grid/grib/NCEP/GFS/Global_0p25deg_ana/TP?var=Temperature_altitude_above_msl&accept=netcdf3"

cs = CheckSuite()
ds = cs.load_dataset(url)
assert ds is not None


@pytest.mark.skip(reason="The thredds endpoint is no longer serving SOS.")
def test_sos():
"""
Tests that a connection can be made to an SOS endpoint
Expand Down
21 changes: 14 additions & 7 deletions compliance_checker/tests/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,19 @@ def test_suite(self):
# BWA: what's the purpose of this test? Just to see if the suite
# runs without errors?
ds = self.cs.load_dataset(static_files["2dim"])
self.cs.run(ds, [], "acdd")
# self.cs.run(ds, [], "acdd")
self.cs.run_all(ds, ["acdd"], skip_checks=[])

def test_suite_pathlib(self):
path_obj = Path(static_files["2dim"])
ds = self.cs.load_dataset(path_obj)
self.cs.run(ds, [], "acdd")
# self.cs.run(ds, [], "acdd")
self.cs.run_all(ds, ["acdd"], skip_checks=[])

def test_unicode_formatting(self):
ds = self.cs.load_dataset(static_files["bad_region"])
score_groups = self.cs.run(ds, [], "cf")
# score_groups = self.cs.run(ds, [], "cf")
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in score_groups.items():
Expand Down Expand Up @@ -151,7 +154,8 @@ def test_group_func(self):
# This is checking for issue #183, where group_func results in
# IndexError: list index out of range
ds = self.cs.load_dataset(static_files["bad_data_type"])
score_groups = self.cs.run(ds, [], "cf")
# score_groups = self.cs.run(ds, [], "cf")
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in score_groups.items():
Expand Down Expand Up @@ -186,7 +190,8 @@ def test_cdl_file(self):
# Testing whether you can run compliance checker on a .cdl file
# Load the cdl file
ds = self.cs.load_dataset(static_files["test_cdl"])
vals = self.cs.run(ds, [], "cf")
# vals = self.cs.run(ds, [], "cf")
vals = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in vals.items():
Expand All @@ -209,7 +214,8 @@ def test_cdl_file(self):

# Ok now load the nc file that it came from
ds = self.cs.load_dataset(static_files["test_cdl_nc"])
vals = self.cs.run(ds, [], "cf")
# vals = self.cs.run(ds, [], "cf")
vals = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in vals.items():
Expand Down Expand Up @@ -247,7 +253,8 @@ def test_standard_output_score_header(self):
of potential issues, rather than the weighted score
"""
ds = self.cs.load_dataset(static_files["bad_region"])
score_groups = self.cs.run(ds, [], "cf")
# score_groups = self.cs.run(ds, [], "cf")
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[])
limit = 2
groups, errors = score_groups["cf"]
score_list, all_passed, out_of = self.cs.standard_output(
Expand Down
11 changes: 11 additions & 0 deletions docs/source/development.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Developer Notes

## pytest

When running the python test suite, there may be test errors. Certain
tests `record` responses to remote queries for information. If tests
fail, they will appear to continue to fail as the queries are cached.

To perform tests using fresh queries from remote services, use
`pytest --disable-vcr`. In certain cases, clearing pytest's cache is
also advised; use `pytest --cache-clear`.
6 changes: 4 additions & 2 deletions docs/source/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ The Compliance Checker is completely open-source and available on [GitHub](https

## Disclaimer

The objective of the IOOS Compliance Checker is to check your file against our interpretation of select dataset metadata standards to use as a guideline in generating compliant files.
The compliance checker should not be considered the authoritative source on whether your file is 100% "compliant".
The objective of the IOOS Compliance Checker is to check your file against
our interpretation of select dataset metadata standards to use as a
guideline in generating compliant files. The compliance checker should
not be considered the authoritative source on whether your file is 100% "compliant".
Instead, we recommend that users use the results as a guide to work towards compliance.
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Python tool to check your datasets against compliance standards.
quickintro
compliance_checker_api
faq
development

Indices and tables
==================
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
cf-units>=2
cftime>=1.1.0
importlib-metadata # drop this when dropping Python 3.8
importlib-resources # drop this when dropping Python 3.8
isodate>=0.6.1
jinja2>=2.7.3
Expand Down

0 comments on commit da97a6d

Please sign in to comment.