Skip to content

Commit

Permalink
Merge pull request #1062 from ioos/develop
Browse files Browse the repository at this point in the history
Final merge prior to 5.1.1 release
  • Loading branch information
benjwadams authored Apr 16, 2024
2 parents 03e279b + 6498863 commit da97a6d
Show file tree
Hide file tree
Showing 16 changed files with 102 additions and 44 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ repos:
- test_requirements.txt

- repo: https://github.com/psf/black
rev: 24.3.0
rev: 24.4.0
hooks:
- id: black
language_version: python3
Expand All @@ -31,7 +31,7 @@ repos:


- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.5
rev: v0.3.7
hooks:
- id: ruff

Expand Down
5 changes: 3 additions & 2 deletions compliance_checker/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ def __del__(self):
inadvertently mutated by other functions.
"""

cfutil.get_geophysical_variables.cache_clear()
cfutil.get_time_variables.cache_clear()
if cfutil is not None:
cfutil.get_geophysical_variables.cache_clear()
cfutil.get_time_variables.cache_clear()


class BaseNCCheck:
Expand Down
27 changes: 15 additions & 12 deletions compliance_checker/cf/cf_1_6.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,12 @@ def check_fill_value_equal_missing_value(self, ds):

return Result(
BaseCheck.MEDIUM,
(len(fails), total),
(total - len(fails), total),
self.section_titles["2.5"],
msgs=fails,
)

def check_valid_range_or_valid_min_max_present(self, ds):
def check_valid_range_and_valid_min_max_present(self, ds):
"""
The valid_range attribute must not be present if the valid_min
and/or valid_max attributes are present. This according to 2.5.1 Requirements.
Expand All @@ -443,19 +443,22 @@ def check_valid_range_or_valid_min_max_present(self, ds):
total = 0

for variable in ds.variables.values():
if hasattr(variable, "valid_max") and (
hasattr(variable, "valid_min") or hasattr(variable, "valid_range")
):
total = total + 1

fails.append(
f"For the variable {variable.name} the valid_range attribute must not be present "
"if the valid_min and/or valid_max attributes are present",
)
if hasattr(variable, "valid_max") or hasattr(variable, "valid_min"):
total += 1
# if there's also valid_range in addition to
# valid_min/valid_max, this is not compliant
if hasattr(variable, "valid_range"):
fails.append(
f"For the variable {variable.name} the valid_range attribute must not be present "
"if the valid_min and/or valid_max attributes are present",
)
# *Just* valid_range should be added to total as well
elif hasattr(variable, "valid_range"):
total += 1

return Result(
BaseCheck.MEDIUM,
(len(fails), total),
(total - len(fails), total),
self.section_titles["2.5"],
msgs=fails,
)
Expand Down
1 change: 0 additions & 1 deletion compliance_checker/cf/cf_1_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ def check_cell_boundaries_interval(self, ds):
reasoning,
)
ret_val.append(result)
print(ret_val)
return ret_val

def check_cell_measures(self, ds):
Expand Down
6 changes: 5 additions & 1 deletion compliance_checker/protocols/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def is_remote_netcdf(ds_str):
else:
content_type = head_req.headers.get("content-type")

if content_type is None:
return False

# if the Content-Type header returned was "application/x-netcdf",
# or a netCDF file (not OPeNDAP) we can open this into a Dataset
return content_type == "application/x-netcdf"
# Add support for application/x-netcdf;ver=4
return content_type.split(";")[0] == "application/x-netcdf"
19 changes: 14 additions & 5 deletions compliance_checker/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
from pathlib import Path
from urllib.parse import urlparse

import importlib_metadata
import requests
from lxml import etree as ET
from netCDF4 import Dataset
from owslib.sos import SensorObservationService
from owslib.swe.sensor.sml import SensorML
from packaging.version import parse
from pkg_resources import working_set

from compliance_checker import __version__, tempnc
from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value
Expand Down Expand Up @@ -73,8 +73,10 @@ def _get_generator_plugins(cls):
"""

if not hasattr(cls, "suite_generators"):
gens = working_set.iter_entry_points("compliance_checker.generators")
cls.suite_generators = [x.resolve() for x in gens]
gens = importlib_metadata.entry_points(
group="compliance_checker.generators",
)
cls.suite_generators = [x.load() for x in gens]

return cls.suite_generators

Expand Down Expand Up @@ -136,7 +138,9 @@ def load_all_available_checkers(cls):
Helper method to retrieve all sub checker classes derived from various
base classes.
"""
cls._load_checkers(working_set.iter_entry_points("compliance_checker.suites"))
cls._load_checkers(
importlib_metadata.entry_points(group="compliance_checker.suites"),
)

@classmethod
def _load_checkers(cls, checkers):
Expand All @@ -147,7 +151,7 @@ def _load_checkers(cls, checkers):

for c in checkers:
try:
check_obj = c.resolve()
check_obj = c.load()
if hasattr(check_obj, "_cc_spec") and hasattr(
check_obj,
"_cc_spec_version",
Expand Down Expand Up @@ -867,6 +871,11 @@ def load_remote_dataset(self, ds_str):
content_type = response.headers.get("content-type")
if content_type.split(";")[0] == "text/xml":
return self.process_doc(response.content)
elif content_type.split(";")[0] == "application/x-netcdf":
return Dataset(
urlparse(response.url).path,
memory=response.content,
)
else:
raise ValueError(
f"Unknown service with content-type: {content_type}",
Expand Down
10 changes: 9 additions & 1 deletion compliance_checker/tests/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import tempfile

from netCDF4 import Dataset
from netCDF4._netCDF4 import Dataset


class MockNetCDF(Dataset):
Expand All @@ -23,6 +23,14 @@ def __init__(self, filename=None):
persist=False,
)

# suppress usual dealloc routine to prevent caught exception messages
# from printing
def __dealloc__(self):
    """Deallocate the in-memory dataset quietly.

    The parent class's dealloc hook is invoked when available, but any
    AttributeError it raises (e.g. when the instance was never fully
    initialized and the attribute is absent) is swallowed so that no
    caught-exception message is printed during teardown.
    """
    try:
        # NOTE(review): __dealloc__ is presumably the netCDF4/Cython
        # teardown hook on the base Dataset — confirm against the
        # installed netCDF4 version.
        super().__dealloc__()
    except AttributeError:
        # Parent has no __dealloc__ attribute (or lookup failed);
        # nothing to clean up, so ignore.
        pass


class MockTimeSeries(MockNetCDF):
"""
Expand Down
9 changes: 5 additions & 4 deletions compliance_checker/tests/test_cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def test_check_fill_value_equal_missing_value(self):

assert result.msgs == expected_msgs

def test_check_valid_range_or_valid_min_max_present(self):
def test_check_valid_range_and_valid_min_max_present(self):
"""
2.5.1 Missing data, valid and actual range of data
Requirements:
Expand Down Expand Up @@ -426,7 +426,7 @@ def test_check_valid_range_or_valid_min_max_present(self):
dataset.variables["c"][1] = 2
dataset.variables["c"].setncattr("valid_range", [-10, 10])

result = self.cf.check_valid_range_or_valid_min_max_present(dataset)
result = self.cf.check_valid_range_and_valid_min_max_present(dataset)

# check if the test fails when when variable "a" is checked.
expected_msgs = [
Expand All @@ -436,7 +436,7 @@ def test_check_valid_range_or_valid_min_max_present(self):
]

assert result.msgs == expected_msgs
assert result.value[0] == result.value[1]
assert result.value[0] < result.value[1]

def test_check_fill_value_outside_valid_range(self):
"""
Expand Down Expand Up @@ -1794,7 +1794,8 @@ def test_64bit(self):
dataset = self.load_dataset(STATIC_FILES["ints64"])
suite = CheckSuite()
suite.checkers = {"cf": CF1_6Check}
suite.run(dataset, "cf")
# suite.run(dataset, "cf")
suite.run_all(dataset, ["cf"], skip_checks=["cf"])

def test_variable_feature_check(self):
# non-compliant dataset -- 1/1 fail
Expand Down
12 changes: 8 additions & 4 deletions compliance_checker/tests/test_cf_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ def get_results(self, check_results, checksuite):
], # must be specified to load this param at runtime, instead of at collection
)
def test_cf_integration(self, loaded_dataset, expected_messages, cs):
check_results = cs.run(loaded_dataset, [], "cf")
# check_results = cs.run(loaded_dataset, [], "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
scored, out_of, messages = self.get_results(check_results, cs)

assert scored < out_of
Expand All @@ -270,14 +271,16 @@ def test_cf_integration(self, loaded_dataset, expected_messages, cs):
indirect=["loaded_dataset"],
)
def test_no_incorrect_errors(self, cs, loaded_dataset, wrong_message):
check_results = cs.run(loaded_dataset, [], True, "cf")
# check_results = cs.run(loaded_dataset, [], True, "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
messages = self.get_results(check_results, cs)[-1]

assert wrong_message not in "".join(messages)

@pytest.mark.parametrize("loaded_dataset", ["fvcom"], indirect=True)
def test_fvcom(self, cs, loaded_dataset):
check_results = cs.run(loaded_dataset, [], True, "cf")
# check_results = cs.run(loaded_dataset, [], True, "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
scored, out_of, messages = self.get_results(check_results, cs)
assert scored < out_of

Expand Down Expand Up @@ -305,6 +308,7 @@ def test_ncei_templates(self, cs, loaded_dataset):
Tests some of the NCEI NetCDF templates, which usually should get a
perfect score.
"""
check_results = cs.run(loaded_dataset, [], "cf")
# check_results = cs.run(loaded_dataset, [], "cf")
check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[])
scored, out_of, messages = self.get_results(check_results, cs)
assert scored < out_of
2 changes: 1 addition & 1 deletion compliance_checker/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def checker_1():
def checker_2():
    """Build a minimal stand-in checker carrying only the spec identifiers."""
    fake_checker = Namespace(
        _cc_spec="checker_2",
        _cc_spec_version="2.2",
    )
    return fake_checker

mock_checkers = [Namespace(resolve=checker_1), Namespace(resolve=checker_2)]
mock_checkers = [Namespace(load=checker_1), Namespace(load=checker_2)]
with pytest.warns(DeprecationWarning):
CheckSuite._load_checkers(mock_checkers)

Expand Down
11 changes: 9 additions & 2 deletions compliance_checker/tests/test_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ def test_hyrax():
"""
Tests that a connection can be made to Hyrax
"""
url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml"
# Returns: error 405
# url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml"
# More direct file
url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml.dap.nc4"
cs = CheckSuite()
ds = cs.load_dataset(url)
assert ds is not None
Expand All @@ -48,13 +51,17 @@ def test_thredds():
"""
Tests that a connection can be made to a remote THREDDS endpoint
"""
url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP"
# Returns: error 400
# url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP"
# Use a smaller dataset
url = "https://thredds.ucar.edu/thredds/ncss/grid/grib/NCEP/GFS/Global_0p25deg_ana/TP?var=Temperature_altitude_above_msl&accept=netcdf3"

cs = CheckSuite()
ds = cs.load_dataset(url)
assert ds is not None


@pytest.mark.skip(reason="The thredds endpoint is no longer serving SOS.")
def test_sos():
"""
Tests that a connection can be made to an SOS endpoint
Expand Down
21 changes: 14 additions & 7 deletions compliance_checker/tests/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,19 @@ def test_suite(self):
# BWA: what's the purpose of this test? Just to see if the suite
# runs without errors?
ds = self.cs.load_dataset(static_files["2dim"])
self.cs.run(ds, [], "acdd")
# self.cs.run(ds, [], "acdd")
self.cs.run_all(ds, ["acdd"], skip_checks=[])

def test_suite_pathlib(self):
path_obj = Path(static_files["2dim"])
ds = self.cs.load_dataset(path_obj)
self.cs.run(ds, [], "acdd")
# self.cs.run(ds, [], "acdd")
self.cs.run_all(ds, ["acdd"], skip_checks=[])

def test_unicode_formatting(self):
ds = self.cs.load_dataset(static_files["bad_region"])
score_groups = self.cs.run(ds, [], "cf")
# score_groups = self.cs.run(ds, [], "cf")
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in score_groups.items():
Expand Down Expand Up @@ -151,7 +154,8 @@ def test_group_func(self):
# This is checking for issue #183, where group_func results in
# IndexError: list index out of range
ds = self.cs.load_dataset(static_files["bad_data_type"])
score_groups = self.cs.run(ds, [], "cf")
# score_groups = self.cs.run(ds, [], "cf")
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in score_groups.items():
Expand Down Expand Up @@ -186,7 +190,8 @@ def test_cdl_file(self):
# Testing whether you can run compliance checker on a .cdl file
# Load the cdl file
ds = self.cs.load_dataset(static_files["test_cdl"])
vals = self.cs.run(ds, [], "cf")
# vals = self.cs.run(ds, [], "cf")
vals = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in vals.items():
Expand All @@ -209,7 +214,8 @@ def test_cdl_file(self):

# Ok now load the nc file that it came from
ds = self.cs.load_dataset(static_files["test_cdl_nc"])
vals = self.cs.run(ds, [], "cf")
# vals = self.cs.run(ds, [], "cf")
vals = self.cs.run_all(ds, ["cf"], skip_checks=[])

limit = 2
for checker, rpair in vals.items():
Expand Down Expand Up @@ -247,7 +253,8 @@ def test_standard_output_score_header(self):
of potential issues, rather than the weighted score
"""
ds = self.cs.load_dataset(static_files["bad_region"])
score_groups = self.cs.run(ds, [], "cf")
# score_groups = self.cs.run(ds, [], "cf")
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[])
limit = 2
groups, errors = score_groups["cf"]
score_list, all_passed, out_of = self.cs.standard_output(
Expand Down
11 changes: 11 additions & 0 deletions docs/source/development.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Developer Notes

## pytest

When running the python test suite, there may be test errors. Certain
tests `record` responses to remote queries for information. If tests
fail, they will appear to continue to fail as the queries are cached.

To perform tests using fresh queries from remote services, use
`pytest --disable-vcr`. In certain cases, clearing pytest's cache is
also advised; use `pytest --cache-clear`.
6 changes: 4 additions & 2 deletions docs/source/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ The Compliance Checker is completely open-source and available on [GitHub](https

## Disclaimer

The objective of the IOOS Compliance Checker is to check your file against our interpretation of select dataset metadata standards to use as a guideline in generating compliant files.
The compliance checker should not be considered the authoritative source on whether your file is 100% "compliant".
The objective of the IOOS Compliance Checker is to check your file against
our interpretation of select dataset metadata standards to use as a
guideline in generating compliant files. The compliance checker should
not be considered the authoritative source on whether your file is 100% "compliant".
Instead, we recommend that users use the results as a guide to work towards compliance.
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Python tool to check your datasets against compliance standards.
quickintro
compliance_checker_api
faq
development

Indices and tables
==================
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
cf-units>=2
cftime>=1.1.0
importlib-metadata # drop this when dropping Python 3.8
importlib-resources # drop this when dropping Python 3.8
isodate>=0.6.1
jinja2>=2.7.3
Expand Down

0 comments on commit da97a6d

Please sign in to comment.