From 3034976556fe2a0f9fb61c6ff9575051f54ad195 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 16 Jan 2024 14:45:54 +0000 Subject: [PATCH 01/37] Correct/clarify a few regex rules --- checksit/rules/rules.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index e92f1336..60c61327 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -23,7 +23,7 @@ def __init__(self): "integer": r"-?\d+", "valid-email": r"[^@\s]+@[^@\s]+\.[^\s@]+", "valid-url": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?", - "valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+))|" + _NOT_APPLICABLE_RULES, + "valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?)|" + _NOT_APPLICABLE_RULES, "match:vN.M": r"v\d\.\d", "datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?", "datetime-or-na": @@ -32,9 +32,9 @@ def __init__(self): "location": r'(.)+(\,\ )(.)+', "latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}', "longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}', - "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(.)+_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', + "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', "title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', - "name-format": r'(.)+, (.)+ ?((.)+|((.)\.))', + "name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))', "name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+', "altitude-image-warning": r'-?\d+\sm', # should be integers only for images "altitude-image": r'-?\d+(\.\d+)?\sm', From dea2d88038d2e50619623713f33917ff7af8d0b9 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 16 Jan 2024 14:46:22 +0000 Subject: [PATCH 02/37] Add tests for regex rules --- tests/test_rules.py | 150 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 137 insertions(+), 13 deletions(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index 3d45c10e..f5136a1e 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -1,4 +1,6 @@ import os +import re +import pytest from checksit.rules import rules as r from checksit.rules.rule_funcs import match_file_name @@ -47,16 +49,138 @@ def test_type_rules(): for value in 3, 4.5, ["hi"]: assert tt(_type, value) != ([], []) -def test_regex_rules(): - rule = "regex-rule:integer" - assert r.check(rule, "-1") == ([], []) - assert r.check(rule, "500") == ([], []) - assert r.check(rule, "1.3") != ([], []) - - rule = "regex-rule:valid-email" - assert r.check(rule, "freda.bloggs@amail.com") == ([], []) - assert r.check(rule, "@amail.com") != ([], []) - assert r.check(rule, "freda.bloggs@") != ([], []) - -#TODO: Add checks for all the published rules -#TODO: Add checks for some regular expressions to check they are executed correctly +# static regex rule tests +@pytest.fixture +def rules(): + return r.static_regex_rules + +def test_integer_rule(rules): + assert re.fullmatch(rules['integer'], '123') + assert re.fullmatch(rules['integer'], '-123') + assert not re.fullmatch(rules['integer'], '123.45') + assert not re.fullmatch(rules['integer'], 'abc') + assert not re.fullmatch(rules['integer'], '') + +def test_valid_email_rule(rules): + assert re.fullmatch(rules['valid-email'], 'test@example.com') + assert re.fullmatch(rules['valid-email'], 'test.test@example.com') + assert not re.fullmatch(rules['valid-email'], 'test@example') + assert not re.fullmatch(rules['valid-email'], 'test@.com') + assert not re.fullmatch(rules['valid-email'], 'test@com') + +def test_valid_url_rule(rules): + assert re.fullmatch(rules['valid-url'], 'https://example.com') + assert re.fullmatch(rules['valid-url'], 'http://example.com') + assert not re.fullmatch(rules['valid-url'], 'htp://example.com') + assert not re.fullmatch(rules['valid-url'], 'https:/example.com') + assert not re.fullmatch(rules['valid-url'], 'https://example') + +def test_valid_url_or_na_rule(rules): + assert re.fullmatch(rules['valid-url-or-na'], 'https://example.com') + assert re.fullmatch(rules['valid-url-or-na'], 'http://example.com') + assert re.fullmatch(rules['valid-url-or-na'], 'N/A') + assert not re.fullmatch(rules['valid-url-or-na'], 'htp://example.com') + assert not re.fullmatch(rules['valid-url-or-na'], 'https:/example.com') + assert not re.fullmatch(rules['valid-url-or-na'], 'nan') + +def test_match_vN_M_rule(rules): + assert re.fullmatch(rules['match:vN.M'], 'v1.0') + assert re.fullmatch(rules['match:vN.M'], 'v2.1') + assert not re.fullmatch(rules['match:vN.M'], 'v10') + assert not re.fullmatch(rules['match:vN.M'], 'v1.01') + assert not re.fullmatch(rules['match:vN.M'], 'v.1.0') + +def test_datetime_rule(rules): + assert re.fullmatch(rules['datetime'], '2022-01-01T00:00:00') + assert re.fullmatch(rules['datetime'], '2022-01-01T00:00:00.123') + assert not re.fullmatch(rules['datetime'], '2022-01-01 00:00:00') + assert not re.fullmatch(rules['datetime'], '2022-01-01T00:00') + assert not re.fullmatch(rules['datetime'], '2022-01-01') + +def test_datetime_or_na_rule(rules): + assert re.fullmatch(rules['datetime-or-na'], '2022-01-01T00:00:00') + assert re.fullmatch(rules['datetime-or-na'], '2022-01-01T00:00:00.123') + assert re.fullmatch(rules['datetime-or-na'], 'N/A') + assert re.fullmatch(rules['datetime-or-na'], 'NA') + assert re.fullmatch(rules['datetime-or-na'], 'Not Applicable') + assert not re.fullmatch(rules['datetime-or-na'], '2022-01-01 00:00:00') + assert not re.fullmatch(rules['datetime-or-na'], '2022-01-01T00:00') + assert not re.fullmatch(rules['datetime-or-na'], '2022-01-01') + +def test_number_rule(rules): + assert re.fullmatch(rules['number'], '123.45') + assert re.fullmatch(rules['number'], '-123.45') + assert re.fullmatch(rules['number'], '-123.') + assert not re.fullmatch(rules['number'], 'abc') + assert not re.fullmatch(rules['number'], '') + assert not re.fullmatch(rules['number'], '123.45abc') + +def test_location_rule(rules): + assert re.fullmatch(rules['location'], 'City, Country') + assert re.fullmatch(rules['location'], 'City, Country, State') + assert not re.fullmatch(rules['location'], 'City Country') + assert not re.fullmatch(rules['location'], 'City,') + assert not re.fullmatch(rules['location'], ',Country') + +def test_latitude_image_rule(rules): + assert re.fullmatch(rules['latitude-image'], '+12.345678') + assert re.fullmatch(rules['latitude-image'], '-12.345678') + assert not re.fullmatch(rules['latitude-image'], '123.45') + assert not re.fullmatch(rules['latitude-image'], '+123.456789') + assert not re.fullmatch(rules['latitude-image'], '-123.456789') + +def test_longitude_image_rule(rules): + assert re.fullmatch(rules['longitude-image'], '+123.45678') + assert re.fullmatch(rules['longitude-image'], '-123.45678') + assert not re.fullmatch(rules['longitude-image'], '123') + assert not re.fullmatch(rules['longitude-image'], '+1234.56789') + assert not re.fullmatch(rules['longitude-image'], '-1234.56789') + +def test_title_rule(rules): + assert re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0.png') + assert re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0.jpg') + assert not re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0.txt') + assert not re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.png') + assert not re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0') + +def test_title_data_product_rule(rules): + assert re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_plot_v1.0.png') + assert re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_photo_v1.0.jpg') + assert not re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_v1.0.txt') + assert not re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_plot_v1.png') + assert not re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_plot_v1.0') + +def test_name_format_rule(rules): + assert re.fullmatch(rules['name-format'], 'Last, First M.') + assert re.fullmatch(rules['name-format'], 'Last, First') + assert not re.fullmatch(rules['name-format'], 'First Last') + assert not re.fullmatch(rules['name-format'], 'Last, First, M.') + assert not re.fullmatch(rules['name-format'], 'Last First M.') + +def test_name_characters_rule(rules): + assert re.fullmatch(rules['name-characters'], 'John_Doe') + assert re.fullmatch(rules['name-characters'], 'John-Doe') + assert not re.fullmatch(rules['name-characters'], 'John Doe!') + assert not re.fullmatch(rules['name-characters'], 'John Doe@') + assert not re.fullmatch(rules['name-characters'], 'John Doe#') + +def test_altitude_image_warning_rule(rules): + assert re.fullmatch(rules['altitude-image-warning'], '123 m') + assert re.fullmatch(rules['altitude-image-warning'], '-123 m') + assert not re.fullmatch(rules['altitude-image-warning'], '123.45 m') + assert not re.fullmatch(rules['altitude-image-warning'], '123') + assert not re.fullmatch(rules['altitude-image-warning'], '123m') + +def test_altitude_image_rule(rules): + assert re.fullmatch(rules['altitude-image'], '123.45 m') + assert re.fullmatch(rules['altitude-image'], '-123.45 m') + assert not re.fullmatch(rules['altitude-image'], '123') + assert not re.fullmatch(rules['altitude-image'], '123.45') + assert not re.fullmatch(rules['altitude-image'], '123.45m') + +def test_ncas_email_rule(rules): + assert re.fullmatch(rules['ncas-email'], 'test@ncas.ac.uk') + assert re.fullmatch(rules['ncas-email'], 'test.test@ncas.ac.uk') + assert not re.fullmatch(rules['ncas-email'], 'test@example.com') + assert not re.fullmatch(rules['ncas-email'], 'test@ncas.com') + assert not re.fullmatch(rules['ncas-email'], 'test@ncas.ac') \ No newline at end of file From 2bd1eb7a183f2882ffc56ddde03aefafbe50e280 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 16 Jan 2024 14:47:52 +0000 Subject: [PATCH 03/37] Correct test on number rule --- tests/test_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index f5136a1e..c7d98fc5 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -110,7 +110,7 @@ def test_datetime_or_na_rule(rules): def test_number_rule(rules): assert re.fullmatch(rules['number'], '123.45') assert re.fullmatch(rules['number'], '-123.45') - assert re.fullmatch(rules['number'], '-123.') + assert not re.fullmatch(rules['number'], '-123.') assert not re.fullmatch(rules['number'], 'abc') assert not re.fullmatch(rules['number'], '') assert not re.fullmatch(rules['number'], '123.45abc') From 256dd0d7496835263613c7e47770308d32734763 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 17 Jan 2024 10:49:18 +0000 Subject: [PATCH 04/37] Re-arrange checks in image_date_time and headline --- checksit/rules/rule_funcs.py | 42 ++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index 99237cae..b73d17a0 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -42,7 +42,8 @@ def match_file_name(value, context, extras=None, label=""): def match_one_of(value, context, extras=None, label=""): """ - Matches only one of... + value matches one of options defined in extras + default rule splitter is '|' and defined in checksit.ini file """ options = [x.strip() for x in extras[0].split(rule_splitter)] errors = [] @@ -55,7 +56,7 @@ def match_one_of(value, context, extras=None, label=""): def match_one_or_more_of(value, context, extras=None, label=""): """ - Matches one of more of... + String value or list value must match one of more of list given in extras """ def as_set(x, sep): return set([i.strip() for i in x.split(sep)]) options = as_set(extras[0], rule_splitter) @@ -93,10 +94,15 @@ def validate_image_date_time(value, context, extras=None, label=""): """ errors = [] - try: - if value != datetime.strptime(value, "%Y:%m:%d %H:%M:%S").strftime("%Y:%m:%d %H:%M:%S") and value != datetime.strptime(value, "%Y:%m:%d #%H:%M:%S.%f").strftime("%Y:%m:%d %H:%M:%S.%f"): - errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s") - except ValueError: + match = False + for f in ["%Y:%m:%d %H:%M:%S", "%Y:%m:%d %H:%M:%S.%f"]: + if match == False: + try: + match = (value == datetime.strptime(value, f).strftime(f)) + except ValueError: + pass + + if not match: errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s") return errors @@ -126,7 +132,7 @@ def validate_orcid_ID(value, context, extras=None, label=""): value[32] != "-" or # Check that the last characters contain only "-" and digits - not PI_orcid_digits_only.isdigit): + not PI_orcid_digits_only.isdigit()): errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX") @@ -164,17 +170,21 @@ def headline(value, context, extras=None, label=""): """ warnings = [] - if len(value) > 150: - warnings.append(f"{label} '{value}' should contain no more than one sentence") + if value == "": + warnings.append(f"{label} '{value}' should not be empty") + + else: + if len(value) > 150: + warnings.append(f"{label} '{value}' should contain no more than one sentence") - if value.count(".") >= 2: - warnings.append(f"{label} '{value}' should contain no more than one sentence") + if value.count(".") >= 2: + warnings.append(f"{label} '{value}' should contain no more than one sentence") - if not value[0].isupper(): - warnings.append(f"{label} '{value}' should start with a capital letter") + if not value[0].isupper(): + warnings.append(f"{label} '{value}' should start with a capital letter") - if len(value) < 10: - warnings.append(f"{label} '{value}' should be at least 10 characters") + if len(value) < 10: + warnings.append(f"{label} '{value}' should be at least 10 characters") return warnings @@ -218,7 +228,7 @@ def relation_url_checker(value, context, extras=None, label=""): else: relation_url = value.partition(" ")[2] # extract only the url part of the relation string if url_checker(relation_url, context, extras, label) != []: - errors.append(url_checker(relation_url, context, extras, label)) # check the url exists using the url_checker() function defined above + errors.extend(url_checker(relation_url, context, extras, label)) # check the url exists using the url_checker() function defined above return errors From 4b82cda3f09fa7aa66958183d028cbd3a9234537 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 17 Jan 2024 10:49:39 +0000 Subject: [PATCH 05/37] More tests for rule_funcs --- tests/test_rules.py | 204 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 201 insertions(+), 3 deletions(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index c7d98fc5..5ca4de20 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -1,11 +1,12 @@ import os import re import pytest +from numbers import Number from checksit.rules import rules as r -from checksit.rules.rule_funcs import match_file_name - +from checksit.rules.rule_funcs import match_file_name, string_of_length, match_one_of, match_one_or_more_of, validate_image_date_time, validate_orcid_ID, list_of_names, headline, title_check, url_checker, relation_url_checker, latitude, longitude +# rule_funcs.py def test_match_file_name(): file_path = "happy_netcdf" value = "happy_NetCDF.nc" @@ -14,6 +15,193 @@ def test_match_file_name(): assert len(match_file_name(value, context, ["lowercase"])) == 1 assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0 + +def test_string_of_length(): + # Test that the function correctly handles strings of the minimum length + assert string_of_length('abc', {}, ['3'], 'Test') == [] + assert string_of_length('abcd', {}, ['3+'], 'Test') == [] + + # Test that the function correctly handles strings shorter than the minimum length + assert string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"] + assert string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"] + + # Test that the function correctly handles strings longer than the minimum length + assert string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"] + assert string_of_length('abcd', {}, ['3+'], 'Test') == [] + + # Test that the function correctly handles empty strings + assert string_of_length('', {}, ['0'], 'Test') == [] + assert string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"] + assert string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"] + + +def test_match_one_of(): + # Test that the function correctly handles valid inputs + assert match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == [] + + # Test that the function correctly handles invalid inputs + assert match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"] + + # Test that the function correctly handles empty strings + assert match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"] + + +def test_match_one_or_more_of(): + # Test that the function correctly handles valid inputs + assert match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == [] + assert match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == [] + + # Test that the function correctly handles invalid inputs + assert match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] + assert match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] + + # Test that the function correctly handles empty strings + assert match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"] + + +def test_validate_image_date_time(): + # Test that the function correctly handles valid date-time strings + assert validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == [] + assert validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == [] + + # Test that the function correctly handles invalid date-time strings + assert validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + + # Test that the function correctly handles empty strings + assert validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + + +def test_validate_orcid_ID(): + # Test that the function correctly handles valid ORCID IDs + assert validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == [] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == [] + + # Test that the function correctly handles ORCID IDs with incorrect lengths + assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + + # Test that the function correctly handles ORCID IDs with incorrect formats + assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-345X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + + # Test that the function correctly handles empty strings + assert validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + + +def test_list_of_names(): + # Test that the function correctly handles valid names + assert list_of_names('Doe, John', {}, label='Test') == [] + assert list_of_names('Doe, John J.', {}, label='Test') == [] + assert list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == [] + + # Test that the function correctly handles names with incorrect formats + assert list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format , or , where appropriate"] + assert list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format , or , where appropriate"] + assert list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format , or , where appropriate"] + + # Test that the function correctly handles names with invalid characters + assert list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"] + + # Test that the function correctly handles empty strings + assert list_of_names('', {}, label='Test') == ["Test '' should be of the format , or , where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert list_of_names([], {}, label='Test') == [] + + +def test_headline(): + # Test that the function correctly handles valid headlines + assert headline('This is a valid headline.', {}, label='Test') == [] + assert headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == [] + assert headline('This headline is exactly 10 characters.', {}, label='Test') == [] + + # Test that the function correctly handles headlines longer than 150 characters + assert headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"] + + # Test that the function correctly handles headlines with more than one sentence + assert headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"] + + # Test that the function correctly handles headlines that do not start with a capital letter + assert headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"] + + # Test that the function correctly handles headlines shorter than 10 characters + assert headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"] + + # Test that the function correctly handles empty strings + assert headline('', {}, label='Test') == ["Test '' should not be empty"] + + +def test_title_check(): + # Test that the function correctly handles titles that match the filename + assert title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == [] + assert title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == [] + + # Test that the function correctly handles titles that do not match the filename + assert title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"] + assert title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"] + + # Test that the function correctly handles empty titles + assert title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"] + + +def test_url_checker(): + # Test that the function correctly handles a reachable URL + assert url_checker("https://www.example.com", {}, label="Test") == [] + + # Test that the function correctly handles an unreachable URL + assert url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"] + + # Test that the function correctly handles an existing but unreachable URL + assert url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"] + + # Test that the function correctly handles an empty URL + assert url_checker("", {}, label="Test") == ["Test '' is not a reachable url"] + + +def test_relation_url_checker(): + # Test that the function correctly handles valid inputs + assert relation_url_checker('relation https://example.com', {}, label='Test') == [] + + # Test that the function correctly handles inputs without a space + assert relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"] + + # Test that the function correctly handles inputs with an invalid URL + assert relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"] + + # Test that the function correctly handles empty strings + assert relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"] + + +def test_latitude(): + # Test that the function correctly handles valid latitudes + assert latitude('45.1234', {}, label='Test') == [] + assert latitude('-90.0000', {}, label='Test') == [] + assert latitude('90.0000', {}, label='Test') == [] + + # Test that the function correctly handles invalid latitudes + assert latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "] + assert latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "] + assert latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "] + + +def test_longitude(): + # Test that the function correctly handles valid longitudes + assert longitude('45.1234', {}, label='Test') == [] + assert longitude('-180.0000', {}, label='Test') == [] + assert longitude('180.0000', {}, label='Test') == [] + + # Test that the function correctly handles invalid longitudes + assert longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "] + assert longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "] + assert longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "] + + +# rules.py def _test_type(_type, value): return r.check(f"type-rule:{_type}", value) @@ -183,4 +371,14 @@ def test_ncas_email_rule(rules): assert re.fullmatch(rules['ncas-email'], 'test.test@ncas.ac.uk') assert not re.fullmatch(rules['ncas-email'], 'test@example.com') assert not re.fullmatch(rules['ncas-email'], 'test@ncas.com') - assert not re.fullmatch(rules['ncas-email'], 'test@ncas.ac') \ No newline at end of file + assert not re.fullmatch(rules['ncas-email'], 'test@ncas.ac') + +def test_map_type_rule(): + assert r._map_type_rule('number') == Number + assert r._map_type_rule('integer') == int + assert r._map_type_rule('int') == int + assert r._map_type_rule('float') == float + assert r._map_type_rule('string') == str + assert r._map_type_rule('str') == str + with pytest.raises(KeyError): + r._map_type_rule('nonexistent') \ No newline at end of file From 86ce55061d6e0a359e50ec39fc619b6171ff6fc4 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 17 Jan 2024 13:52:49 +0000 Subject: [PATCH 06/37] See if check should be warning first --- checksit/rules/rules.py | 44 ++++++++++++----------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index 60c61327..b27df912 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -66,57 +66,39 @@ def check(self, rule_lookup, value, context=None, label=""): for i in rule_lookup_list: - if i.startswith("rule-func:"): - rule_comps = i.split(":") - rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_")) - extras = rule_comps[2:] - errors.extend(rule_func(value, context, extras, label=label)) + if i.split(":")[0].endswith("-warning"): + output = warnings + else: + output = errors - elif i.startswith("rule-func-warning:"): + if i.startswith("rule-func"): rule_comps = i.split(":") rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_")) extras = rule_comps[2:] - warnings.extend(rule_func(value, context, extras, label=label)) + output.extend(rule_func(value, context, extras, label=label)) elif i.startswith("type-rule"): type_rule = i.split(":")[1] if not isinstance(value, self._map_type_rule(type_rule)): - errors.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.") - - elif i.startswith("regex-warning:"): - pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL - if not re.match(f"^{pattern}$", value): - warnings.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") + output.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.") - elif i.startswith("regex:"): - pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL - if not re.match(f"^{pattern}$", value): - errors.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") - - elif i.startswith("regex-rule-warning:"): + elif i.startswith("regex-rule"): regex_rule = i.split(":", 1)[1] if regex_rule in self.static_regex_rules: pattern = self.static_regex_rules[regex_rule] if not re.match("^" + pattern + "$", value): - warnings.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") + output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") else: raise Exception(f"Rule not found with rule ID: {rule_lookup}.") - elif i.startswith("regex-rule:"): - regex_rule = i.split(":", 1)[1] - - if regex_rule in self.static_regex_rules: - pattern = self.static_regex_rules[regex_rule] - - if not re.match("^" + pattern + "$", value): - errors.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") - - else: - raise Exception(f"Rule not found with rule ID: {rule_lookup}.") + elif i.startswith("regex"): + pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL + if not re.match(f"^{pattern}$", value): + output.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") else: raise Exception(f"Rule not found with rule ID: {rule_lookup}.") From 6b7e169174b6664c9bba414a4c21951c21bbed25 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 17 Jan 2024 13:53:46 +0000 Subject: [PATCH 07/37] Change ORCID tests --- tests/test_rules.py | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index 5ca4de20..6b87acd2 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -78,6 +78,7 @@ def test_validate_orcid_ID(): # Test that the function correctly handles valid ORCID IDs assert validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == [] assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == [] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == [] # Test that the function correctly handles ORCID IDs with incorrect lengths assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] @@ -85,7 +86,6 @@ def test_validate_orcid_ID(): # Test that the function correctly handles ORCID IDs with incorrect formats assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-345X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] @@ -381,4 +381,37 @@ def test_map_type_rule(): assert r._map_type_rule('string') == str assert r._map_type_rule('str') == str with pytest.raises(KeyError): - r._map_type_rule('nonexistent') \ No newline at end of file + r._map_type_rule('nonexistent') + +def test_check(): + rules_instance = r + + # Test that the function correctly handles rule-func + assert rules_instance.check("rule-func:string_of_length:3", "abc", {}, label="Test") == ([], []) + assert rules_instance.check("rule-func:string_of_length:3", "abcd", {}, label="Test") == (["Test 'abcd' must be exactly 3 characters"], []) + + # Test that the function correctly handles rule-func-warning + assert rules_instance.check("rule-func-warning:string_of_length:3", "abc", {}, label="Test") == ([], []) + assert rules_instance.check("rule-func-warning:string_of_length:3", "abcd", {}, label="Test") == ([], ["Test 'abcd' must be exactly 3 characters"]) + + # Test that the function correctly handles type-rule + assert rules_instance.check("type-rule:int", 123, {}, label="Test") == ([], []) + assert rules_instance.check("type-rule:int", "abc", {}, label="Test") == (["Test Value 'abc' is not of required type: 'int'."], []) + + # Test that the function correctly handles regex-warning + assert rules_instance.check("regex-warning:^[a-z]+$", "abc", {}, label="Test") == ([], []) + assert rules_instance.check("regex-warning:^[a-z]+$", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."]) + + # Test that the function correctly handles regex + assert rules_instance.check("regex:^[a-z]+$", "abc", {}, label="Test") == ([], []) + assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], []) + + # Test that the function correctly handles regex-rule-warning + rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"} + assert rules_instance.check("regex-rule-warning:lowercase", "abc", {}, label="Test") == ([], []) + assert rules_instance.check("regex-rule-warning:lowercase", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regex rule: 'lowercase'."]) + + # Test that the function correctly handles regex-rule + rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"} + assert rules_instance.check("regex-rule:lowercase", "abc", {}, label="Test") == ([], []) + assert rules_instance.check("regex-rule:lowercase", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regex rule: 'lowercase'."], []) \ No newline at end of file From 96399c130570ddc013c575fe204fccacfad6648c Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 17 Jan 2024 13:58:26 +0000 Subject: [PATCH 08/37] Allow ORCID to end with "X" --- checksit/rules/rule_funcs.py | 7 +++++-- tests/test_rules.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py index b73d17a0..604f9cfc 100644 --- a/checksit/rules/rule_funcs.py +++ b/checksit/rules/rule_funcs.py @@ -131,8 +131,11 @@ def validate_orcid_ID(value, context, extras=None, label=""): value[27] != "-" or value[32] != "-" or - # Check that the last characters contain only "-" and digits - not PI_orcid_digits_only.isdigit()): + # Check that the last characters contain only "-" and digits (plus 'X' for last digit) + not ( + PI_orcid_digits_only.isdigit() or (PI_orcid_digits_only[0:15].isdigit() and PI_orcid_digits_only[15] == "X") + ) + ): errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX") diff --git a/tests/test_rules.py b/tests/test_rules.py index 6b87acd2..13b36b00 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -86,6 +86,7 @@ def test_validate_orcid_ID(): # Test that the function correctly handles ORCID IDs with incorrect formats assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] From 5181d76c273dfb5309bafc24d1ae9582fefe2bbf Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 17 Jan 2024 14:37:30 +0000 Subject: [PATCH 09/37] Correct the mistake I made when "correcting" the title regex check --- checksit/rules/rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index b27df912..c872617b 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -32,7 +32,7 @@ def __init__(self): "location": r'(.)+(\,\ )(.)+', "latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}', "longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}', - "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', + "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', "title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)', "name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))', "name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+', From 00932aed45afae3d7e72cad6dbeda354a8c4c8ad Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 09:17:07 +0000 Subject: [PATCH 10/37] Change to exception messages on invalid rules --- checksit/rules/rules.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index c872617b..568a96f6 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -93,15 +93,15 @@ def check(self, rule_lookup, value, context=None, label=""): output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.") else: - raise Exception(f"Rule not found with rule ID: {rule_lookup}.") + raise Exception(f"Regex rule not found with rule ID: {i}.") elif i.startswith("regex"): - pattern = ':'.join(i.split(":")[1:]) # in case pattern has colons in it, e.g. a URL + pattern = i.split(":", 1)[1] # in case pattern has colons in it, e.g. a URL if not re.match(f"^{pattern}$", value): output.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.") - + else: - raise Exception(f"Rule not found with rule ID: {rule_lookup}.") + raise Exception(f"Rule not found with rule ID: {i}.") return errors, warnings From 3275a7a3052dae47c50bd6efc0e2947ae70b1b3d Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 09:17:40 +0000 Subject: [PATCH 11/37] Use already existing static regex rules rather than making some up --- tests/test_rules.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index 13b36b00..8056fcce 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -408,11 +408,9 @@ def test_check(): assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], []) # Test that the function correctly handles regex-rule-warning - rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"} - assert rules_instance.check("regex-rule-warning:lowercase", "abc", {}, label="Test") == ([], []) - assert rules_instance.check("regex-rule-warning:lowercase", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regex rule: 'lowercase'."]) + assert rules_instance.check("regex-rule-warning:integer", "123", {}, label="Test") == ([], []) + assert rules_instance.check("regex-rule-warning:integer", "123.45", {}, label="Test") == ([], ["Test Value '123.45' does not match regex rule: 'integer'."]) # Test that the function correctly handles regex-rule - rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"} - assert rules_instance.check("regex-rule:lowercase", "abc", {}, label="Test") == ([], []) - assert rules_instance.check("regex-rule:lowercase", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regex rule: 'lowercase'."], []) \ No newline at end of file + assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ([], []) + assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (["Test Value '123.45' does not match regex rule: 'integer'."], []) From b02fc871f9dca9b696b1ab0dc85db61b4e5298b3 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 09:18:11 +0000 Subject: [PATCH 12/37] Dummy specs for testing show specs --- specs/groups/tests/test.yml | 13 +++++++++++++ tests/test_show_specs.py | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 specs/groups/tests/test.yml create mode 100644 tests/test_show_specs.py diff --git a/specs/groups/tests/test.yml b/specs/groups/tests/test.yml new file mode 100644 index 00000000..2b947ec0 --- /dev/null +++ b/specs/groups/tests/test.yml @@ -0,0 +1,13 @@ +var-requires: + func: checksit.generic.check_var_attrs + params: + defined_attrs: + - long_name + +required-global-attrs: + func: checksit.generic.check_dim_exists + params: + dimensions: + - time + + diff --git a/tests/test_show_specs.py b/tests/test_show_specs.py new file mode 100644 index 00000000..640ddab6 --- /dev/null +++ b/tests/test_show_specs.py @@ -0,0 +1,23 @@ +import pytest +import json +from checksit.specs import show_specs + + +def test_show_specs_all(capsys): + # Call the function + show_specs(["tests/test"]) + + # Capture the output of the print statements + captured = capsys.readouterr() + + # Check that the print function was called with the correct arguments + expected_output = ( + 'Specifications:\n\ntests/test:\n{\n "var-requires": {\n' + ' "func": "checksit.generic.check_var_attrs",\n "params": {\n' + ' "defined_attrs": [\n "long_name"\n ]\n' + ' }\n },\n "required-global-attrs": {\n' + ' "func": "checksit.generic.check_dim_exists",\n "params": {\n' + ' "dimensions": [\n "time"\n' + ' ]\n }\n }\n}\n' + ) + assert captured.out == expected_output From 7d7dcdbf6cc459a675fb265b12e3a5b244676e3c Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 09:31:17 +0000 Subject: [PATCH 13/37] Correct printing by show-specs when no specs given --- checksit/specs.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/checksit/specs.py b/checksit/specs.py index 562e80aa..e45b036e 100644 --- a/checksit/specs.py +++ b/checksit/specs.py @@ -19,22 +19,20 @@ def load_specs(spec_ids=None): spec_files = [f"{specs_dir}/{spec_id}.yml" for spec_id in spec_ids] or \ glob.glob(f"{specs_dir}/*.yml") - return _parse_specs(spec_files) - + return _parse_specs(spec_files) + def show_specs(spec_ids=None, verbose=False): - all_specs = load_specs(spec_ids) spec_ids_names = tuple([(spec_id.split("/")[-1]) for spec_id in spec_ids]) if not spec_ids: - specs = all_specs + specs = all_specs.items() else: specs = [(spec_ids[spec_ids_names.index(spec_id)], spec) for (spec_id, spec) in all_specs.items() if spec_id in spec_ids_names] print("Specifications:") for spec_id, spec in specs: - print(f"\n{spec_id}:") print(json.dumps(spec, indent=4).replace("\\\\", "\\")) @@ -68,6 +66,6 @@ def run_checks(self, record, skip_spellcheck=False): record, check_dict, skip_spellcheck=skip_spellcheck ) errors.extend(check_errors) - warnings.extend(check_warnings) + warnings.extend(check_warnings) return errors, warnings From fbffa839d748bd0e1e876026ad3d7738f3139d47 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 09:42:46 +0000 Subject: [PATCH 14/37] Add show-specs check when no spec specified --- tests/test_show_specs.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_show_specs.py b/tests/test_show_specs.py index 640ddab6..bf1054fe 100644 --- a/tests/test_show_specs.py +++ b/tests/test_show_specs.py @@ -21,3 +21,14 @@ def test_show_specs_all(capsys): ' ]\n }\n }\n}\n' ) assert captured.out == expected_output + + +def test_show_specs_none_specified(capsys): + # When no spec is specified, all specs in specs/groups are shown + show_specs([]) + captured_empty = capsys.readouterr() + + show_specs(["ceda-base"]) + captured_ceda_base = capsys.readouterr() + + assert captured_empty.out == captured_ceda_base.out From 323aa92bb751366a06dbe8746832c9ebd9b43f9f Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 10:17:35 +0000 Subject: [PATCH 15/37] Add tests for exceptions on non existent rules --- tests/test_rules.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_rules.py b/tests/test_rules.py index 8056fcce..e38caab6 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -414,3 +414,12 @@ def test_check(): # Test that the function correctly handles regex-rule assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ([], []) assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (["Test Value '123.45' does not match regex rule: 'integer'."], []) + + # Test that correct exceptions are raised when the rule or regex is not found + with pytest.raises(Exception) as e_info: + rules_instance.check("rules-func:nonexistent", "abc", {}, label="Test") + assert str(e_info.value) == "Rule not found with rule ID: rules-func:nonexistent." + + with pytest.raises(Exception) as e_info: + rules_instance.check("regex-rule:nonexistent", "abc", {}, label="Test") + assert str(e_info.value) == "Regex rule not found with rule ID: regex-rule:nonexistent." From 7ad518bf3aead8c87a3cfa949e4ecdc66861e90e Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Thu, 18 Jan 2024 11:32:13 +0000 Subject: [PATCH 16/37] Add checks on uppercase processor --- tests/test_rules.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_rules.py b/tests/test_rules.py index e38caab6..5050ed1d 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -13,7 +13,9 @@ def test_match_file_name(): context = {"file_path": file_path} assert len(match_file_name(value, context)) == 1 assert len(match_file_name(value, context, ["lowercase"])) == 1 + assert len(match_file_name(value, context, ["uppercase"])) == 1 assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0 + assert len(match_file_name(value, context, ["uppercase", "no_extension"])) == 1 def test_string_of_length(): From 6764a80d5cb56472d9b2602f5dc64d3d31697b4a Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 22 Jan 2024 15:28:55 +0000 Subject: [PATCH 17/37] Correct formatting on error message for flag_values --- checksit/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/checksit/generic.py b/checksit/generic.py index 421ebabd..5b153f7e 100644 --- a/checksit/generic.py +++ b/checksit/generic.py @@ -222,8 +222,8 @@ def check_var(dct, variable, defined_attrs, skip_spellcheck=False): attr_value = np.array(attr_value, dtype=np.int8) if not np.all(dct["variables"][variable].get(attr_key) == attr_value): errors.append( - f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition {attr_value}, " - f"not {dct['variables'][variable].get(attr_key) if skip_spellcheck else ''}." + f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition '{attr_value}', " + f"not '{dct['variables'][variable].get(attr_key)}'." ) #elif attr_key == 'flag_meanings': # print(attr_value) From 8cd8dbe58a820e0fa23652f5382570d74ba4e479 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 22 Jan 2024 15:29:07 +0000 Subject: [PATCH 18/37] Create test vocabs --- checksit/vocabs/tests/test_instruments.json | 10 ++++++++++ checksit/vocabs/tests/test_platforms.json | 12 ++++++++++++ checksit/vocabs/tests/test_products.json | 6 ++++++ 3 files changed, 28 insertions(+) create mode 100644 checksit/vocabs/tests/test_instruments.json create mode 100644 checksit/vocabs/tests/test_platforms.json create mode 100644 checksit/vocabs/tests/test_products.json diff --git a/checksit/vocabs/tests/test_instruments.json b/checksit/vocabs/tests/test_instruments.json new file mode 100644 index 00000000..265904ce --- /dev/null +++ b/checksit/vocabs/tests/test_instruments.json @@ -0,0 +1,10 @@ +{ + "test_instruments": { + "inst1": { + "instrument_id": "inst1" + }, + "inst2": { + "instrument_id": "inst2" + } + } +} \ No newline at end of file diff --git a/checksit/vocabs/tests/test_platforms.json b/checksit/vocabs/tests/test_platforms.json new file mode 100644 index 00000000..9a4e2051 --- /dev/null +++ b/checksit/vocabs/tests/test_platforms.json @@ -0,0 +1,12 @@ +{ + "test_platforms": { + "plat1": { + "platform_id": "plat1", + "description": "test platform 1" + }, + "plat2": { + "platform_id": "plat2", + "description": "test platform 2" + } + } +} \ No newline at end of file diff --git a/checksit/vocabs/tests/test_products.json b/checksit/vocabs/tests/test_products.json new file mode 100644 index 00000000..1cd59a7b --- /dev/null +++ b/checksit/vocabs/tests/test_products.json @@ -0,0 +1,6 @@ +{ + "test_products": [ + "prod1", + "prod2" + ] +} \ No newline at end of file From 6b29b84e9a4827d64f906f9d2d5f8b5d8599ea8f Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 22 Jan 2024 15:29:20 +0000 Subject: [PATCH 19/37] Tests for generic.py functions --- tests/test_generic.py | 309 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 309 insertions(+) create mode 100644 tests/test_generic.py diff --git a/tests/test_generic.py b/tests/test_generic.py new file mode 100644 index 00000000..52ab00c7 --- /dev/null +++ b/tests/test_generic.py @@ -0,0 +1,309 @@ +#from checksit.generic import one_spelling_mistake, two_spelling_mistakes, search_close_match, check_var_attrs, check_global_attrs, check_var_exists, check_dim_exists, check_var +import checksit.generic as cg +import numpy as np + + +def test_spelling_mistake_checks(): + # Test that the function correctly generates all one-delete mistakes + assert len(cg.one_spelling_mistake("abc")) == 195 + assert len(cg.one_spelling_mistake("abcdefg")) == 507 + assert len(cg.two_spelling_mistakes("abc")) == 16306 + assert len(cg.two_spelling_mistakes("abcdefg")) == 118314 + + +def test_search_close_match(): + # Test that the function correctly finds a close match + assert cg.search_close_match('abc', ['abd', 'abe', 'abf']) == "'abd' was found in this file, should this be 'abc'?" + + # Test that the function correctly handles no close matches + assert cg.search_close_match('abc', ['def', 'ghi', 'jkl']) == "" + + # Test that the function correctly handles case sensitivity + assert cg.search_close_match('abc', ['ABD', 'ABE', 'ABF']) == "'ABD' was found in this file, should this be 'abc'?" + + # Test that the function correctly handles an empty search_in list + assert cg.search_close_match('abc', []) == "" + + # Test that the function correctly handles an empty search_for string + assert cg.search_close_match('', ['abd', 'abe', 'abf']) == "" + + +def test_check_var_attrs(): + # Test that the function correctly identifies missing attributes + dct = { + "variables": { + "var1": {"long_name": "Variable 1", "units": "m"}, + "var2": {"long_name": "Variable 2"} + } + } + defined_attrs = ["long_name", "units"] + errors, warnings = cg.check_var_attrs(dct, defined_attrs) + assert errors == ["[variable**************:var2]: Attribute 'units' must have a valid definition."] + assert warnings == [] + + # Test that the function correctly handles empty attributes + dct = { + "variables": { + "var1": {"long_name": "", "units": "m"}, + "var2": {"long_name": "Variable 2", "units": ""} + } + } + errors, warnings = cg.check_var_attrs(dct, defined_attrs) + assert errors == ["[variable**************:var1]: Attribute 'long_name' must have a valid definition.", "[variable**************:var2]: Attribute 'units' must have a valid definition."] + assert warnings == [] + + # Test that the function correctly handles variables with all attributes defined + dct = { + "variables": { + "var1": {"long_name": "Variable 1", "units": "m"}, + "var2": {"long_name": "Variable 2", "units": "kg"} + } + } + errors, warnings = cg.check_var_attrs(dct, defined_attrs) + assert errors == [] + assert warnings == [] + + # Test that the function correctly handles an empty dct + dct = {"variables": {}} + errors, warnings = cg.check_var_attrs(dct, defined_attrs) + assert errors == [] + assert warnings == [] + + +def test_check_global_attrs(): + # Test that the function correctly identifies missing attributes + dct = { + "global_attributes": { + "attr1": "value1", + "attr2": "value2" + } + } + defined_attrs = ["attr1", "attr3"] + errors, warnings = cg.check_global_attrs(dct, defined_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr3]: Attribute 'attr3' does not exist. "] + assert warnings == [] + + # Test that the function correctly handles empty attributes + dct = { + "global_attributes": { + "attr1": "", + "attr2": "value2" + } + } + defined_attrs = ["attr1", "attr2"] + errors, warnings = cg.check_global_attrs(dct, defined_attrs) + assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."] + assert warnings == [] + + # Test that the function correctly handles attributes with all values defined + dct = { + "global_attributes": { + "attr1": "value1", + "attr2": "value2" + } + } + errors, warnings = cg.check_global_attrs(dct, defined_attrs) + assert errors == [] + assert warnings == [] + + # Test that the function correctly handles an empty dct + dct = {"global_attributes": {}} + errors, warnings = cg.check_global_attrs(dct, defined_attrs) + assert errors == ["[global-attributes:**************:attr1]: Attribute 'attr1' does not exist. ", "[global-attributes:**************:attr2]: Attribute 'attr2' does not exist. "] + assert warnings == [] + + +def test_check_var_exists(): + # Test that the function correctly identifies missing variables + dct = { + "variables": { + "var1": {"long_name": "Variable 1", "units": "m"}, + "var2": {"long_name": "Variable 2", "units": "kg"} + } + } + variables = ["var1", "var3"] + errors, warnings = cg.check_var_exists(dct, variables, skip_spellcheck=True) + assert errors == ["[variable**************:var3]: Does not exist in file. "] + assert warnings == [] + + # Test that the function correctly handles optional variables + variables = ["var1", "var3:__OPTIONAL__"] + errors, warnings = cg.check_var_exists(dct, variables, skip_spellcheck=True) + assert errors == [] + assert warnings == ["[variable**************:var3]: Optional variable does not exist in file. "] + + # Test that the function correctly handles variables that exist + variables = ["var1", "var2"] + errors, warnings = cg.check_var_exists(dct, variables) + assert errors == [] + assert warnings == [] + + # Test that the function correctly handles an empty dct + dct = {"variables": {}} + variables = ["var1", "var2"] + errors, warnings = cg.check_var_exists(dct, variables) + assert errors == ["[variable**************:var1]: Does not exist in file. ", "[variable**************:var2]: Does not exist in file. "] + assert warnings == [] + + +def test_check_dim_exists(): + # Test that the function correctly identifies missing dimensions + dct = { + "dimensions": { + "dim1": {"long_name": "Dimension 1", "units": "m"}, + "dim2": {"long_name": "Dimension 2", "units": "kg"} + } + } + dimensions = ["dim1", "dim3"] + errors, warnings = cg.check_dim_exists(dct, dimensions, skip_spellcheck=True) + assert errors == ["[dimension**************:dim3]: Does not exist in file. "] + assert warnings == [] + + # Test that the function correctly handles optional dimensions + dimensions = ["dim1", "dim3:__OPTIONAL__"] + errors, warnings = cg.check_dim_exists(dct, dimensions, skip_spellcheck=True) + assert errors == [] + assert warnings == ["[dimension**************:dim3]: Optional dimension does not exist in file. "] + + # Test that the function correctly handles dimensions that exist + dimensions = ["dim1", "dim2"] + errors, warnings = cg.check_dim_exists(dct, dimensions) + assert errors == [] + assert warnings == [] + + # Test that the function correctly handles an empty dct + dct = {"dimensions": {}} + dimensions = ["dim1", "dim2"] + errors, warnings = cg.check_dim_exists(dct, dimensions) + assert errors == ["[dimension**************:dim1]: Does not exist in file. ", "[dimension**************:dim2]: Does not exist in file. "] + assert warnings == [] + + +def test_check_var(): + # Test that the function correctly identifies missing variables + dct = { + "variables": { + "var1": {"long_name": "Variable 1", "units": "m", "flag_values": np.array([0,1,2], dtype=np.int8)}, + "var2": {"long_name": "Variable 2", "units": "kg"}, + "var4": {"flag_values": "0b, 1b, 2b"} + } + } + variable = "var3" + defined_attrs = ["long_name:Variable 3", "units:s"] + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == ["[variable**************:var3]: Optional variable does not exist in file. "] + assert warnings == [] + + # Test that the function correctly handles optional variables + variable = "var3:__OPTIONAL__" + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == [] + assert warnings == ["[variable**************:var3]: Optional variable does not exist in file. "] + + # Test that the function correctly handles variables that exist + variable = "var1:__OPTIONAL__" + defined_attrs = ["long_name:Variable 1", "units:m"] + errors, warnings = cg.check_var(dct, variable, defined_attrs) + assert errors == [] + assert warnings == [] + + # Test that the function correctly identifies missing attributes + variable = "var2" + defined_attrs = ["long_name:Variable 2", "units:kg", "attr3:value 3"] + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == ["[variable**************:var2]: Attribute 'attr3' does not exist. "] + assert warnings == [] + + # Test that the function correctly identifies incorrect attributes + variable = "var2" + defined_attrs = ["long_name:Variable 2", "units:s"] + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."] + assert warnings == [] + + # Test that the function correctly handles badly formatted flag_values + variable = "var4:__OPTIONAL__" + defined_attrs = ["flag_values:0b, 1b, 2b"] + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == ["[variable**************:var4]: Attribute 'flag_values' must have definition '[0 1 2]', not '0b, 1b, 2b'."] + assert warnings == [] + + + # Test that the function correctly handles attributes with all values defined + variable = "var1:__OPTIONAL__" + defined_attrs = ["long_name:Variable 1", "units:m", "flag_values:0b, 1b, 2b"] + errors, warnings = cg.check_var(dct, variable, defined_attrs) + assert errors == [] + assert warnings == [] + + # Test that the function correctly handles an empty dct + variable = "var2" + dct = {"variables": {}} + errors, warnings = cg.check_var(dct, variable, defined_attrs) + assert errors == ["[variable**************:var2]: Optional variable does not exist in file. "] + assert warnings == [] + + +def test_check_file_name(): + # Test that the function correctly identifies invalid instrument name + vocab_checks = { + "instrument": "__vocabs__:tests/test_instruments:test_instruments:__all__", + "platform": "__vocabs__:tests/test_platforms:test_platforms:__all__", + "data_product": "__vocabs__:tests/test_products:test_products" + } + file_name = "inst3_plat1_20220101_prod1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - unknown instrument inst3"] + assert warnings == [] + + # Test that the function correctly identifies invalid platform name + file_name = "inst1_plat3_20220101_prod1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - unknown platform plat3"] + assert warnings == [] + + # Test that the function correctly identifies invalid date format + file_name = "inst1_plat1_2022010_prod1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - bad date format 2022010"] + assert warnings == [] + + # Test that the function correctly identifies invalid date + file_name = "inst1_plat1_20221301_prod1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - invalid date in file name 20221301"] + assert warnings == [] + + # Test that the function correctly identifies invalid data product + file_name = "inst1_plat1_20220101_prod3_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - unknown data product prod3"] + assert warnings == [] + + # Test that the function correctly identifies invalid version number format + file_name = "inst1_plat1_20220101_prod1_v10.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - incorrect file version number v10"] + assert warnings == [] + + # Test that the function correctly identifies too many options in file name + file_name = "inst1_plat1_20220101_prod1_option1_option2_option3_option4_option5_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - too many options in file name"] + assert warnings == [] + + # Test that the function correctly handles valid file names + file_name = "inst1_plat1_20220101_prod1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == [] + assert warnings == [] + + file_name = "inst1_plat1_20220101_prod1_opt1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == [] + assert warnings == [] + + file_name = "inst1_plat1_20220101_prod1_opt1_opt2_opt3_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == [] + assert warnings == [] \ No newline at end of file From cd68b27e789a5d95971979d0445bbc33ee0a0179 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 22 Jan 2024 15:32:19 +0000 Subject: [PATCH 20/37] Test for multiple errors in file name --- tests/test_generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index 52ab00c7..fa8963ff 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -292,6 +292,12 @@ def test_check_file_name(): assert errors == ["[file name]: Invalid file name format - too many options in file name"] assert warnings == [] + # Test that the function correctly handles multiple errors + file_name = "inst3_plat3_20220101_prod1_v1.0.nc" + errors, warnings = cg.check_file_name(file_name, vocab_checks) + assert errors == ["[file name]: Invalid file name format - unknown instrument inst3","[file name]: Invalid file name format - unknown platform plat3"] + assert warnings == [] + # Test that the function correctly handles valid file names file_name = "inst1_plat1_20220101_prod1_v1.0.nc" errors, warnings = cg.check_file_name(file_name, vocab_checks) From a85f796e3b2ec103d14c4bc235525e7792059e77 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Mon, 22 Jan 2024 15:42:08 +0000 Subject: [PATCH 21/37] Add more tests to github workflow --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a6ac6456..a8e9f92f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,4 +44,4 @@ jobs: - name: Test with pytest run: | export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml - python -m pytest -v tests/test_readers.py tests/test_images.py + python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py From a6132c9feefcb876a7becb4d10ce88adbd65a1af Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 10:49:21 +0000 Subject: [PATCH 22/37] Complete tests for check_global_attrs --- tests/test_generic.py | 124 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 106 insertions(+), 18 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index fa8963ff..8f3d6672 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -74,40 +74,128 @@ def test_check_global_attrs(): # Test that the function correctly identifies missing attributes dct = { "global_attributes": { - "attr1": "value1", - "attr2": "value2" - } + "attr1": "", + "attr2": "value2", + "attr3": "inst1" + }, + "inpt": "filename" } - defined_attrs = ["attr1", "attr3"] + defined_attrs = ["attr2", "attr4"] errors, warnings = cg.check_global_attrs(dct, defined_attrs, skip_spellcheck=True) - assert errors == ["[global-attributes:**************:attr3]: Attribute 'attr3' does not exist. "] + assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "] assert warnings == [] # Test that the function correctly handles empty attributes - dct = { - "global_attributes": { - "attr1": "", - "attr2": "value2" - } - } defined_attrs = ["attr1", "attr2"] errors, warnings = cg.check_global_attrs(dct, defined_attrs) assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."] assert warnings == [] - # Test that the function correctly handles attributes with all values defined - dct = { - "global_attributes": { - "attr1": "value1", - "attr2": "value2" - } - } + # Test that the function correctly handles defined_attrs when all attributes are defined + defined_attrs = ["attr2", "attr3"] errors, warnings = cg.check_global_attrs(dct, defined_attrs) assert errors == [] assert warnings == [] + # Test function handles non-existent attributes with vocab checks correctly + vocab_attrs = { + "attr4": "__vocabs__:tests/test_products:test_products" + } + errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "] + assert warnings == [] + + # Test function handles undefined attributes with vocab checks correctly + vocab_attrs = { + "attr1": "__vocabs__:tests/test_platforms:test_platforms:__all__" + } + errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."] + assert warnings == [] + + # Test function handles incorrect values with vocab checks correctly + vocab_attrs = { + "attr2": "__vocabs__:tests/test_platforms:test_platforms:__all__" + } + errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:******:attr2]*** 'value2' not in vocab options: ['plat1', 'plat2'] (using: '__vocabs__:tests/test_platforms:test_platforms:__all__')"] + assert warnings == [] + + # Test function handles correct values with vocab checks correctly + vocab_attrs = { + "attr3": "__vocabs__:tests/test_instruments:test_instruments:__all__" + } + errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True) + assert errors == [] + assert warnings == [] + + # Test function handles non-existent attributes with regex checks correctly + regex_attrs = { + "attr4": r"\d{4}-\d{2}-\d{2}" + } + errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "] + assert warnings == [] + + # Test function handles undefined attributes with regex checks correctly + regex_attrs = { + "attr1": r"\d{4}-\d{2}-\d{2}" + } + errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."] + assert warnings == [] + + # Test function handles incorrect values with regex checks correctly + regex_attrs = { + "attr2": r"\d{4}-\d{2}-\d{2}" + } + errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:******:attr2]: 'value2' does not match regex pattern '\d{4}-\d{2}-\d{2}'."] + assert warnings == [] + + # Test function handles correct values with regex checks correctly + regex_attrs = { + "attr3": r"inst\d" + } + errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True) + assert errors == [] + assert warnings == [] + + # Test function handles non-existent attributes with rules checks correctly + rules_attrs = { + "attr4": "rule-func:string-of-length:5" + } + errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "] + assert warnings == [] + + # Test function handles undefined attributes with rules checks correctly + rules_attrs = { + "attr1": "rule-func:string-of-length:5" + } + errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."] + assert warnings == [] + + # Test function handles incorrect values with rules checks correctly + rules_attrs = { + "attr2": "rule-func:string-of-length:5" + } + errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True) + assert errors == ["[global-attributes:******:attr2]*** 'value2' must be exactly 5 characters"] + assert warnings == [] + + # Test function handles correct values with rules checks correctly + rules_attrs = { + "attr3": "rule-func:string-of-length:5" + } + errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True) + assert errors == [] + assert warnings == [] + # Test that the function correctly handles an empty dct dct = {"global_attributes": {}} + defined_attrs = ["attr1", "attr2"] errors, warnings = cg.check_global_attrs(dct, defined_attrs) assert errors == ["[global-attributes:**************:attr1]: Attribute 'attr1' does not exist. ", "[global-attributes:**************:attr2]: Attribute 'attr2' does not exist. "] assert warnings == [] From dae31787bbb0e2362c1ef03c27962ab5b78a0955 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 11:26:08 +0000 Subject: [PATCH 23/37] Add string_to_dict tests --- tests/test_utils.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..1fbf6639 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,34 @@ +import checksit.utils as cu +import pytest + + +def test_string_to_dict(): + # Test that the function correctly converts a string to a dictionary + s = "key1=value1,key2=value2,key3=value3" + d = cu.string_to_dict(s) + assert d == {"key1": "value1", "key2": "value2", "key3": "value3"} + + # Test that the function handles an empty string + s = "" + with pytest.raises(ValueError): + d = cu.string_to_dict(s) + + # Test that the function correctly handles a string with no equals signs + s = "key1,key2,key3" + with pytest.raises(ValueError): + d = cu.string_to_dict(s) + + # Test that the function correctly handles a string with multiple equals signs in a pair + s = "key1=value1=value1,key2=value2,key3=value3" + with pytest.raises(ValueError): + d = cu.string_to_dict(s) + + # Test that the function correctly handles a string with no commas + s = "key1=value1" + d = cu.string_to_dict(s) + assert d == {"key1": "value1"} + + # Test that the function correctly handles a string with spaces + s = "key1 = value1, key2 = value2, key3 = value3" + d = cu.string_to_dict(s) + assert d == {"key1 ": " value1", " key2 ": " value2", " key3 ": " value3"} \ No newline at end of file From fdec73b41fab9efcb2994e461fb71eb047bfa3ad Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 11:28:56 +0000 Subject: [PATCH 24/37] Use raw string for string with regex formatting inside --- tests/test_generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 8f3d6672..09d6a863 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -150,7 +150,7 @@ def test_check_global_attrs(): "attr2": r"\d{4}-\d{2}-\d{2}" } errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True) - assert errors == ["[global-attributes:******:attr2]: 'value2' does not match regex pattern '\d{4}-\d{2}-\d{2}'."] + assert errors == [r"[global-attributes:******:attr2]: 'value2' does not match regex pattern '\d{4}-\d{2}-\d{2}'."] assert warnings == [] # Test function handles correct values with regex checks correctly From e0e50779a5da5a80b731db47dc398a2b75594de0 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 11:46:09 +0000 Subject: [PATCH 25/37] Add tests for all functions in utils.py --- tests/test_utils.py | 186 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 185 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 1fbf6639..f8aa8470 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,6 @@ import checksit.utils as cu import pytest +import inspect def test_string_to_dict(): @@ -31,4 +32,187 @@ def test_string_to_dict(): # Test that the function correctly handles a string with spaces s = "key1 = value1, key2 = value2, key3 = value3" d = cu.string_to_dict(s) - assert d == {"key1 ": " value1", " key2 ": " value2", " key3 ": " value3"} \ No newline at end of file + assert d == {"key1 ": " value1", " key2 ": " value2", " key3 ": " value3"} + + +def test_string_to_list(): + # Test that the function correctly converts a string to a list + s = "value1,value2,value3" + lst = cu.string_to_list(s) + assert lst == ["value1", "value2", "value3"] + + # Test that the function handles an empty string + s = "" + lst = cu.string_to_list(s) + assert lst == [""] + + # Test that the function correctly handles a string with no commas + s = "value1" + lst = cu.string_to_list(s) + assert lst == ["value1"] + + # Test that the function correctly handles a string with spaces + s = "value1, value2, value3" + lst = cu.string_to_list(s) + assert lst == ["value1", " value2", " value3"] + + # Test that the function correctly handles a string with trailing comma + s = "value1,value2,value3," + lst = cu.string_to_list(s) + assert lst == ["value1", "value2", "value3", ""] + + # Test that the function correctly handles a string with leading comma + s = ",value1,value2,value3" + lst = cu.string_to_list(s) + assert lst == ["", "value1", "value2", "value3"] + + +def test_extension(): + # Test that the function correctly identifies the extension of a file + file_path = "/path/to/file.txt" + ext = cu.extension(file_path) + assert ext == "txt" + + # Test that the function correctly handles a file with multiple dots in the name + file_path = "/path/to/file.name.with.multiple.dots.txt" + ext = cu.extension(file_path) + assert ext == "txt" + + # Test that the function correctly handles a file with a dot at the start of the name + file_path = "/path/to/.file" + ext = cu.extension(file_path) + assert ext == "file" + + # Test that the function correctly handles a file with a dot at the end of the name + file_path = "/path/to/file." + ext = cu.extension(file_path) + assert ext == "" + + # Test that the function correctly handles an empty string + file_path = "" + ext = cu.extension(file_path) + assert ext == "" + + +def test_get_file_base(): + # Test that the function correctly gets the base of a file name with one underscore + file_path = "/path/to/file_base.txt" + base = cu.get_file_base(file_path) + assert base == "file" + + # Test that the function correctly gets the base of a file name with multiple underscores + file_path = "/path/to/file_base_part2_part3.txt" + base = cu.get_file_base(file_path) + assert base == "file_base_part2" + + # Test that the function correctly gets the base of a file name with an underscore at the start + file_path = "/path/to/_file.txt" + base = cu.get_file_base(file_path) + assert base == "" + + # Test that the function correctly gets the base of a file name with an underscore at the end + file_path = "/path/to/file_.txt" + base = cu.get_file_base(file_path) + assert base == "file" + + # Test that the function correctly handles an empty string + file_path = "" + base = cu.get_file_base(file_path) + assert base == "" + + +def test_map_to_rule(): + # Test that the function correctly maps a function name with one underscore + class TestClass: + def test_func_one(): + pass + rule = cu.map_to_rule(TestClass.test_func_one) + assert rule == "test-func-one" + + # Test that the function correctly maps a function name with multiple underscores + class TestClass: + def test_func_multiple_underscores(): + pass + rule = cu.map_to_rule(TestClass.test_func_multiple_underscores) + assert rule == "test-func-multiple-underscores" + + # Test that the function correctly maps a function name with no underscores + class TestClass: + def testfuncnone(): + pass + rule = cu.map_to_rule(TestClass.testfuncnone) + assert rule == "testfuncnone" + + # Test that the function correctly maps a function name with an underscore at the start + class TestClass: + def _test_func_start(): + pass + rule = cu.map_to_rule(TestClass._test_func_start) + assert rule == "-test-func-start" + + # Test that the function correctly maps a function name with an underscore at the end + class TestClass: + def test_func_end_(): + pass + rule = cu.map_to_rule(TestClass.test_func_end_) + assert rule == "test-func-end-" + + +def test_is_undefined(): + # Test that the function correctly identifies None as undefined + assert cu.is_undefined(None) + + # Test that the function correctly identifies an empty string as undefined + assert cu.is_undefined("") + + # Test that the function correctly identifies an empty list as undefined + assert cu.is_undefined([]) + + # Test that the function correctly identifies an empty dictionary as undefined + assert cu.is_undefined({}) + + # Test that the function correctly identifies zero as not undefined + assert not cu.is_undefined(0) + + # Test that the function correctly identifies a non-empty string as not undefined + assert not cu.is_undefined("non-empty string") + + # Test that the function correctly identifies a non-empty list as not undefined + assert not cu.is_undefined(["non-empty list"]) + + # Test that the function correctly identifies a non-empty dictionary as not undefined + assert not cu.is_undefined({"key": "value"}) + + +def test_get_public_funcs(): + # Test that the function correctly gets the public functions of a module + funcs = cu.get_public_funcs(cu) + assert all([inspect.isfunction(func) for func in funcs]) + assert all([func.__name__[0] != "_" for func in funcs]) + assert "get_config" not in [func.__name__ for func in funcs] + + # Test that the function correctly handles a module with no public functions + class TestModule: + def _private_func(): + pass + funcs = cu.get_public_funcs(TestModule) + assert funcs == [] + + # Test that the function correctly handles a module with only private functions + class TestModule: + def _private_func(): + pass + def get_config(): + pass + funcs = cu.get_public_funcs(TestModule) + assert funcs == [] + + # Test that the function correctly handles a module with both public and private functions + class TestModule: + def public_func(): + pass + def _private_func(): + pass + funcs = cu.get_public_funcs(TestModule) + assert len(funcs) == 1 + assert funcs[0].__name__ == "public_func" \ No newline at end of file From 34bd1be57c18798881ad96a18abc3775ffd17fb3 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 11:46:50 +0000 Subject: [PATCH 26/37] Add test_utils.py to workflow --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a8e9f92f..3301eea5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,4 +44,4 @@ jobs: - name: Test with pytest run: | export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml - python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py + python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py From b6ee1a4c7ac571ba11d7bc2f0e120ce3fb4a7f6f Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 13:18:51 +0000 Subject: [PATCH 27/37] Test show specs from CLI --- tests/test_show_specs.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/test_show_specs.py b/tests/test_show_specs.py index bf1054fe..903c2a85 100644 --- a/tests/test_show_specs.py +++ b/tests/test_show_specs.py @@ -1,6 +1,6 @@ -import pytest -import json from checksit.specs import show_specs +from click.testing import CliRunner +from checksit import cli def test_show_specs_all(capsys): @@ -32,3 +32,18 @@ def test_show_specs_none_specified(capsys): captured_ceda_base = capsys.readouterr() assert captured_empty.out == captured_ceda_base.out + + +def test_show_specs_cli(): + runner = CliRunner() + result = runner.invoke(cli.show_specs, ["tests/test"]) + expected_output = ( + 'Specifications:\n\ntests/test:\n{\n "var-requires": {\n' + ' "func": "checksit.generic.check_var_attrs",\n "params": {\n' + ' "defined_attrs": [\n "long_name"\n ]\n' + ' }\n },\n "required-global-attrs": {\n' + ' "func": "checksit.generic.check_dim_exists",\n "params": {\n' + ' "dimensions": [\n "time"\n' + ' ]\n }\n }\n}\n' + ) + assert result.stdout == expected_output From 4143198a12cce23aed3e8a2477c925247b3e7b30 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 15:12:49 +0000 Subject: [PATCH 28/37] Remove commented out import statement --- tests/test_generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 09d6a863..b004368e 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1,4 +1,3 @@ -#from checksit.generic import one_spelling_mistake, two_spelling_mistakes, search_close_match, check_var_attrs, check_global_attrs, check_var_exists, check_dim_exists, check_var import checksit.generic as cg import numpy as np From 20f8d9f428894169724016921948b5628f5ca552 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 15:49:05 +0000 Subject: [PATCH 29/37] Test reading netCDF file --- tests/test_readers.py | 11 +++++++++++ tests/testdata/netcdf/test_netcdf.nc | Bin 0 -> 16192 bytes 2 files changed, 11 insertions(+) create mode 100644 tests/testdata/netcdf/test_netcdf.nc diff --git a/tests/test_readers.py b/tests/test_readers.py index b6f41d8e..b1419bb5 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -36,3 +36,14 @@ def test_cdl_reader_multiline_parser_2(): d = resp.to_dict() +def test_cdl_reader_netcdf(): + ncfile = os.path.join(TESTDATA_DIR, "netcdf/test_netcdf.nc") + resp = read_cdl(ncfile) + + d = resp.to_dict() + assert sorted(d.keys()) == sorted(["global_attributes", "dimensions", "variables", "inpt"]) + assert list(d["global_attributes"].keys()) == ["test_attribute_name"] + assert d["global_attributes"]["test_attribute_name"] == "test_attribute_value" + assert list(d["variables"].keys()) == ["T"] + assert sorted(d["dimensions"].keys()) == sorted(["x", "y", "z"]) + assert d["inpt"] == ncfile \ No newline at end of file diff --git a/tests/testdata/netcdf/test_netcdf.nc b/tests/testdata/netcdf/test_netcdf.nc new file mode 100644 index 0000000000000000000000000000000000000000..27f90ca776a6536d02ceaa7e5419d1bdd4a21479 GIT binary patch literal 16192 zcmeI2&xY@mDK!!lf0Tj>Dnl73qKl9_^h``NPKr!$A;?tgbVx%oQ%pKB zijccgcZDm(O)b@x3z2{z1^xs6z?Cj)t04Lx==0pVcP5Du;zC9EE_vrY=RNN^_tyQY zzn&4l`^cq--+J~vXZO#X33dIwtoYETbfxgnpT75&#~-=$#QwRT?SCk12X88++VWaH zyC3$n+Z6ZexpwvN z;$xS?xi51aG~Bf{T)Ftg%h?}`|H-FMo@@Ehq`V zjhm-;N-rGUIyw64GsoxKvhMGK2ij-;+rl{ybnVLV>DR75b>Y&!~0Ngpnd`& z{C@cGPyhK2-_3zsmEY(czI^xA_kVkb5Bz&~xV_^a{p}xq@XvR+-F3b5H~t=e_Z_)n2-tDiK`X1Jw>3X-X)ysR@zRu73S-%AiKDKrK z?OSKw;T{Kjp_N03AK1A5)bIc6pZZ629`OBg&hMD|Lw0!c=B?`|H;zt^pL_L>8ed8E zYpt@6yuCUhoO@>bPPm6}4aVL1%6RP2t54=1K)+4y1Ad?T1y`q3&Xe-jq{k|gOYY;o zvVIr&+phfh#m5gH`9i&d`gK-X@`(~7#ccmtn1eEhLj{*ABxEwTQu z0rj8U+uzT&zg2pi`j2$~hUlvP`p^1J4dF-k)M&@M8`#~z?gn-@u)BfX4eV}UcLTc{ z*xkVH26i{FyMf&e{O@j{y%>UUr5Hj_{2gGtg724lqXt6J?a>?6(X7b>9KZx3xpjN2 zFYu}c^mNvMdyF8V*S*bKphwN##Q!59mRwD^^r;6h0rh%rYjfP@>oHHD2hP-IkJ<=A ziJhTGy^-;Xf!H2ry7bs%Z;ac_NW2o?ey);FaDE1AbygtG^Qm=UCfDc87A)kWdC>=A z^GLn|b7Om*-50ihM$f&()*~=a1)(hU77XglNc8D(u25gpsQF9{@P3>bz)URwJ;tfP zdJkGK0sGY&V86LN)*`HC&HYw#S>1Yo)hb&P31l6PLQjc%*m<((zyz$jw|&AsYYnKH z^+ZkY0L)!_y@OUQZnXstKweAiK6)a#J=SN?8v~zJJ>Nsot7e~PF&}|@%M<8<`@5Q= z)%R>Dxqa?ed*lx2OP~jOJdbBsV6|ip+|QcxMBb0wIRV&X?2H!F^Vn;wZl7p?^Q;A+ zrpJD@3GTs4ya1PR0s+0wGIvfwv)7)w^r$;a&R#fz0Q3v%E_Kh~KK81sMe@7{+>_g@ zrzy1-);|J!1`w$0i=YENux@Rsy~O$=$aiJVa>g7t%|7!ExQF@*5>Rs)FQD#cZeI(W z>)EZ1U;@_EjU!(5&KS((_EmqQj+S8O46x9@qI;$uxb%d$X~wm0B3{tlv-TP1KJ)}a z$=zctz81F6ISsMsfSwhtFS)Va2{h=uk#MYMLO+^c=RBHO{@mv93fqT1_ zxXm$lroI-QKqOXkU-bZba<#@f(3=-E^9ktJBfGK|H(p@p+Mn;KDFELha!0oT5Ivn856NJ=gb8Ix%=z0=DxKa(K@uK4)j?|U?jHJv*_u;1U!c` z%&X=cXEz1*)cRWH&9HiaN4P$ZntkTR3w381+hZ-l5$=knCxMpSy7>(1jA)%R3hOJm z^YYnW5!){&pjWP5^$6^@W-dFsvj*#x_1PQ1Og_S?KwV9r94ZqoC3jvAWcP46(^&0x z5Amc9^k+}p>Usw>m%7he*BbBKV?b{~)yCqT@!02H?qS@34(QMGV3%3~ODyMwU7kTW zD*=6ahQh`HjN}biiQPw^ae%GQ#w^)+`g3euuu|OS_N5ldTVm(=IS;_TfNswM=g+^? z@>$W`+gbXY6^k~jkz03;9^qOde*kMP_tfhh)jMd3Mec!}ZO!Gp2IRiDJ^Ks~XwFr) zPya-m=Md*Q`9e)Z;2i7jZ5+^C#tE$P)%Rc?O3gm~_V&i=o)Jyfn?MA{GjQ%w*d8@& zxsQAT9SCUF8!)~C1976ZfQeW{Vn3U;^whdup(lTKoXJ_%gCOn;X7c=wA{*DamAm4z zHxb)!tnTc(|AID>%e7W}^XE$~QmcDKbFiY>W6dS2Hy{wtpe1&e?0(j~H+>_}mtgC1 zerB}3#I@I&u-}?`1Pfky1ld#OImg-hBRs)oxc6VkUM*0u8WN&04-E;uSbYPXwW8 z`WE2cLQlOv`~7^)?eAc{#?CO;qn2lq&%~kl8o163=&gRBSBya2e&ZFkp5PW1)+e}A z7wi|*fx4c0Mr(NntZt9}`5i630TvPTAQX=HTq%}w8B=p!t?4yyVedqq$rsQNr&1e@ zfqs4Nuh(8-&D`_W`%{~My#bFsdWWLxUrMefT<#lzb!XY{p4O}8tR4iAKxgcVcixvs zY>(`6FK4NVslYwVXX^3_>vd)eH^g!z?}_sw4#1gN zBlbBb&;WbgN3CKmek!@Pk~8#zDb`8cHiuQ>}iS? z#@MP@fu;^UbU{@-mz$PFTFk50?#0oSj~9}EfCk-xc10m(hFJ;$%n$mxes1I zqYh+y^mSmSHp1#FaRTbr?Q?k+edgBms2Tg5WAR3H;t9z5A{>feZwnfrUhg6I;jvEy zFcvoNK{79M)S3d%v6kL?7M%Hf_UUsUW1%PEcaItF;02CgBG%i05#&9w>W}2sUCt0{ z&Z&3d9Q)g%o7?LwXWF0lhxHk+ddmIgS>!Hf+h@Oh&eSWc4MoG6&uP7S^jo(-g1VX# zcW}M8zIbBX&NRaTO??E$!t=Vfx}K(J3vu>99ohs2kP7Q>3!I@R&@;jfuwNDv7(oa0 zMOc^?%_OP zU0$dqVC?)3%;ff33qalel~`>C`pjJ^ZrX%)duHjeZjWB)1h~FOl_R<}bLR#S@yR3b z+0FGizbUZCc>`=;U-AV_J;|&I-5zy45jJ08V`q+_1J>Qwv$&tR^Yw{ECU?o%7q|Wu zZqVcwp5X{4Vqv}%O?^aj##;1xj+&>^W3OwFfq8Enw|NWP6x~?eSu-rGb)W?+xv{7} z$3BCzdl0Elg#&9Ow$FKTqF!g_caqOWk39*5;u+u`*wcWTJJUXM_g$E0t|x&GxZK0o zef0$3tn7#Fap`l8{go4%^JCHMu|9!Oojkzy2B2Q&b;R!Da&L22f}LaE3OlEwDIRAz zXEG Date: Tue, 23 Jan 2024 16:42:44 +0000 Subject: [PATCH 30/37] Add test for auto-finding NCAS GENERAL specs --- tests/test_ncas_file_proc.py | 17 +++++++++++++++++ ...ument_platform_20230101_surface-met_v1.0.nc | Bin 0 -> 6469 bytes 2 files changed, 17 insertions(+) create mode 100644 tests/test_ncas_file_proc.py create mode 100644 tests/testdata/netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc diff --git a/tests/test_ncas_file_proc.py b/tests/test_ncas_file_proc.py new file mode 100644 index 00000000..51e59638 --- /dev/null +++ b/tests/test_ncas_file_proc.py @@ -0,0 +1,17 @@ +from click.testing import CliRunner +from checksit import cli +import os +from .common import TESTDATA_DIR + + +def test_ncas_general_specs(): + """ + Test checksit finds correct specs for NCAS GENERAL file + """ + runner = CliRunner() + result = runner.invoke(cli.check, ["-p", os.path.join(TESTDATA_DIR, "netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc")]) + output = result.output + templ_used = output.split("Template: ")[1].split("\n")[0] + specs_used = output.split("Spec Files: ")[1].split("\n")[0] + assert templ_used == "OFF" + assert specs_used == "['ncas-amof-2.0.0/amof-file-name', 'ncas-amof-2.0.0/amof-common-land', 'ncas-amof-2.0.0/amof-surface-met', 'ncas-amof-2.0.0/amof-global-attrs']" \ No newline at end of file diff --git a/tests/testdata/netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc b/tests/testdata/netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc new file mode 100644 index 0000000000000000000000000000000000000000..b121f1bba0af6ac14f03f86fbdb7dad2d0c3ba4d GIT binary patch literal 6469 zcmeH|&ubGw6vy9YZAs&{O>1jVgs#$q)^E5sPD~>)`o&d5$2K1%+9>`oj0F(**vXRYonR7 znN&Ir()NWpXZTEBEbP7c^|Mv4uBJvuwA2MQsi(x6j2Y}RDbUCzo2B-)aIew>*)TOc z#<)gf9Slgr2>?5ec7iAgzywf}33R>4Hyj%4Fvi_GuD#NWw}W0127EU<9qqOg^!k`WX>mRZ zlDqy!sc2od7UwtpjjN@CRVZ3lzWvyJ$DnASX;T=bX<7y&$z-ojfi`h|hK_}1LM{!t z$2b~7IC_TAybxfT%!A{aZG%o&#n&#=JLsI=ScTm+V)tIJKtZ7;XnPTQ_D95C*X z`w(|`D@})amxcG*X4p6J-+TV~ua{$hMzOs9%P=&Cmoi6BLgmTPR8~YP)04EFOi=bl zg{V(qf>;YOK|79nei(TPR@adPCY^#<7}FC$18NmjP=~6RW&g%qr7&UQP6c>&XN9-+E#3N`Az6t~Aa4>8?68 zXVr0Sr^C`+LIq0K_Y$ukB!_t))ElH}{ov+~IgZSN@+O;o^!^nesa6qC1QdaPOyH*7 zYOt3ky~)tz3zYw*;Ja8(@MZWoE(WSo1QY>9KoL*`6ahs*5l{pa0YyL&Py`f#|AxSC Dc6aqw literal 0 HcmV?d00001 From 4bd790cc353b57e6129ecd981b7a22cf2fd58e60 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 16:43:33 +0000 Subject: [PATCH 31/37] Added test_ncas_file_proc to workflow --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3301eea5..b415b1f3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,4 +44,4 @@ jobs: - name: Test with pytest run: | export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml - python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py + python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py From 7c262ea8189ba5cffa3baf5a8c846037e722a49c Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 16:47:42 +0000 Subject: [PATCH 32/37] Removed check for exiftool in workflow --- .github/workflows/main.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b415b1f3..63db8b34 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -38,9 +38,6 @@ jobs: pip install flake8 black pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi - - name: Look for exiftool - run: | - which exiftool - name: Test with pytest run: | export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml From f5545aff67d68d40bcb06528d359a2e80d27f186 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Tue, 23 Jan 2024 16:52:01 +0000 Subject: [PATCH 33/37] Tidy up import of rule_funcs --- tests/test_rules.py | 168 ++++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/tests/test_rules.py b/tests/test_rules.py index 5050ed1d..feb0da0e 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -4,204 +4,204 @@ from numbers import Number from checksit.rules import rules as r -from checksit.rules.rule_funcs import match_file_name, string_of_length, match_one_of, match_one_or_more_of, validate_image_date_time, validate_orcid_ID, list_of_names, headline, title_check, url_checker, relation_url_checker, latitude, longitude +import checksit.rules.rule_funcs as crf # rule_funcs.py def test_match_file_name(): file_path = "happy_netcdf" value = "happy_NetCDF.nc" context = {"file_path": file_path} - assert len(match_file_name(value, context)) == 1 - assert len(match_file_name(value, context, ["lowercase"])) == 1 - assert len(match_file_name(value, context, ["uppercase"])) == 1 - assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0 - assert len(match_file_name(value, context, ["uppercase", "no_extension"])) == 1 + assert len(crf.match_file_name(value, context)) == 1 + assert len(crf.match_file_name(value, context, ["lowercase"])) == 1 + assert len(crf.match_file_name(value, context, ["uppercase"])) == 1 + assert len(crf.match_file_name(value, context, ["lowercase", "no_extension"])) == 0 + assert len(crf.match_file_name(value, context, ["uppercase", "no_extension"])) == 1 def test_string_of_length(): # Test that the function correctly handles strings of the minimum length - assert string_of_length('abc', {}, ['3'], 'Test') == [] - assert string_of_length('abcd', {}, ['3+'], 'Test') == [] + assert crf.string_of_length('abc', {}, ['3'], 'Test') == [] + assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == [] # Test that the function correctly handles strings shorter than the minimum length - assert string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"] - assert string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"] + assert crf.string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"] + assert crf.string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"] # Test that the function correctly handles strings longer than the minimum length - assert string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"] - assert string_of_length('abcd', {}, ['3+'], 'Test') == [] + assert crf.string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"] + assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == [] # Test that the function correctly handles empty strings - assert string_of_length('', {}, ['0'], 'Test') == [] - assert string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"] - assert string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"] + assert crf.string_of_length('', {}, ['0'], 'Test') == [] + assert crf.string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"] + assert crf.string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"] def test_match_one_of(): # Test that the function correctly handles valid inputs - assert match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == [] + assert crf.match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == [] # Test that the function correctly handles invalid inputs - assert match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"] # Test that the function correctly handles empty strings - assert match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"] def test_match_one_or_more_of(): # Test that the function correctly handles valid inputs - assert match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == [] - assert match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == [] + assert crf.match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == [] + assert crf.match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == [] # Test that the function correctly handles invalid inputs - assert match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] - assert match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"] # Test that the function correctly handles empty strings - assert match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"] + assert crf.match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"] def test_validate_image_date_time(): # Test that the function correctly handles valid date-time strings - assert validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == [] - assert validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == [] + assert crf.validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == [] + assert crf.validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == [] # Test that the function correctly handles invalid date-time strings - assert validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] - assert validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] - assert validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] - assert validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] # Test that the function correctly handles empty strings - assert validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] + assert crf.validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"] def test_validate_orcid_ID(): # Test that the function correctly handles valid ORCID IDs - assert validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == [] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == [] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == [] + assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == [] + assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == [] + assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == [] # Test that the function correctly handles ORCID IDs with incorrect lengths - assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] # Test that the function correctly handles ORCID IDs with incorrect formats - assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] - assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] # Test that the function correctly handles empty strings - assert validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] + assert crf.validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"] def test_list_of_names(): # Test that the function correctly handles valid names - assert list_of_names('Doe, John', {}, label='Test') == [] - assert list_of_names('Doe, John J.', {}, label='Test') == [] - assert list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == [] + assert crf.list_of_names('Doe, John', {}, label='Test') == [] + assert crf.list_of_names('Doe, John J.', {}, label='Test') == [] + assert crf.list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == [] # Test that the function correctly handles names with incorrect formats - assert list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format , or , where appropriate"] - assert list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format , or , where appropriate"] - assert list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format , or , where appropriate"] + assert crf.list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format , or , where appropriate"] + assert crf.list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format , or , where appropriate"] + assert crf.list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format , or , where appropriate"] # Test that the function correctly handles names with invalid characters - assert list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"] - assert list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"] - assert list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert crf.list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert crf.list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert crf.list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"] # Test that the function correctly handles empty strings - assert list_of_names('', {}, label='Test') == ["Test '' should be of the format , or , where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"] - assert list_of_names([], {}, label='Test') == [] + assert crf.list_of_names('', {}, label='Test') == ["Test '' should be of the format , or , where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"] + assert crf.list_of_names([], {}, label='Test') == [] def test_headline(): # Test that the function correctly handles valid headlines - assert headline('This is a valid headline.', {}, label='Test') == [] - assert headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == [] - assert headline('This headline is exactly 10 characters.', {}, label='Test') == [] + assert crf.headline('This is a valid headline.', {}, label='Test') == [] + assert crf.headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == [] + assert crf.headline('This headline is exactly 10 characters.', {}, label='Test') == [] # Test that the function correctly handles headlines longer than 150 characters - assert headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"] + assert crf.headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"] # Test that the function correctly handles headlines with more than one sentence - assert headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"] + assert crf.headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"] # Test that the function correctly handles headlines that do not start with a capital letter - assert headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"] + assert crf.headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"] # Test that the function correctly handles headlines shorter than 10 characters - assert headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"] + assert crf.headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"] # Test that the function correctly handles empty strings - assert headline('', {}, label='Test') == ["Test '' should not be empty"] + assert crf.headline('', {}, label='Test') == ["Test '' should not be empty"] def test_title_check(): # Test that the function correctly handles titles that match the filename - assert title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == [] - assert title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == [] + assert crf.title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == [] + assert crf.title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == [] # Test that the function correctly handles titles that do not match the filename - assert title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"] - assert title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"] + assert crf.title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"] + assert crf.title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"] # Test that the function correctly handles empty titles - assert title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"] + assert crf.title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"] def test_url_checker(): # Test that the function correctly handles a reachable URL - assert url_checker("https://www.example.com", {}, label="Test") == [] + assert crf.url_checker("https://www.example.com", {}, label="Test") == [] # Test that the function correctly handles an unreachable URL - assert url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"] + assert crf.url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"] # Test that the function correctly handles an existing but unreachable URL - assert url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"] + assert crf.url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"] # Test that the function correctly handles an empty URL - assert url_checker("", {}, label="Test") == ["Test '' is not a reachable url"] + assert crf.url_checker("", {}, label="Test") == ["Test '' is not a reachable url"] def test_relation_url_checker(): # Test that the function correctly handles valid inputs - assert relation_url_checker('relation https://example.com', {}, label='Test') == [] + assert crf.relation_url_checker('relation https://example.com', {}, label='Test') == [] # Test that the function correctly handles inputs without a space - assert relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"] + assert crf.relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"] # Test that the function correctly handles inputs with an invalid URL - assert relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"] + assert crf.relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"] # Test that the function correctly handles empty strings - assert relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"] + assert crf.relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"] def test_latitude(): # Test that the function correctly handles valid latitudes - assert latitude('45.1234', {}, label='Test') == [] - assert latitude('-90.0000', {}, label='Test') == [] - assert latitude('90.0000', {}, label='Test') == [] + assert crf.latitude('45.1234', {}, label='Test') == [] + assert crf.latitude('-90.0000', {}, label='Test') == [] + assert crf.latitude('90.0000', {}, label='Test') == [] # Test that the function correctly handles invalid latitudes - assert latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "] - assert latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "] - assert latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "] + assert crf.latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "] + assert crf.latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "] + assert crf.latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "] def test_longitude(): # Test that the function correctly handles valid longitudes - assert longitude('45.1234', {}, label='Test') == [] - assert longitude('-180.0000', {}, label='Test') == [] - assert longitude('180.0000', {}, label='Test') == [] + assert crf.longitude('45.1234', {}, label='Test') == [] + assert crf.longitude('-180.0000', {}, label='Test') == [] + assert crf.longitude('180.0000', {}, label='Test') == [] # Test that the function correctly handles invalid longitudes - assert longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "] - assert longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "] - assert longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "] + assert crf.longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "] + assert crf.longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "] + assert crf.longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "] # rules.py From e7ad6c46ec7c545fc19bc80e9c103605724587d2 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 26 Jan 2024 13:42:51 +0000 Subject: [PATCH 34/37] Added checks on optional variables --- tests/test_generic.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index b004368e..a651a407 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -301,6 +301,12 @@ def test_check_var(): assert errors == ["[variable**************:var2]: Attribute 'attr3' does not exist. "] assert warnings == [] + variable = "var2:__OPTIONAL__" + defined_attrs = ["long_name:Variable 2", "units:kg", "attr3:value 3"] + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == ["[variable**************:var2]: Attribute 'attr3' does not exist. "] + assert warnings == [] + # Test that the function correctly identifies incorrect attributes variable = "var2" defined_attrs = ["long_name:Variable 2", "units:s"] @@ -308,6 +314,12 @@ def test_check_var(): assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."] assert warnings == [] + variable = "var2:__OPTIONAL__" + defined_attrs = ["long_name:Variable 2", "units:s"] + errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True) + assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."] + assert warnings == [] + # Test that the function correctly handles badly formatted flag_values variable = "var4:__OPTIONAL__" defined_attrs = ["flag_values:0b, 1b, 2b"] From db4ac3d7506de50e5fc7a5658e182cfaad6c3c79 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 26 Jan 2024 13:43:19 +0000 Subject: [PATCH 35/37] Add tests on vocab checks --- tests/test_cvs.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/test_cvs.py b/tests/test_cvs.py index d3c2134a..f19669a5 100644 --- a/tests/test_cvs.py +++ b/tests/test_cvs.py @@ -1,22 +1,22 @@ -from checksit.cvs import vocabs, vc +from checksit.cvs import vocabs +import pytest -lookups = { - 'vocabs:ukcp18:variables:season_year': - {'dimensions': ['time'], 'units': '1', 'dtype': 'int', 'long_name': 'season_year'}, - 'vocabs:ukcp18:collection': - ['land-cpm', 'land-derived', 'land-gcm', 'land-indices', 'land-prob', 'land-rcm', 'land-rcm-gwl', 'marine-sim'], - 'vocabs:cf-netcdf:Conventions': - ["CF-1.5", "CF-1.6"] -} - - -for lookup, exp_value in lookups.items(): - value = vocabs.lookup(lookup) - assert exp_value == value - - -for lookup, exp_value in lookups.items(): - value = vc._lookup(lookup) - assert exp_value == value +def test_lookup(): + assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments') == {'inst1': {"instrument_id": "inst1"}, "inst2": {"instrument_id": "inst2"}} + assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__') == ["inst1", "inst2"] + assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:inst1') == {"instrument_id": "inst1"} + assert vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id') == ["inst1", "inst2"] + with pytest.raises(ValueError): + vocabs.lookup('__vocabs__:tests/test_instruments:test_instruments:__all__:__all__') +def test_check(): + assert vocabs.check('__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id', 'inst1', label = "Test") == [] + assert vocabs.check( + "__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id", "inst3", label="Test", + ) == [ + "Test 'inst3' not in vocab options: ['inst1', 'inst2'] (using: '__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id')" + ] + assert vocabs.check('__vocabs__:tests/test_platforms:test_platforms:plat1', {"platform_id": "plat1"}, label = "Test") == ["Test does not have attribute 'description'"] + assert vocabs.check('__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id', "plat1", label = "Test") == [] + assert vocabs.check('__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id', "plat2", label = "Test") == ["Test 'plat2' does not equal required vocab value: 'plat1' (using: '__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id')"] \ No newline at end of file From 06f2f0389b8c9fdace5d0dc6e6fee2f46b9c60b5 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 26 Jan 2024 13:43:54 +0000 Subject: [PATCH 36/37] Add test_cvs.py to main workflow --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 63db8b34..e6cfc7dd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,4 +41,4 @@ jobs: - name: Test with pytest run: | export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml - python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py + python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py tests/test_cvs.py From b05a001ae5a7dd26a5c23f25e16f96d8da69902a Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Fri, 26 Jan 2024 15:06:21 +0000 Subject: [PATCH 37/37] Install netcdf in github action --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e6cfc7dd..e6e03c33 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,9 +29,10 @@ jobs: ./configure make all install ln -sf /opt/lib/* $LD_LIBRARY_PATH - - name: Install exiftool + - name: Install exiftool and netcdf run: | sudo apt install libimage-exiftool-perl -y + sudo apt-get install -y netcdf-bin - name: Install dependencies run: | python -m pip install --upgrade pip