From 3034976556fe2a0f9fb61c6ff9575051f54ad195 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 16 Jan 2024 14:45:54 +0000
Subject: [PATCH 01/37] Correct/clarify a few regex rules

---
 checksit/rules/rules.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py
index e92f1336..60c61327 100644
--- a/checksit/rules/rules.py
+++ b/checksit/rules/rules.py
@@ -23,7 +23,7 @@ def __init__(self):
             "integer": r"-?\d+",
             "valid-email": r"[^@\s]+@[^@\s]+\.[^\s@]+",
             "valid-url": r"https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?",
-            "valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+))|" + _NOT_APPLICABLE_RULES,
+            "valid-url-or-na": r"(https?://[^\s]+\.[^\s]*[^\s\.](/[^\s]+)?)|" + _NOT_APPLICABLE_RULES,
             "match:vN.M": r"v\d\.\d",
             "datetime": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?",
             "datetime-or-na": 
@@ -32,9 +32,9 @@ def __init__(self):
             "location": r'(.)+(\,\ )(.)+',
             "latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}',
             "longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}',
-            "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(.)+_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
+            "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
             "title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
-            "name-format": r'(.)+, (.)+ ?((.)+|((.)\.))',
+            "name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))',
             "name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+',
             "altitude-image-warning": r'-?\d+\sm',    # should be integers only for images
             "altitude-image": r'-?\d+(\.\d+)?\sm',

From dea2d88038d2e50619623713f33917ff7af8d0b9 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 16 Jan 2024 14:46:22 +0000
Subject: [PATCH 02/37] Add tests for regex rules

---
 tests/test_rules.py | 150 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 137 insertions(+), 13 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 3d45c10e..f5136a1e 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -1,4 +1,6 @@
 import os
+import re
+import pytest
 
 from checksit.rules import rules as r
 from checksit.rules.rule_funcs import match_file_name
@@ -47,16 +49,138 @@ def test_type_rules():
     for value in 3, 4.5, ["hi"]:
         assert tt(_type, value) != ([], [])
 
-def test_regex_rules():
-    rule = "regex-rule:integer"
-    assert r.check(rule, "-1") == ([], [])
-    assert r.check(rule, "500") == ([], [])
-    assert r.check(rule, "1.3") != ([], [])
-
-    rule = "regex-rule:valid-email"
-    assert r.check(rule, "freda.bloggs@amail.com") == ([], [])
-    assert r.check(rule, "@amail.com") != ([], [])
-    assert r.check(rule, "freda.bloggs@") != ([], [])
-
-#TODO: Add checks for all the published rules 
-#TODO: Add checks for some regular expressions to check they are executed correctly
+# static regex rule tests
+@pytest.fixture
+def rules():
+    return r.static_regex_rules
+
+def test_integer_rule(rules):
+    assert re.fullmatch(rules['integer'], '123')
+    assert re.fullmatch(rules['integer'], '-123')
+    assert not re.fullmatch(rules['integer'], '123.45')
+    assert not re.fullmatch(rules['integer'], 'abc')
+    assert not re.fullmatch(rules['integer'], '')
+
+def test_valid_email_rule(rules):
+    assert re.fullmatch(rules['valid-email'], 'test@example.com')
+    assert re.fullmatch(rules['valid-email'], 'test.test@example.com')
+    assert not re.fullmatch(rules['valid-email'], 'test@example')
+    assert not re.fullmatch(rules['valid-email'], 'test@.com')
+    assert not re.fullmatch(rules['valid-email'], 'test@com')
+
+def test_valid_url_rule(rules):
+    assert re.fullmatch(rules['valid-url'], 'https://example.com')
+    assert re.fullmatch(rules['valid-url'], 'http://example.com')
+    assert not re.fullmatch(rules['valid-url'], 'htp://example.com')
+    assert not re.fullmatch(rules['valid-url'], 'https:/example.com')
+    assert not re.fullmatch(rules['valid-url'], 'https://example')
+
+def test_valid_url_or_na_rule(rules):
+    assert re.fullmatch(rules['valid-url-or-na'], 'https://example.com')
+    assert re.fullmatch(rules['valid-url-or-na'], 'http://example.com')
+    assert re.fullmatch(rules['valid-url-or-na'], 'N/A')
+    assert not re.fullmatch(rules['valid-url-or-na'], 'htp://example.com')
+    assert not re.fullmatch(rules['valid-url-or-na'], 'https:/example.com')
+    assert not re.fullmatch(rules['valid-url-or-na'], 'nan')
+
+def test_match_vN_M_rule(rules):
+    assert re.fullmatch(rules['match:vN.M'], 'v1.0')
+    assert re.fullmatch(rules['match:vN.M'], 'v2.1')
+    assert not re.fullmatch(rules['match:vN.M'], 'v10')
+    assert not re.fullmatch(rules['match:vN.M'], 'v1.01')
+    assert not re.fullmatch(rules['match:vN.M'], 'v.1.0')
+
+def test_datetime_rule(rules):
+    assert re.fullmatch(rules['datetime'], '2022-01-01T00:00:00')
+    assert re.fullmatch(rules['datetime'], '2022-01-01T00:00:00.123')
+    assert not re.fullmatch(rules['datetime'], '2022-01-01 00:00:00')
+    assert not re.fullmatch(rules['datetime'], '2022-01-01T00:00')
+    assert not re.fullmatch(rules['datetime'], '2022-01-01')
+
+def test_datetime_or_na_rule(rules):
+    assert re.fullmatch(rules['datetime-or-na'], '2022-01-01T00:00:00')
+    assert re.fullmatch(rules['datetime-or-na'], '2022-01-01T00:00:00.123')
+    assert re.fullmatch(rules['datetime-or-na'], 'N/A')
+    assert re.fullmatch(rules['datetime-or-na'], 'NA')
+    assert re.fullmatch(rules['datetime-or-na'], 'Not Applicable')
+    assert not re.fullmatch(rules['datetime-or-na'], '2022-01-01 00:00:00')
+    assert not re.fullmatch(rules['datetime-or-na'], '2022-01-01T00:00')
+    assert not re.fullmatch(rules['datetime-or-na'], '2022-01-01')
+
+def test_number_rule(rules):
+    assert re.fullmatch(rules['number'], '123.45')
+    assert re.fullmatch(rules['number'], '-123.45')
+    assert re.fullmatch(rules['number'], '-123.')
+    assert not re.fullmatch(rules['number'], 'abc')
+    assert not re.fullmatch(rules['number'], '')
+    assert not re.fullmatch(rules['number'], '123.45abc')
+
+def test_location_rule(rules):
+    assert re.fullmatch(rules['location'], 'City, Country')
+    assert re.fullmatch(rules['location'], 'City, Country, State')
+    assert not re.fullmatch(rules['location'], 'City Country')
+    assert not re.fullmatch(rules['location'], 'City,')
+    assert not re.fullmatch(rules['location'], ',Country')
+
+def test_latitude_image_rule(rules):
+    assert re.fullmatch(rules['latitude-image'], '+12.345678')
+    assert re.fullmatch(rules['latitude-image'], '-12.345678')
+    assert not re.fullmatch(rules['latitude-image'], '123.45')
+    assert not re.fullmatch(rules['latitude-image'], '+123.456789')
+    assert not re.fullmatch(rules['latitude-image'], '-123.456789')
+
+def test_longitude_image_rule(rules):
+    assert re.fullmatch(rules['longitude-image'], '+123.45678')
+    assert re.fullmatch(rules['longitude-image'], '-123.45678')
+    assert not re.fullmatch(rules['longitude-image'], '123')
+    assert not re.fullmatch(rules['longitude-image'], '+1234.56789')
+    assert not re.fullmatch(rules['longitude-image'], '-1234.56789')
+
+def test_title_rule(rules):
+    assert re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0.png')
+    assert re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0.jpg')
+    assert not re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0.txt')
+    assert not re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.png')
+    assert not re.fullmatch(rules['title'], 'prefix_suffix_2022_v1.0')
+
+def test_title_data_product_rule(rules):
+    assert re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_plot_v1.0.png')
+    assert re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_photo_v1.0.jpg')
+    assert not re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_v1.0.txt')
+    assert not re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_plot_v1.png')
+    assert not re.fullmatch(rules['title-data-product'], 'prefix_suffix_2022_plot_v1.0')
+
+def test_name_format_rule(rules):
+    assert re.fullmatch(rules['name-format'], 'Last, First M.')
+    assert re.fullmatch(rules['name-format'], 'Last, First')
+    assert not re.fullmatch(rules['name-format'], 'First Last')
+    assert not re.fullmatch(rules['name-format'], 'Last, First, M.')
+    assert not re.fullmatch(rules['name-format'], 'Last First M.')
+
+def test_name_characters_rule(rules):
+    assert re.fullmatch(rules['name-characters'], 'John_Doe')
+    assert re.fullmatch(rules['name-characters'], 'John-Doe')
+    assert not re.fullmatch(rules['name-characters'], 'John Doe!')
+    assert not re.fullmatch(rules['name-characters'], 'John Doe@')
+    assert not re.fullmatch(rules['name-characters'], 'John Doe#')
+
+def test_altitude_image_warning_rule(rules):
+    assert re.fullmatch(rules['altitude-image-warning'], '123 m')
+    assert re.fullmatch(rules['altitude-image-warning'], '-123 m')
+    assert not re.fullmatch(rules['altitude-image-warning'], '123.45 m')
+    assert not re.fullmatch(rules['altitude-image-warning'], '123')
+    assert not re.fullmatch(rules['altitude-image-warning'], '123m')
+
+def test_altitude_image_rule(rules):
+    assert re.fullmatch(rules['altitude-image'], '123.45 m')
+    assert re.fullmatch(rules['altitude-image'], '-123.45 m')
+    assert not re.fullmatch(rules['altitude-image'], '123')
+    assert not re.fullmatch(rules['altitude-image'], '123.45')
+    assert not re.fullmatch(rules['altitude-image'], '123.45m')
+
+def test_ncas_email_rule(rules):
+    assert re.fullmatch(rules['ncas-email'], 'test@ncas.ac.uk')
+    assert re.fullmatch(rules['ncas-email'], 'test.test@ncas.ac.uk')
+    assert not re.fullmatch(rules['ncas-email'], 'test@example.com')
+    assert not re.fullmatch(rules['ncas-email'], 'test@ncas.com')
+    assert not re.fullmatch(rules['ncas-email'], 'test@ncas.ac')
\ No newline at end of file

From 2bd1eb7a183f2882ffc56ddde03aefafbe50e280 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 16 Jan 2024 14:47:52 +0000
Subject: [PATCH 03/37] Correct test on number rule

---
 tests/test_rules.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index f5136a1e..c7d98fc5 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -110,7 +110,7 @@ def test_datetime_or_na_rule(rules):
 def test_number_rule(rules):
     assert re.fullmatch(rules['number'], '123.45')
     assert re.fullmatch(rules['number'], '-123.45')
-    assert re.fullmatch(rules['number'], '-123.')
+    assert not re.fullmatch(rules['number'], '-123.')
     assert not re.fullmatch(rules['number'], 'abc')
     assert not re.fullmatch(rules['number'], '')
     assert not re.fullmatch(rules['number'], '123.45abc')

From 256dd0d7496835263613c7e47770308d32734763 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Wed, 17 Jan 2024 10:49:18 +0000
Subject: [PATCH 04/37] Re-arrange checks in image_date_time and headline

---
 checksit/rules/rule_funcs.py | 42 ++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py
index 99237cae..b73d17a0 100644
--- a/checksit/rules/rule_funcs.py
+++ b/checksit/rules/rule_funcs.py
@@ -42,7 +42,8 @@ def match_file_name(value, context, extras=None, label=""):
 
 def match_one_of(value, context, extras=None, label=""):
     """
-    Matches only one of...
+    Value must match one of the options defined in extras.
+    The default rule splitter is '|' and is defined in the checksit.ini file.
     """
     options = [x.strip() for x in extras[0].split(rule_splitter)]
     errors = []
@@ -55,7 +56,7 @@ def match_one_of(value, context, extras=None, label=""):
 
 def match_one_or_more_of(value, context, extras=None, label=""):
     """
-    Matches one of more of...
+    String value or list value must match one or more of the options given in extras
     """
     def as_set(x, sep): return set([i.strip() for i in x.split(sep)])
     options = as_set(extras[0], rule_splitter)
@@ -93,10 +94,15 @@ def validate_image_date_time(value, context, extras=None, label=""):
     """
     errors = []
 
-    try:
-        if value != datetime.strptime(value, "%Y:%m:%d %H:%M:%S").strftime("%Y:%m:%d %H:%M:%S") and value != datetime.strptime(value, "%Y:%m:%d #%H:%M:%S.%f").strftime("%Y:%m:%d %H:%M:%S.%f"):
-            errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s")
-    except ValueError:
+    match = False
+    for f in ["%Y:%m:%d %H:%M:%S", "%Y:%m:%d %H:%M:%S.%f"]:
+        if match == False:
+            try:
+                match = (value == datetime.strptime(value, f).strftime(f))
+            except ValueError:
+                pass
+
+    if not match:
         errors.append(f"{label} '{value}' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s")
     
     return errors
@@ -126,7 +132,7 @@ def validate_orcid_ID(value, context, extras=None, label=""):
         value[32] != "-" or
         
         # Check that the last characters contain only "-" and digits
-        not PI_orcid_digits_only.isdigit):
+        not PI_orcid_digits_only.isdigit()):
 
         errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX")
 
@@ -164,17 +170,21 @@ def headline(value, context, extras=None, label=""):
     """
     warnings = []
 
-    if len(value) > 150:
-        warnings.append(f"{label} '{value}' should contain no more than one sentence")
+    if value == "":
+        warnings.append(f"{label} '{value}' should not be empty")
+
+    else:
+        if len(value) > 150:
+            warnings.append(f"{label} '{value}' should contain no more than one sentence")
 
-    if value.count(".") >= 2:
-        warnings.append(f"{label} '{value}' should contain no more than one sentence")
+        if value.count(".") >= 2:
+            warnings.append(f"{label} '{value}' should contain no more than one sentence")
 
-    if not value[0].isupper():
-        warnings.append(f"{label} '{value}' should start with a capital letter")
+        if not value[0].isupper():
+            warnings.append(f"{label} '{value}' should start with a capital letter")
 
-    if len(value) < 10:
-        warnings.append(f"{label} '{value}' should be at least 10 characters")
+        if len(value) < 10:
+            warnings.append(f"{label} '{value}' should be at least 10 characters")
 
     return warnings
 
@@ -218,7 +228,7 @@ def relation_url_checker(value, context, extras=None, label=""):
     else:
         relation_url = value.partition(" ")[2]        # extract only the url part of the relation string
         if url_checker(relation_url, context, extras, label) != []:
-            errors.append(url_checker(relation_url, context, extras, label))       # check the url exists using the url_checker() function defined above
+            errors.extend(url_checker(relation_url, context, extras, label))       # check the url exists using the url_checker() function defined above
 
     return errors
 

From 4b82cda3f09fa7aa66958183d028cbd3a9234537 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Wed, 17 Jan 2024 10:49:39 +0000
Subject: [PATCH 05/37] More tests for rule_funcs

---
 tests/test_rules.py | 204 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 201 insertions(+), 3 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index c7d98fc5..5ca4de20 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -1,11 +1,12 @@
 import os
 import re
 import pytest
+from numbers import Number
 
 from checksit.rules import rules as r
-from checksit.rules.rule_funcs import match_file_name
-
+from checksit.rules.rule_funcs import match_file_name, string_of_length, match_one_of, match_one_or_more_of, validate_image_date_time, validate_orcid_ID, list_of_names, headline, title_check, url_checker, relation_url_checker, latitude, longitude
 
+# rule_funcs.py
 def test_match_file_name():
     file_path = "happy_netcdf"
     value = "happy_NetCDF.nc"
@@ -14,6 +15,193 @@ def test_match_file_name():
     assert len(match_file_name(value, context, ["lowercase"])) == 1
     assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0
 
+
+def test_string_of_length():
+    # Test that the function correctly handles strings of the minimum length
+    assert string_of_length('abc', {}, ['3'], 'Test') == []
+    assert string_of_length('abcd', {}, ['3+'], 'Test') == []
+
+    # Test that the function correctly handles strings shorter than the minimum length
+    assert string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"]
+    assert string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"]
+
+    # Test that the function correctly handles strings longer than the minimum length
+    assert string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"]
+    assert string_of_length('abcd', {}, ['3+'], 'Test') == []
+
+    # Test that the function correctly handles empty strings
+    assert string_of_length('', {}, ['0'], 'Test') == []
+    assert string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"]
+    assert string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"]
+
+
+def test_match_one_of():
+    # Test that the function correctly handles valid inputs
+    assert match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == []
+
+    # Test that the function correctly handles invalid inputs
+    assert match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"]
+
+    # Test that the function correctly handles empty strings
+    assert match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"]
+
+
+def test_match_one_or_more_of():
+    # Test that the function correctly handles valid inputs
+    assert match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == []
+    assert match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == []
+
+    # Test that the function correctly handles invalid inputs
+    assert match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
+
+    # Test that the function correctly handles empty strings
+    assert match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"]
+
+
+def test_validate_image_date_time():
+    # Test that the function correctly handles valid date-time strings
+    assert validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == []
+    assert validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == []
+
+    # Test that the function correctly handles invalid date-time strings
+    assert validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+
+    # Test that the function correctly handles empty strings
+    assert validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+
+
+def test_validate_orcid_ID():
+    # Test that the function correctly handles valid ORCID IDs
+    assert validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == []
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == []
+
+    # Test that the function correctly handles ORCID IDs with incorrect lengths
+    assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+
+    # Test that the function correctly handles ORCID IDs with incorrect formats
+    assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-345X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+
+    # Test that the function correctly handles empty strings
+    assert validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+
+
+def test_list_of_names():
+    # Test that the function correctly handles valid names
+    assert list_of_names('Doe, John', {}, label='Test') == []
+    assert list_of_names('Doe, John J.', {}, label='Test') == []
+    assert list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == []
+
+    # Test that the function correctly handles names with incorrect formats
+    assert list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+    assert list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+    assert list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+
+    # Test that the function correctly handles names with invalid characters
+    assert list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+
+    # Test that the function correctly handles empty strings
+    assert list_of_names('', {}, label='Test') == ["Test '' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert list_of_names([], {}, label='Test') == []
+
+
+def test_headline():
+    # Test that the function correctly handles valid headlines
+    assert headline('This is a valid headline.', {}, label='Test') == []
+    assert headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == []
+    assert headline('This headline is exactly 10 characters.', {}, label='Test') == []
+
+    # Test that the function correctly handles headlines longer than 150 characters
+    assert headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"]
+
+    # Test that the function correctly handles headlines with more than one sentence
+    assert headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"]
+
+    # Test that the function correctly handles headlines that do not start with a capital letter
+    assert headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"]
+
+    # Test that the function correctly handles headlines shorter than 10 characters
+    assert headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"]
+
+    # Test that the function correctly handles empty strings
+    assert headline('', {}, label='Test') == ["Test '' should not be empty"]
+
+
+def test_title_check():
+    # Test that the function correctly handles titles that match the filename
+    assert title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == []
+    assert title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == []
+
+    # Test that the function correctly handles titles that do not match the filename
+    assert title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"]
+    assert title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"]
+
+    # Test that the function correctly handles empty titles
+    assert title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"]
+
+
+def test_url_checker():
+    # Test that the function correctly handles a reachable URL
+    assert url_checker("https://www.example.com", {}, label="Test") == []
+
+    # Test that the function correctly handles an unreachable URL
+    assert url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"]
+
+    # Test that the function correctly handles a nonexistent page on a reachable host
+    assert url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"]
+
+    # Test that the function correctly handles an empty URL
+    assert url_checker("", {}, label="Test") == ["Test '' is not a reachable url"]
+
+
+def test_relation_url_checker():
+    # Test that the function correctly handles valid inputs
+    assert relation_url_checker('relation https://example.com', {}, label='Test') == []
+
+    # Test that the function correctly handles inputs without a space
+    assert relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"]
+
+    # Test that the function correctly handles inputs with an invalid URL
+    assert relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"]
+
+    # Test that the function correctly handles empty strings
+    assert relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"]
+
+
+def test_latitude():
+    # Test that the function correctly handles valid latitudes
+    assert latitude('45.1234', {}, label='Test') == []
+    assert latitude('-90.0000', {}, label='Test') == []
+    assert latitude('90.0000', {}, label='Test') == []
+
+    # Test that the function correctly handles invalid latitudes
+    assert latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "]
+    assert latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "]
+    assert latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "]
+
+
+def test_longitude():
+    # Test that the function correctly handles valid longitudes
+    assert longitude('45.1234', {}, label='Test') == []
+    assert longitude('-180.0000', {}, label='Test') == []
+    assert longitude('180.0000', {}, label='Test') == []
+
+    # Test that the function correctly handles invalid longitudes
+    assert longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "]
+    assert longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "]
+    assert longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "]
+
+
+# rules.py
 def _test_type(_type, value):
     return r.check(f"type-rule:{_type}", value)
 
@@ -183,4 +371,14 @@ def test_ncas_email_rule(rules):
     assert re.fullmatch(rules['ncas-email'], 'test.test@ncas.ac.uk')
     assert not re.fullmatch(rules['ncas-email'], 'test@example.com')
     assert not re.fullmatch(rules['ncas-email'], 'test@ncas.com')
-    assert not re.fullmatch(rules['ncas-email'], 'test@ncas.ac')
\ No newline at end of file
+    assert not re.fullmatch(rules['ncas-email'], 'test@ncas.ac')
+
+def test_map_type_rule():
+    assert r._map_type_rule('number') == Number
+    assert r._map_type_rule('integer') == int
+    assert r._map_type_rule('int') == int
+    assert r._map_type_rule('float') == float
+    assert r._map_type_rule('string') == str
+    assert r._map_type_rule('str') == str
+    with pytest.raises(KeyError):
+        r._map_type_rule('nonexistent')
\ No newline at end of file

From 86ce55061d6e0a359e50ec39fc619b6171ff6fc4 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Wed, 17 Jan 2024 13:52:49 +0000
Subject: [PATCH 06/37] Decide whether a check is a warning before dispatching on rule type

---
 checksit/rules/rules.py | 44 ++++++++++++-----------------------------
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py
index 60c61327..b27df912 100644
--- a/checksit/rules/rules.py
+++ b/checksit/rules/rules.py
@@ -66,57 +66,39 @@ def check(self, rule_lookup, value, context=None, label=""):
 
         for i in rule_lookup_list:
 
-            if i.startswith("rule-func:"):
-                rule_comps = i.split(":")
-                rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_"))
-                extras = rule_comps[2:]
-                errors.extend(rule_func(value, context, extras, label=label))
+            if i.split(":")[0].endswith("-warning"):
+                output = warnings
+            else:
+                output = errors
 
-            elif i.startswith("rule-func-warning:"):
+            if i.startswith("rule-func"):
                 rule_comps = i.split(":")
                 rule_func = getattr(rule_funcs, rule_comps[1].replace("-", "_"))
                 extras = rule_comps[2:]
-                warnings.extend(rule_func(value, context, extras, label=label))
+                output.extend(rule_func(value, context, extras, label=label))
 
             elif i.startswith("type-rule"):
                 type_rule = i.split(":")[1]
 
                 if not isinstance(value, self._map_type_rule(type_rule)):
-                    errors.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.")
-                
-            elif i.startswith("regex-warning:"):
-                pattern = ':'.join(i.split(":")[1:])  # in case pattern has colons in it, e.g. a URL 
-                if not re.match(f"^{pattern}$", value):
-                    warnings.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
+                    output.append(f"{label} Value '{value}' is not of required type: '{type_rule}'.")
 
-            elif i.startswith("regex:"):
-                pattern = ':'.join(i.split(":")[1:])  # in case pattern has colons in it, e.g. a URL 
-                if not re.match(f"^{pattern}$", value):
-                    errors.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
-
-            elif i.startswith("regex-rule-warning:"):
+            elif i.startswith("regex-rule"):
                 regex_rule = i.split(":", 1)[1]
 
                 if regex_rule in self.static_regex_rules:
                     pattern = self.static_regex_rules[regex_rule]
 
                     if not re.match("^" + pattern + "$", value):
-                        warnings.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
+                        output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
 
                 else:
                     raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
 
-            elif i.startswith("regex-rule:"):
-                regex_rule = i.split(":", 1)[1]
-
-                if regex_rule in self.static_regex_rules:
-                    pattern = self.static_regex_rules[regex_rule]
-
-                    if not re.match("^" + pattern + "$", value):
-                        errors.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
-
-                else:
-                    raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
+            elif i.startswith("regex"):
+                pattern = ':'.join(i.split(":")[1:])  # in case pattern has colons in it, e.g. a URL 
+                if not re.match(f"^{pattern}$", value):
+                    output.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
             
             else:
                 raise Exception(f"Rule not found with rule ID: {rule_lookup}.")

From 6b7e169174b6664c9bba414a4c21951c21bbed25 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Wed, 17 Jan 2024 13:53:46 +0000
Subject: [PATCH 07/37] Change ORCID tests

---
 tests/test_rules.py | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 5ca4de20..6b87acd2 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -78,6 +78,7 @@ def test_validate_orcid_ID():
     # Test that the function correctly handles valid ORCID IDs
     assert validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == []
     assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == []
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == []
 
     # Test that the function correctly handles ORCID IDs with incorrect lengths
     assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
@@ -85,7 +86,6 @@ def test_validate_orcid_ID():
 
     # Test that the function correctly handles ORCID IDs with incorrect formats
     assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-345X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
     assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
     assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
 
@@ -381,4 +381,37 @@ def test_map_type_rule():
     assert r._map_type_rule('string') == str
     assert r._map_type_rule('str') == str
     with pytest.raises(KeyError):
-        r._map_type_rule('nonexistent')
\ No newline at end of file
+        r._map_type_rule('nonexistent')
+
+def test_check():
+    rules_instance = r
+
+    # Test that the function correctly handles rule-func
+    assert rules_instance.check("rule-func:string_of_length:3", "abc", {}, label="Test") == ([], [])
+    assert rules_instance.check("rule-func:string_of_length:3", "abcd", {}, label="Test") == (["Test 'abcd' must be exactly 3 characters"], [])
+
+    # Test that the function correctly handles rule-func-warning
+    assert rules_instance.check("rule-func-warning:string_of_length:3", "abc", {}, label="Test") == ([], [])
+    assert rules_instance.check("rule-func-warning:string_of_length:3", "abcd", {}, label="Test") == ([], ["Test 'abcd' must be exactly 3 characters"])
+
+    # Test that the function correctly handles type-rule
+    assert rules_instance.check("type-rule:int", 123, {}, label="Test") == ([], [])
+    assert rules_instance.check("type-rule:int", "abc", {}, label="Test") == (["Test Value 'abc' is not of required type: 'int'."], [])
+
+    # Test that the function correctly handles regex-warning
+    assert rules_instance.check("regex-warning:^[a-z]+$", "abc", {}, label="Test") == ([], [])
+    assert rules_instance.check("regex-warning:^[a-z]+$", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."])
+
+    # Test that the function correctly handles regex
+    assert rules_instance.check("regex:^[a-z]+$", "abc", {}, label="Test") == ([], [])
+    assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], [])
+
+    # Test that the function correctly handles regex-rule-warning
+    rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"}
+    assert rules_instance.check("regex-rule-warning:lowercase", "abc", {}, label="Test") == ([], [])
+    assert rules_instance.check("regex-rule-warning:lowercase", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regex rule: 'lowercase'."])
+
+    # Test that the function correctly handles regex-rule
+    rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"}
+    assert rules_instance.check("regex-rule:lowercase", "abc", {}, label="Test") == ([], [])
+    assert rules_instance.check("regex-rule:lowercase", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regex rule: 'lowercase'."], [])
\ No newline at end of file

From 96399c130570ddc013c575fe204fccacfad6648c Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Wed, 17 Jan 2024 13:58:26 +0000
Subject: [PATCH 08/37] Allow ORCID to end with "X"

---
 checksit/rules/rule_funcs.py | 7 +++++--
 tests/test_rules.py          | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/checksit/rules/rule_funcs.py b/checksit/rules/rule_funcs.py
index b73d17a0..604f9cfc 100644
--- a/checksit/rules/rule_funcs.py
+++ b/checksit/rules/rule_funcs.py
@@ -131,8 +131,11 @@ def validate_orcid_ID(value, context, extras=None, label=""):
         value[27] != "-" or
         value[32] != "-" or
         
-        # Check that the last characters contain only "-" and digits
-        not PI_orcid_digits_only.isdigit()):
+        # Check that the last characters contain only "-" and digits (plus 'X' for last digit)
+        not (
+            PI_orcid_digits_only.isdigit() or (PI_orcid_digits_only[0:15].isdigit() and PI_orcid_digits_only[15] == "X")
+        )
+    ):
 
         errors.append(f"{label} '{value}' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX")
 
diff --git a/tests/test_rules.py b/tests/test_rules.py
index 6b87acd2..13b36b00 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -86,6 +86,7 @@ def test_validate_orcid_ID():
 
     # Test that the function correctly handles ORCID IDs with incorrect formats
     assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
     assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
     assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
 

From 5181d76c273dfb5309bafc24d1ae9582fefe2bbf Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Wed, 17 Jan 2024 14:37:30 +0000
Subject: [PATCH 09/37] Correct the mistake I made when "correcting" the title
 regex check

---
 checksit/rules/rules.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py
index b27df912..c872617b 100644
--- a/checksit/rules/rules.py
+++ b/checksit/rules/rules.py
@@ -32,7 +32,7 @@ def __init__(self):
             "location": r'(.)+(\,\ )(.)+',
             "latitude-image": r'[\+|\-]?[0-9]{1,2}\.[0-9]{0,6}',
             "longitude-image": r'[\+|\-]?1?[0-9]{1,2}\.[0-9]{0,6}',
-            "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
+            "title": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?(_.+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
             "title-data-product": r'(.)+_(.)+_([1-2][0-9][0-9][0-9])([0][0-9]|[1][0-2])?([0-2][0-9]|[3][0-1])?-?([0-1][0-9]|[2][0-3])?([0-5][0-9])?([0-5][0-9])?_(plot|photo)((.)+)?_v([0-9]+)\.([0-9]+)\.(png|PNG|jpg|JPG|jpeg|JPEG)',
             "name-format": r'([^,])+, ([^,])+( ?[^,]+|((.)\.))',
             "name-characters": r'[A-Za-z_À-ÿ\-\'\ \.\,]+',

From 00932aed45afae3d7e72cad6dbeda354a8c4c8ad Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 09:17:07 +0000
Subject: [PATCH 10/37] Change to exception messages on invalid rules

---
 checksit/rules/rules.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py
index c872617b..568a96f6 100644
--- a/checksit/rules/rules.py
+++ b/checksit/rules/rules.py
@@ -93,15 +93,15 @@ def check(self, rule_lookup, value, context=None, label=""):
                         output.append(f"{label} Value '{value}' does not match regex rule: '{regex_rule}'.")
 
                 else:
-                    raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
+                    raise Exception(f"Regex rule not found with rule ID: {i}.")
 
             elif i.startswith("regex"):
-                pattern = ':'.join(i.split(":")[1:])  # in case pattern has colons in it, e.g. a URL 
+                pattern = i.split(":", 1)[1]  # in case pattern has colons in it, e.g. a URL
                 if not re.match(f"^{pattern}$", value):
                     output.append(f"{label} Value '{value}' does not match regular expression: '{pattern}'.")
-            
+
             else:
-                raise Exception(f"Rule not found with rule ID: {rule_lookup}.")
+                raise Exception(f"Rule not found with rule ID: {i}.")
 
         return errors, warnings
 

From 3275a7a3052dae47c50bd6efc0e2947ae70b1b3d Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 09:17:40 +0000
Subject: [PATCH 11/37] Use already existing static regex rules rather than
 making some up

---
 tests/test_rules.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 13b36b00..8056fcce 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -408,11 +408,9 @@ def test_check():
     assert rules_instance.check("regex:^[a-z]+$", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regular expression: '^[a-z]+$'."], [])
 
     # Test that the function correctly handles regex-rule-warning
-    rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"}
-    assert rules_instance.check("regex-rule-warning:lowercase", "abc", {}, label="Test") == ([], [])
-    assert rules_instance.check("regex-rule-warning:lowercase", "ABC", {}, label="Test") == ([], ["Test Value 'ABC' does not match regex rule: 'lowercase'."])
+    assert rules_instance.check("regex-rule-warning:integer", "123", {}, label="Test") == ([], [])
+    assert rules_instance.check("regex-rule-warning:integer", "123.45", {}, label="Test") == ([], ["Test Value '123.45' does not match regex rule: 'integer'."])
 
     # Test that the function correctly handles regex-rule
-    rules_instance.static_regex_rules = {"lowercase": "^[a-z]+$"}
-    assert rules_instance.check("regex-rule:lowercase", "abc", {}, label="Test") == ([], [])
-    assert rules_instance.check("regex-rule:lowercase", "ABC", {}, label="Test") == (["Test Value 'ABC' does not match regex rule: 'lowercase'."], [])
\ No newline at end of file
+    assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ([], [])
+    assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (["Test Value '123.45' does not match regex rule: 'integer'."], [])

From b02fc871f9dca9b696b1ab0dc85db61b4e5298b3 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 09:18:11 +0000
Subject: [PATCH 12/37] Dummy specs for testing show specs

---
 specs/groups/tests/test.yml | 13 +++++++++++++
 tests/test_show_specs.py    | 23 +++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 specs/groups/tests/test.yml
 create mode 100644 tests/test_show_specs.py

diff --git a/specs/groups/tests/test.yml b/specs/groups/tests/test.yml
new file mode 100644
index 00000000..2b947ec0
--- /dev/null
+++ b/specs/groups/tests/test.yml
@@ -0,0 +1,13 @@
+var-requires:
+  func: checksit.generic.check_var_attrs
+  params:
+    defined_attrs: 
+      - long_name
+
+required-global-attrs:
+  func: checksit.generic.check_dim_exists
+  params:
+    dimensions:
+      - time
+       
+
diff --git a/tests/test_show_specs.py b/tests/test_show_specs.py
new file mode 100644
index 00000000..640ddab6
--- /dev/null
+++ b/tests/test_show_specs.py
@@ -0,0 +1,23 @@
+import pytest
+import json
+from checksit.specs import show_specs
+
+
+def test_show_specs_all(capsys):
+    # Call the function
+    show_specs(["tests/test"])
+
+    # Capture the output of the print statements
+    captured = capsys.readouterr()
+
+    # Check that the captured stdout matches the expected output
+    expected_output = (
+        'Specifications:\n\ntests/test:\n{\n    "var-requires": {\n'
+        '        "func": "checksit.generic.check_var_attrs",\n        "params": {\n'
+        '            "defined_attrs": [\n                "long_name"\n            ]\n'
+        '        }\n    },\n    "required-global-attrs": {\n'
+        '        "func": "checksit.generic.check_dim_exists",\n        "params": {\n'
+        '            "dimensions": [\n                "time"\n'
+        '            ]\n        }\n    }\n}\n'
+        )
+    assert captured.out == expected_output

From 7d7dcdbf6cc459a675fb265b12e3a5b244676e3c Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 09:31:17 +0000
Subject: [PATCH 13/37] Correct printing  by show-specs when no specs given

---
 checksit/specs.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/checksit/specs.py b/checksit/specs.py
index 562e80aa..e45b036e 100644
--- a/checksit/specs.py
+++ b/checksit/specs.py
@@ -19,22 +19,20 @@ def load_specs(spec_ids=None):
     spec_files = [f"{specs_dir}/{spec_id}.yml" for spec_id in spec_ids] or \
                  glob.glob(f"{specs_dir}/*.yml")
 
-    return _parse_specs(spec_files) 
-     
+    return _parse_specs(spec_files)
+
 
 def show_specs(spec_ids=None, verbose=False):
-    
     all_specs = load_specs(spec_ids)
     spec_ids_names = tuple([(spec_id.split("/")[-1]) for spec_id in spec_ids])
 
     if not spec_ids:
-        specs = all_specs
+        specs = all_specs.items()
     else:
         specs = [(spec_ids[spec_ids_names.index(spec_id)], spec) for (spec_id, spec) in all_specs.items() if spec_id in spec_ids_names]
 
     print("Specifications:")
     for spec_id, spec in specs:
-     
         print(f"\n{spec_id}:")
         print(json.dumps(spec, indent=4).replace("\\\\", "\\"))
 
@@ -68,6 +66,6 @@ def run_checks(self, record, skip_spellcheck=False):
                                                record, check_dict, skip_spellcheck=skip_spellcheck
                                            )
             errors.extend(check_errors)
-            warnings.extend(check_warnings) 
+            warnings.extend(check_warnings)
 
         return errors, warnings

From fbffa839d748bd0e1e876026ad3d7738f3139d47 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 09:42:46 +0000
Subject: [PATCH 14/37] Add show-specs check when no spec specified

---
 tests/test_show_specs.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/test_show_specs.py b/tests/test_show_specs.py
index 640ddab6..bf1054fe 100644
--- a/tests/test_show_specs.py
+++ b/tests/test_show_specs.py
@@ -21,3 +21,14 @@ def test_show_specs_all(capsys):
         '            ]\n        }\n    }\n}\n'
         )
     assert captured.out == expected_output
+
+
+def test_show_specs_none_specified(capsys):
+    # When no spec is specified, all specs in specs/groups are shown
+    show_specs([])
+    captured_empty = capsys.readouterr()
+
+    show_specs(["ceda-base"])
+    captured_ceda_base = capsys.readouterr()
+
+    assert captured_empty.out == captured_ceda_base.out

From 323aa92bb751366a06dbe8746832c9ebd9b43f9f Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 10:17:35 +0000
Subject: [PATCH 15/37] Add tests for exceptions on non existent rules

---
 tests/test_rules.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 8056fcce..e38caab6 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -414,3 +414,12 @@ def test_check():
     # Test that the function correctly handles regex-rule
     assert rules_instance.check("regex-rule:integer", "123", {}, label="Test") == ([], [])
     assert rules_instance.check("regex-rule:integer", "123.45", {}, label="Test") == (["Test Value '123.45' does not match regex rule: 'integer'."], [])
+
+    # Test that correct exceptions are raised when the rule or regex is not found
+    with pytest.raises(Exception) as e_info:
+        rules_instance.check("rules-func:nonexistent", "abc", {}, label="Test")
+    assert str(e_info.value) == "Rule not found with rule ID: rules-func:nonexistent."
+
+    with pytest.raises(Exception) as e_info:
+        rules_instance.check("regex-rule:nonexistent", "abc", {}, label="Test")
+    assert str(e_info.value) == "Regex rule not found with rule ID: regex-rule:nonexistent."

From 7ad518bf3aead8c87a3cfa949e4ecdc66861e90e Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Thu, 18 Jan 2024 11:32:13 +0000
Subject: [PATCH 16/37] Add checks on uppercase processor

---
 tests/test_rules.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index e38caab6..5050ed1d 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -13,7 +13,9 @@ def test_match_file_name():
     context = {"file_path": file_path}
     assert len(match_file_name(value, context)) == 1
     assert len(match_file_name(value, context, ["lowercase"])) == 1
+    assert len(match_file_name(value, context, ["uppercase"])) == 1
     assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0
+    assert len(match_file_name(value, context, ["uppercase", "no_extension"])) == 1
 
 
 def test_string_of_length():

From 6764a80d5cb56472d9b2602f5dc64d3d31697b4a Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Mon, 22 Jan 2024 15:28:55 +0000
Subject: [PATCH 17/37] Correct formatting on error message for flag_values

---
 checksit/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/checksit/generic.py b/checksit/generic.py
index 421ebabd..5b153f7e 100644
--- a/checksit/generic.py
+++ b/checksit/generic.py
@@ -222,8 +222,8 @@ def check_var(dct, variable, defined_attrs, skip_spellcheck=False):
                     attr_value = np.array(attr_value, dtype=np.int8)
                     if not np.all(dct["variables"][variable].get(attr_key) == attr_value):
                         errors.append(
-                            f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition {attr_value}, "
-                            f"not {dct['variables'][variable].get(attr_key) if skip_spellcheck else ''}."
+                            f"[variable**************:{variable}]: Attribute '{attr_key}' must have definition '{attr_value}', "
+                            f"not '{dct['variables'][variable].get(attr_key)}'."
                         )
                 #elif attr_key == 'flag_meanings':
                 #    print(attr_value)

From 8cd8dbe58a820e0fa23652f5382570d74ba4e479 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Mon, 22 Jan 2024 15:29:07 +0000
Subject: [PATCH 18/37] Create test vocabs

---
 checksit/vocabs/tests/test_instruments.json | 10 ++++++++++
 checksit/vocabs/tests/test_platforms.json   | 12 ++++++++++++
 checksit/vocabs/tests/test_products.json    |  6 ++++++
 3 files changed, 28 insertions(+)
 create mode 100644 checksit/vocabs/tests/test_instruments.json
 create mode 100644 checksit/vocabs/tests/test_platforms.json
 create mode 100644 checksit/vocabs/tests/test_products.json

diff --git a/checksit/vocabs/tests/test_instruments.json b/checksit/vocabs/tests/test_instruments.json
new file mode 100644
index 00000000..265904ce
--- /dev/null
+++ b/checksit/vocabs/tests/test_instruments.json
@@ -0,0 +1,10 @@
+{
+    "test_instruments": {
+        "inst1": {
+            "instrument_id": "inst1"
+        },
+        "inst2": {
+            "instrument_id": "inst2"
+        }
+    }
+}
\ No newline at end of file
diff --git a/checksit/vocabs/tests/test_platforms.json b/checksit/vocabs/tests/test_platforms.json
new file mode 100644
index 00000000..9a4e2051
--- /dev/null
+++ b/checksit/vocabs/tests/test_platforms.json
@@ -0,0 +1,12 @@
+{
+    "test_platforms": {
+        "plat1": {
+            "platform_id": "plat1",
+            "description": "test platform 1"
+        },
+        "plat2": {
+            "platform_id": "plat2",
+            "description": "test platform 2"
+        }
+    }
+}
\ No newline at end of file
diff --git a/checksit/vocabs/tests/test_products.json b/checksit/vocabs/tests/test_products.json
new file mode 100644
index 00000000..1cd59a7b
--- /dev/null
+++ b/checksit/vocabs/tests/test_products.json
@@ -0,0 +1,6 @@
+{
+    "test_products": [
+        "prod1",
+        "prod2"
+    ]
+}
\ No newline at end of file

From 6b29b84e9a4827d64f906f9d2d5f8b5d8599ea8f Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Mon, 22 Jan 2024 15:29:20 +0000
Subject: [PATCH 19/37] Tests for generic.py functions

---
 tests/test_generic.py | 309 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 309 insertions(+)
 create mode 100644 tests/test_generic.py

diff --git a/tests/test_generic.py b/tests/test_generic.py
new file mode 100644
index 00000000..52ab00c7
--- /dev/null
+++ b/tests/test_generic.py
@@ -0,0 +1,309 @@
+#from checksit.generic import one_spelling_mistake, two_spelling_mistakes, search_close_match, check_var_attrs, check_global_attrs, check_var_exists, check_dim_exists, check_var
+import checksit.generic as cg
+import numpy as np
+
+
+def test_spelling_mistake_checks():
+    # Test that both functions generate the expected number of candidate misspellings
+    assert len(cg.one_spelling_mistake("abc")) == 195
+    assert len(cg.one_spelling_mistake("abcdefg")) == 507
+    assert len(cg.two_spelling_mistakes("abc")) == 16306
+    assert len(cg.two_spelling_mistakes("abcdefg")) == 118314
+
+
+def test_search_close_match():
+    # Test that the function correctly finds a close match
+    assert cg.search_close_match('abc', ['abd', 'abe', 'abf']) == "'abd' was found in this file, should this be 'abc'?"
+
+    # Test that the function correctly handles no close matches
+    assert cg.search_close_match('abc', ['def', 'ghi', 'jkl']) == ""
+
+    # Test that a close match is still reported when the candidates differ in case
+    assert cg.search_close_match('abc', ['ABD', 'ABE', 'ABF']) == "'ABD' was found in this file, should this be 'abc'?"
+
+    # Test that the function correctly handles an empty search_in list
+    assert cg.search_close_match('abc', []) == ""
+
+    # Test that the function correctly handles an empty search_for string
+    assert cg.search_close_match('', ['abd', 'abe', 'abf']) == ""
+
+
+def test_check_var_attrs():
+    # Test that the function correctly identifies missing attributes
+    dct = {
+        "variables": {
+            "var1": {"long_name": "Variable 1", "units": "m"},
+            "var2": {"long_name": "Variable 2"}
+        }
+    }
+    defined_attrs = ["long_name", "units"]
+    errors, warnings = cg.check_var_attrs(dct, defined_attrs)
+    assert errors == ["[variable**************:var2]: Attribute 'units' must have a valid definition."]
+    assert warnings == []
+
+    # Test that the function correctly handles empty attributes
+    dct = {
+        "variables": {
+            "var1": {"long_name": "", "units": "m"},
+            "var2": {"long_name": "Variable 2", "units": ""}
+        }
+    }
+    errors, warnings = cg.check_var_attrs(dct, defined_attrs)
+    assert errors == ["[variable**************:var1]: Attribute 'long_name' must have a valid definition.", "[variable**************:var2]: Attribute 'units' must have a valid definition."]
+    assert warnings == []
+
+    # Test that the function correctly handles variables with all attributes defined
+    dct = {
+        "variables": {
+            "var1": {"long_name": "Variable 1", "units": "m"},
+            "var2": {"long_name": "Variable 2", "units": "kg"}
+        }
+    }
+    errors, warnings = cg.check_var_attrs(dct, defined_attrs)
+    assert errors == []
+    assert warnings == []
+
+    # Test that the function correctly handles an empty dct
+    dct = {"variables": {}}
+    errors, warnings = cg.check_var_attrs(dct, defined_attrs)
+    assert errors == []
+    assert warnings == []
+
+
+def test_check_global_attrs():
+    # Test that the function correctly identifies missing attributes
+    dct = {
+        "global_attributes": {
+            "attr1": "value1",
+            "attr2": "value2"
+        }
+    }
+    defined_attrs = ["attr1", "attr3"]
+    errors, warnings = cg.check_global_attrs(dct, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr3]: Attribute 'attr3' does not exist. "]
+    assert warnings == []
+
+    # Test that the function correctly handles empty attributes
+    dct = {
+        "global_attributes": {
+            "attr1": "",
+            "attr2": "value2"
+        }
+    }
+    defined_attrs = ["attr1", "attr2"]
+    errors, warnings = cg.check_global_attrs(dct, defined_attrs)
+    assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."]
+    assert warnings == []
+
+    # Test that the function correctly handles attributes with all values defined
+    dct = {
+        "global_attributes": {
+            "attr1": "value1",
+            "attr2": "value2"
+        }
+    }
+    errors, warnings = cg.check_global_attrs(dct, defined_attrs)
+    assert errors == []
+    assert warnings == []
+
+    # Test that the function correctly handles an empty dct
+    dct = {"global_attributes": {}}
+    errors, warnings = cg.check_global_attrs(dct, defined_attrs)
+    assert errors == ["[global-attributes:**************:attr1]: Attribute 'attr1' does not exist. ", "[global-attributes:**************:attr2]: Attribute 'attr2' does not exist. "]
+    assert warnings == []
+
+
+def test_check_var_exists():
+    # Test that the function correctly identifies missing variables
+    dct = {
+        "variables": {
+            "var1": {"long_name": "Variable 1", "units": "m"},
+            "var2": {"long_name": "Variable 2", "units": "kg"}
+        }
+    }
+    variables = ["var1", "var3"]
+    errors, warnings = cg.check_var_exists(dct, variables, skip_spellcheck=True)
+    assert errors == ["[variable**************:var3]: Does not exist in file. "]
+    assert warnings == []
+
+    # Test that the function correctly handles optional variables
+    variables = ["var1", "var3:__OPTIONAL__"]
+    errors, warnings = cg.check_var_exists(dct, variables, skip_spellcheck=True)
+    assert errors == []
+    assert warnings == ["[variable**************:var3]: Optional variable does not exist in file. "]
+
+    # Test that the function correctly handles variables that exist
+    variables = ["var1", "var2"]
+    errors, warnings = cg.check_var_exists(dct, variables)
+    assert errors == []
+    assert warnings == []
+
+    # Test that the function correctly handles an empty dct
+    dct = {"variables": {}}
+    variables = ["var1", "var2"]
+    errors, warnings = cg.check_var_exists(dct, variables)
+    assert errors == ["[variable**************:var1]: Does not exist in file. ", "[variable**************:var2]: Does not exist in file. "]
+    assert warnings == []
+
+
+def test_check_dim_exists():
+    # Test that the function correctly identifies missing dimensions
+    dct = {
+        "dimensions": {
+            "dim1": {"long_name": "Dimension 1", "units": "m"},
+            "dim2": {"long_name": "Dimension 2", "units": "kg"}
+        }
+    }
+    dimensions = ["dim1", "dim3"]
+    errors, warnings = cg.check_dim_exists(dct, dimensions, skip_spellcheck=True)
+    assert errors == ["[dimension**************:dim3]: Does not exist in file. "]
+    assert warnings == []
+
+    # Test that the function correctly handles optional dimensions
+    dimensions = ["dim1", "dim3:__OPTIONAL__"]
+    errors, warnings = cg.check_dim_exists(dct, dimensions, skip_spellcheck=True)
+    assert errors == []
+    assert warnings == ["[dimension**************:dim3]: Optional dimension does not exist in file. "]
+
+    # Test that the function correctly handles dimensions that exist
+    dimensions = ["dim1", "dim2"]
+    errors, warnings = cg.check_dim_exists(dct, dimensions)
+    assert errors == []
+    assert warnings == []
+
+    # Test that the function correctly handles an empty dct
+    dct = {"dimensions": {}}
+    dimensions = ["dim1", "dim2"]
+    errors, warnings = cg.check_dim_exists(dct, dimensions)
+    assert errors == ["[dimension**************:dim1]: Does not exist in file. ", "[dimension**************:dim2]: Does not exist in file. "]
+    assert warnings == []
+
+
+def test_check_var():
+    # Test that the function correctly identifies missing variables
+    dct = {
+        "variables": {
+            "var1": {"long_name": "Variable 1", "units": "m", "flag_values": np.array([0,1,2], dtype=np.int8)},
+            "var2": {"long_name": "Variable 2", "units": "kg"},
+            "var4": {"flag_values": "0b, 1b, 2b"}
+        }
+    }
+    variable = "var3"
+    defined_attrs = ["long_name:Variable 3", "units:s"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[variable**************:var3]: Optional variable does not exist in file. "]
+    assert warnings == []
+
+    # Test that the function correctly handles optional variables
+    variable = "var3:__OPTIONAL__"
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == []
+    assert warnings == ["[variable**************:var3]: Optional variable does not exist in file. "]
+
+    # Test that the function correctly handles variables that exist
+    variable = "var1:__OPTIONAL__"
+    defined_attrs = ["long_name:Variable 1", "units:m"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs)
+    assert errors == []
+    assert warnings == []
+
+    # Test that the function correctly identifies missing attributes
+    variable = "var2"
+    defined_attrs = ["long_name:Variable 2", "units:kg", "attr3:value 3"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[variable**************:var2]: Attribute 'attr3' does not exist. "]
+    assert warnings == []
+
+    # Test that the function correctly identifies incorrect attributes
+    variable = "var2"
+    defined_attrs = ["long_name:Variable 2", "units:s"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."]
+    assert warnings == []
+
+    # Test that the function correctly handles badly formatted flag_values
+    variable = "var4:__OPTIONAL__"
+    defined_attrs = ["flag_values:0b, 1b, 2b"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[variable**************:var4]: Attribute 'flag_values' must have definition '[0 1 2]', not '0b, 1b, 2b'."]
+    assert warnings == []
+
+
+    # Test that the function correctly handles attributes with all values defined
+    variable = "var1:__OPTIONAL__"
+    defined_attrs = ["long_name:Variable 1", "units:m", "flag_values:0b, 1b, 2b"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs)
+    assert errors == []
+    assert warnings == []
+
+    # Test that the function correctly handles an empty dct
+    variable = "var2"
+    dct = {"variables": {}}
+    errors, warnings = cg.check_var(dct, variable, defined_attrs)
+    assert errors == ["[variable**************:var2]: Optional variable does not exist in file. "]
+    assert warnings == []
+
+
+def test_check_file_name():
+    # Test that the function correctly identifies invalid instrument name
+    vocab_checks = {
+        "instrument": "__vocabs__:tests/test_instruments:test_instruments:__all__",
+        "platform": "__vocabs__:tests/test_platforms:test_platforms:__all__",
+        "data_product": "__vocabs__:tests/test_products:test_products"
+    }
+    file_name = "inst3_plat1_20220101_prod1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - unknown instrument inst3"]
+    assert warnings == []
+
+    # Test that the function correctly identifies invalid platform name
+    file_name = "inst1_plat3_20220101_prod1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - unknown platform plat3"]
+    assert warnings == []
+
+    # Test that the function correctly identifies invalid date format
+    file_name = "inst1_plat1_2022010_prod1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - bad date format 2022010"]
+    assert warnings == []
+
+    # Test that the function correctly identifies invalid date
+    file_name = "inst1_plat1_20221301_prod1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - invalid date in file name 20221301"]
+    assert warnings == []
+
+    # Test that the function correctly identifies invalid data product
+    file_name = "inst1_plat1_20220101_prod3_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - unknown data product prod3"]
+    assert warnings == []
+
+    # Test that the function correctly identifies invalid version number format
+    file_name = "inst1_plat1_20220101_prod1_v10.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - incorrect file version number v10"]
+    assert warnings == []
+
+    # Test that the function correctly identifies too many options in file name
+    file_name = "inst1_plat1_20220101_prod1_option1_option2_option3_option4_option5_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - too many options in file name"]
+    assert warnings == []
+
+    # Test that the function correctly handles valid file names
+    file_name = "inst1_plat1_20220101_prod1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == []
+    assert warnings == []
+
+    file_name = "inst1_plat1_20220101_prod1_opt1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == []
+    assert warnings == []
+
+    file_name = "inst1_plat1_20220101_prod1_opt1_opt2_opt3_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == []
+    assert warnings == []
\ No newline at end of file

From cd68b27e789a5d95971979d0445bbc33ee0a0179 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Mon, 22 Jan 2024 15:32:19 +0000
Subject: [PATCH 20/37] Test for multiple errors in file name

---
 tests/test_generic.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index 52ab00c7..fa8963ff 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -292,6 +292,12 @@ def test_check_file_name():
     assert errors == ["[file name]: Invalid file name format - too many options in file name"]
     assert warnings == []
 
+    # Test that the function correctly handles multiple errors
+    file_name = "inst3_plat3_20220101_prod1_v1.0.nc"
+    errors, warnings = cg.check_file_name(file_name, vocab_checks)
+    assert errors == ["[file name]: Invalid file name format - unknown instrument inst3","[file name]: Invalid file name format - unknown platform plat3"]
+    assert warnings == []
+
     # Test that the function correctly handles valid file names
     file_name = "inst1_plat1_20220101_prod1_v1.0.nc"
     errors, warnings = cg.check_file_name(file_name, vocab_checks)

From a85f796e3b2ec103d14c4bc235525e7792059e77 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Mon, 22 Jan 2024 15:42:08 +0000
Subject: [PATCH 21/37] Add more tests to GitHub workflow

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a6ac6456..a8e9f92f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
-        python -m pytest -v tests/test_readers.py tests/test_images.py
+        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py

From a6132c9feefcb876a7becb4d10ce88adbd65a1af Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 10:49:21 +0000
Subject: [PATCH 22/37] Complete tests for check_global_attrs

---
 tests/test_generic.py | 124 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 106 insertions(+), 18 deletions(-)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index fa8963ff..8f3d6672 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -74,40 +74,128 @@ def test_check_global_attrs():
     # Test that the function correctly identifies missing attributes
     dct = {
         "global_attributes": {
-            "attr1": "value1",
-            "attr2": "value2"
-        }
+            "attr1": "",
+            "attr2": "value2",
+            "attr3": "inst1"
+        },
+        "inpt": "filename"
     }
-    defined_attrs = ["attr1", "attr3"]
+    defined_attrs = ["attr2", "attr4"]
     errors, warnings = cg.check_global_attrs(dct, defined_attrs, skip_spellcheck=True)
-    assert errors == ["[global-attributes:**************:attr3]: Attribute 'attr3' does not exist. "]
+    assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "]
     assert warnings == []
 
     # Test that the function correctly handles empty attributes
-    dct = {
-        "global_attributes": {
-            "attr1": "",
-            "attr2": "value2"
-        }
-    }
     defined_attrs = ["attr1", "attr2"]
     errors, warnings = cg.check_global_attrs(dct, defined_attrs)
     assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."]
     assert warnings == []
 
-    # Test that the function correctly handles attributes with all values defined
-    dct = {
-        "global_attributes": {
-            "attr1": "value1",
-            "attr2": "value2"
-        }
-    }
+    # Test that the function correctly handles defined_attrs when all attributes are defined
+    defined_attrs = ["attr2", "attr3"]
     errors, warnings = cg.check_global_attrs(dct, defined_attrs)
     assert errors == []
     assert warnings == []
 
+    # Test function handles non-existent attributes with vocab checks correctly
+    vocab_attrs = {
+        "attr4": "__vocabs__:tests/test_products:test_products"
+    }
+    errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "]
+    assert warnings == []
+
+    # Test function handles undefined attributes with vocab checks correctly 
+    vocab_attrs = {
+        "attr1": "__vocabs__:tests/test_platforms:test_platforms:__all__"
+    }
+    errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."]
+    assert warnings == []
+
+    # Test function handles incorrect values with vocab checks correctly
+    vocab_attrs = {
+        "attr2": "__vocabs__:tests/test_platforms:test_platforms:__all__"
+    }
+    errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:******:attr2]*** 'value2' not in vocab options: ['plat1', 'plat2'] (using: '__vocabs__:tests/test_platforms:test_platforms:__all__')"]
+    assert warnings == []
+
+    # Test function handles correct values with vocab checks correctly
+    vocab_attrs = {
+        "attr3": "__vocabs__:tests/test_instruments:test_instruments:__all__"
+    }
+    errors, warnings = cg.check_global_attrs(dct, vocab_attrs = vocab_attrs, skip_spellcheck=True)
+    assert errors == []
+    assert warnings == []
+
+    # Test function handles non-existent attributes with regex checks correctly
+    regex_attrs = {
+        "attr4": r"\d{4}-\d{2}-\d{2}"
+    }
+    errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "]
+    assert warnings == []
+
+    # Test function handles undefined attributes with regex checks correctly 
+    regex_attrs = {
+        "attr1": r"\d{4}-\d{2}-\d{2}"
+    }
+    errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."]
+    assert warnings == []
+
+    # Test function handles incorrect values with regex checks correctly
+    regex_attrs = {
+        "attr2": r"\d{4}-\d{2}-\d{2}"
+    }
+    errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:******:attr2]: 'value2' does not match regex pattern '\d{4}-\d{2}-\d{2}'."]
+    assert warnings == []
+
+    # Test function handles correct values with regex checks correctly
+    regex_attrs = {
+        "attr3": r"inst\d"
+    }
+    errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True)
+    assert errors == []
+    assert warnings == []
+
+    # Test function handles non-existent attributes with rules checks correctly
+    rules_attrs = {
+        "attr4": "rule-func:string-of-length:5"
+    }
+    errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr4]: Attribute 'attr4' does not exist. "]
+    assert warnings == []
+
+    # Test function handles undefined attributes with rules checks correctly 
+    rules_attrs = {
+        "attr1": "rule-func:string-of-length:5"
+    }
+    errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:**************:attr1]: No value defined for attribute 'attr1'."]
+    assert warnings == []
+
+    # Test function handles incorrect values with rules checks correctly
+    rules_attrs = {
+        "attr2": "rule-func:string-of-length:5"
+    }
+    errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True)
+    assert errors == ["[global-attributes:******:attr2]*** 'value2' must be exactly 5 characters"]
+    assert warnings == []
+
+    # Test function handles correct values with rules checks correctly
+    rules_attrs = {
+        "attr3": "rule-func:string-of-length:5"
+    }
+    errors, warnings = cg.check_global_attrs(dct, rules_attrs = rules_attrs, skip_spellcheck=True)
+    assert errors == []
+    assert warnings == []
+
     # Test that the function correctly handles an empty dct
     dct = {"global_attributes": {}}
+    defined_attrs = ["attr1", "attr2"]
     errors, warnings = cg.check_global_attrs(dct, defined_attrs)
     assert errors == ["[global-attributes:**************:attr1]: Attribute 'attr1' does not exist. ", "[global-attributes:**************:attr2]: Attribute 'attr2' does not exist. "]
     assert warnings == []

From dae31787bbb0e2362c1ef03c27962ab5b78a0955 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 11:26:08 +0000
Subject: [PATCH 23/37] Add string_to_dict tests

---
 tests/test_utils.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 tests/test_utils.py

diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 00000000..1fbf6639
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,34 @@
+import checksit.utils as cu 
+import pytest
+
+
+def test_string_to_dict():
+    # Test that the function correctly converts a string to a dictionary
+    s = "key1=value1,key2=value2,key3=value3"
+    d = cu.string_to_dict(s)
+    assert d == {"key1": "value1", "key2": "value2", "key3": "value3"}
+
+    # Test that the function handles an empty string
+    s = ""
+    with pytest.raises(ValueError):
+        d = cu.string_to_dict(s)
+
+    # Test that the function correctly handles a string with no equals signs
+    s = "key1,key2,key3"
+    with pytest.raises(ValueError):
+        d = cu.string_to_dict(s)
+
+    # Test that the function correctly handles a string with multiple equals signs in a pair
+    s = "key1=value1=value1,key2=value2,key3=value3"
+    with pytest.raises(ValueError):
+        d = cu.string_to_dict(s)
+
+    # Test that the function correctly handles a string with no commas
+    s = "key1=value1"
+    d = cu.string_to_dict(s)
+    assert d == {"key1": "value1"}
+
+    # Test that the function correctly handles a string with spaces
+    s = "key1 = value1, key2 = value2, key3 = value3"
+    d = cu.string_to_dict(s)
+    assert d == {"key1 ": " value1", " key2 ": " value2", " key3 ": " value3"}
\ No newline at end of file

From fdec73b41fab9efcb2994e461fb71eb047bfa3ad Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 11:28:56 +0000
Subject: [PATCH 24/37] Use raw string for expected error message containing a regex pattern

---
 tests/test_generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index 8f3d6672..09d6a863 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -150,7 +150,7 @@ def test_check_global_attrs():
         "attr2": r"\d{4}-\d{2}-\d{2}"
     }
     errors, warnings = cg.check_global_attrs(dct, regex_attrs = regex_attrs, skip_spellcheck=True)
-    assert errors == ["[global-attributes:******:attr2]: 'value2' does not match regex pattern '\d{4}-\d{2}-\d{2}'."]
+    assert errors == [r"[global-attributes:******:attr2]: 'value2' does not match regex pattern '\d{4}-\d{2}-\d{2}'."]
     assert warnings == []
 
     # Test function handles correct values with regex checks correctly

From e0e50779a5da5a80b731db47dc398a2b75594de0 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 11:46:09 +0000
Subject: [PATCH 25/37] Add tests for all functions in utils.py

---
 tests/test_utils.py | 186 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 185 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1fbf6639..f8aa8470 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,5 +1,6 @@
 import checksit.utils as cu 
 import pytest
+import inspect
 
 
 def test_string_to_dict():
@@ -31,4 +32,187 @@ def test_string_to_dict():
     # Test that the function correctly handles a string with spaces
     s = "key1 = value1, key2 = value2, key3 = value3"
     d = cu.string_to_dict(s)
-    assert d == {"key1 ": " value1", " key2 ": " value2", " key3 ": " value3"}
\ No newline at end of file
+    assert d == {"key1 ": " value1", " key2 ": " value2", " key3 ": " value3"}
+
+
+def test_string_to_list():
+    # Test that the function correctly converts a string to a list
+    s = "value1,value2,value3"
+    lst = cu.string_to_list(s)
+    assert lst == ["value1", "value2", "value3"]
+
+    # Test that the function handles an empty string
+    s = ""
+    lst = cu.string_to_list(s)
+    assert lst == [""]
+
+    # Test that the function correctly handles a string with no commas
+    s = "value1"
+    lst = cu.string_to_list(s)
+    assert lst == ["value1"]
+
+    # Test that the function correctly handles a string with spaces
+    s = "value1, value2, value3"
+    lst = cu.string_to_list(s)
+    assert lst == ["value1", " value2", " value3"]
+
+    # Test that the function correctly handles a string with trailing comma
+    s = "value1,value2,value3,"
+    lst = cu.string_to_list(s)
+    assert lst == ["value1", "value2", "value3", ""]
+
+    # Test that the function correctly handles a string with leading comma
+    s = ",value1,value2,value3"
+    lst = cu.string_to_list(s)
+    assert lst == ["", "value1", "value2", "value3"]
+
+
+def test_extension():
+    # Test that the function correctly identifies the extension of a file
+    file_path = "/path/to/file.txt"
+    ext = cu.extension(file_path)
+    assert ext == "txt"
+
+    # Test that the function correctly handles a file with multiple dots in the name
+    file_path = "/path/to/file.name.with.multiple.dots.txt"
+    ext = cu.extension(file_path)
+    assert ext == "txt"
+
+    # Test that the function correctly handles a file with a dot at the start of the name
+    file_path = "/path/to/.file"
+    ext = cu.extension(file_path)
+    assert ext == "file"
+
+    # Test that the function correctly handles a file with a dot at the end of the name
+    file_path = "/path/to/file."
+    ext = cu.extension(file_path)
+    assert ext == ""
+
+    # Test that the function correctly handles an empty string
+    file_path = ""
+    ext = cu.extension(file_path)
+    assert ext == ""
+
+
+def test_get_file_base():
+    # Test that the function correctly gets the base of a file name with one underscore
+    file_path = "/path/to/file_base.txt"
+    base = cu.get_file_base(file_path)
+    assert base == "file"
+
+    # Test that the function correctly gets the base of a file name with multiple underscores
+    file_path = "/path/to/file_base_part2_part3.txt"
+    base = cu.get_file_base(file_path)
+    assert base == "file_base_part2"
+
+    # Test that the function correctly gets the base of a file name with an underscore at the start
+    file_path = "/path/to/_file.txt"
+    base = cu.get_file_base(file_path)
+    assert base == ""
+
+    # Test that the function correctly gets the base of a file name with an underscore at the end
+    file_path = "/path/to/file_.txt"
+    base = cu.get_file_base(file_path)
+    assert base == "file"
+
+    # Test that the function correctly handles an empty string
+    file_path = ""
+    base = cu.get_file_base(file_path)
+    assert base == ""
+
+
+def test_map_to_rule():
+    # Test that the function correctly maps a function name with one underscore
+    class TestClass:
+        def test_func_one():
+            pass
+    rule = cu.map_to_rule(TestClass.test_func_one)
+    assert rule == "test-func-one"
+
+    # Test that the function correctly maps a function name with multiple underscores
+    class TestClass:
+        def test_func_multiple_underscores():
+            pass
+    rule = cu.map_to_rule(TestClass.test_func_multiple_underscores)
+    assert rule == "test-func-multiple-underscores"
+
+    # Test that the function correctly maps a function name with no underscores
+    class TestClass:
+        def testfuncnone():
+            pass
+    rule = cu.map_to_rule(TestClass.testfuncnone)
+    assert rule == "testfuncnone"
+
+    # Test that the function correctly maps a function name with an underscore at the start
+    class TestClass:
+        def _test_func_start():
+            pass
+    rule = cu.map_to_rule(TestClass._test_func_start)
+    assert rule == "-test-func-start"
+
+    # Test that the function correctly maps a function name with an underscore at the end
+    class TestClass:
+        def test_func_end_():
+            pass
+    rule = cu.map_to_rule(TestClass.test_func_end_)
+    assert rule == "test-func-end-"
+
+
+def test_is_undefined():
+    # Test that the function correctly identifies None as undefined
+    assert cu.is_undefined(None)
+
+    # Test that the function correctly identifies an empty string as undefined
+    assert cu.is_undefined("")
+
+    # Test that the function correctly identifies an empty list as undefined
+    assert cu.is_undefined([])
+
+    # Test that the function correctly identifies an empty dictionary as undefined
+    assert cu.is_undefined({})
+
+    # Test that the function correctly identifies zero as not undefined
+    assert not cu.is_undefined(0)
+
+    # Test that the function correctly identifies a non-empty string as not undefined
+    assert not cu.is_undefined("non-empty string")
+
+    # Test that the function correctly identifies a non-empty list as not undefined
+    assert not cu.is_undefined(["non-empty list"])
+
+    # Test that the function correctly identifies a non-empty dictionary as not undefined
+    assert not cu.is_undefined({"key": "value"})
+
+
+def test_get_public_funcs():
+    # Test that the function correctly gets the public functions of a module
+    funcs = cu.get_public_funcs(cu)
+    assert all([inspect.isfunction(func) for func in funcs])
+    assert all([func.__name__[0] != "_" for func in funcs])
+    assert "get_config" not in [func.__name__ for func in funcs]
+
+    # Test that the function correctly handles a module with no public functions
+    class TestModule:
+        def _private_func():
+            pass
+    funcs = cu.get_public_funcs(TestModule)
+    assert funcs == []
+
+    # Test that the function also excludes get_config, leaving no functions to return
+    class TestModule:
+        def _private_func():
+            pass
+        def get_config():
+            pass
+    funcs = cu.get_public_funcs(TestModule)
+    assert funcs == []
+
+    # Test that the function correctly handles a module with both public and private functions
+    class TestModule:
+        def public_func():
+            pass
+        def _private_func():
+            pass
+    funcs = cu.get_public_funcs(TestModule)
+    assert len(funcs) == 1
+    assert funcs[0].__name__ == "public_func"
\ No newline at end of file

From 34bd1be57c18798881ad96a18abc3775ffd17fb3 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 11:46:50 +0000
Subject: [PATCH 26/37] Add test_utils.py to workflow

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a8e9f92f..3301eea5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
-        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py
+        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py

From b6ee1a4c7ac571ba11d7bc2f0e120ce3fb4a7f6f Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 13:18:51 +0000
Subject: [PATCH 27/37] Test show specs from CLI

---
 tests/test_show_specs.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tests/test_show_specs.py b/tests/test_show_specs.py
index bf1054fe..903c2a85 100644
--- a/tests/test_show_specs.py
+++ b/tests/test_show_specs.py
@@ -1,6 +1,6 @@
-import pytest
-import json
 from checksit.specs import show_specs
+from click.testing import CliRunner
+from checksit import cli
 
 
 def test_show_specs_all(capsys):
@@ -32,3 +32,18 @@ def test_show_specs_none_specified(capsys):
     captured_ceda_base = capsys.readouterr()
 
     assert captured_empty.out == captured_ceda_base.out
+
+
+def test_show_specs_cli():
+    runner = CliRunner()
+    result = runner.invoke(cli.show_specs, ["tests/test"])
+    expected_output = (
+        'Specifications:\n\ntests/test:\n{\n    "var-requires": {\n'
+        '        "func": "checksit.generic.check_var_attrs",\n        "params": {\n'
+        '            "defined_attrs": [\n                "long_name"\n            ]\n'
+        '        }\n    },\n    "required-global-attrs": {\n'
+        '        "func": "checksit.generic.check_dim_exists",\n        "params": {\n'
+        '            "dimensions": [\n                "time"\n'
+        '            ]\n        }\n    }\n}\n'
+        )
+    assert result.stdout == expected_output

From 4143198a12cce23aed3e8a2477c925247b3e7b30 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 15:12:49 +0000
Subject: [PATCH 28/37] Remove commented out import statement

---
 tests/test_generic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index 09d6a863..b004368e 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1,4 +1,3 @@
-#from checksit.generic import one_spelling_mistake, two_spelling_mistakes, search_close_match, check_var_attrs, check_global_attrs, check_var_exists, check_dim_exists, check_var
 import checksit.generic as cg
 import numpy as np
 

From 20f8d9f428894169724016921948b5628f5ca552 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 15:49:05 +0000
Subject: [PATCH 29/37] Test reading netCDF file

---
 tests/test_readers.py                |  11 +++++++++++
 tests/testdata/netcdf/test_netcdf.nc | Bin 0 -> 16192 bytes
 2 files changed, 11 insertions(+)
 create mode 100644 tests/testdata/netcdf/test_netcdf.nc

diff --git a/tests/test_readers.py b/tests/test_readers.py
index b6f41d8e..b1419bb5 100644
--- a/tests/test_readers.py
+++ b/tests/test_readers.py
@@ -36,3 +36,14 @@ def test_cdl_reader_multiline_parser_2():
     d = resp.to_dict()
 
 
+def test_cdl_reader_netcdf():
+    """Check that ``read_cdl`` on the ``test_netcdf.nc`` fixture gives the expected ``to_dict()`` content."""
+    nc_path = os.path.join(TESTDATA_DIR, "netcdf/test_netcdf.nc")
+    parsed = read_cdl(nc_path).to_dict()
+
+    assert sorted(parsed.keys()) == ["dimensions", "global_attributes", "inpt", "variables"]
+    assert list(parsed["global_attributes"].keys()) == ["test_attribute_name"]
+    assert parsed["global_attributes"]["test_attribute_name"] == "test_attribute_value"
+    assert list(parsed["variables"].keys()) == ["T"]
+    assert sorted(parsed["dimensions"].keys()) == ["x", "y", "z"]
+    assert parsed["inpt"] == nc_path
\ No newline at end of file
diff --git a/tests/testdata/netcdf/test_netcdf.nc b/tests/testdata/netcdf/test_netcdf.nc
new file mode 100644
index 0000000000000000000000000000000000000000..27f90ca776a6536d02ceaa7e5419d1bdd4a21479
GIT binary patch
literal 16192
zcmeI2&x<Bk6^3tjn>Y@mDK!!lf0Tj>Dnl73qKl9_^h``NPKr!$A;?tgbVx%oQ%pKB
zijccgcZDm(O)b@x3z2{z1^xs6z?Cj)t04Lx==0pVcP5Du;zC9EE_vrY=RNN^_tyQY
zzn&4l`^cq--+J~vXZO#X33dIwtoYETbfxgnpT75&#~-=$#QwRT?SCk12X88++VWaH
zyC3$n+<mWq?p1r(d-1OdtXj!7^1W|-YCjilS4tZ8zWMcoS650J_MZGjncOrw{`lt2
z_wq*N&tHH0<(KmD-};%oy?mbiy%ONa0(@|<Cw}_}KM=N&ZMg4DxbGz@cjad}IJ@of
z`Lf6N%Jg@=H#cx2uf2nspZmDhgK&2^qxJByi&w9PcOrRaAM(<pU%Y(f>Z6ZexpwvN
z;$xS?xi51aG~Bf{T)Ftg%h?}`|H-FMo@@E<sJ(G~dU)yK_WH@Q$2Xokxp|}g>hq`V
zjhm-;N-rGUIyw64GsoxKvhMGK2ij-;+rl{ybnVLV>DR75b>Y&<vnSU>!~0Ngpnd`&
z{C@cGPyhK2-_3zsmEY(czI^xA_kVkb5Bz&~xV_^a{p}xq@XvR+-F3b5H~t=e<p2B*
z+YA5R9d7U9FMsm8zifASZEx>_Z_)n2-tDiK`X1Jw>3X-X)ysR@zRu73S-%AiKDKrK
z?OSKw;T{Kjp_N03AK1A5)bIc6pZZ629`OBg&hMD|Lw0!c=B?`|H;zt^pL_L>8ed8E
zYpt@6yuCUhoO@>bPPm6}4aVL1%6RP2t54=1K)+4y1Ad?T1y`q3&Xe-jq{k|gOYY;o
zvVIr&+phfh#m5gH`9i&d`gK-X@`(<ZbWi9!(G!;+zWl`HD~Fe_<r96H!|S|$bb9pM
z@#&qT-Ttl$XRL1*-R2Y5XRTbn+_B~L^Y({-e7zH0FLm$ezjLB`!rxtg|KaPMsQ<5@
zsD4#{<$ZrX@I-Im1-<Mc{VkAxEnT~E*x$N29^X1W;nnol(}#d#UpT(y@0JJe{{+8o
z_(k#Hh4Y_1fBz?*zW&q$4}S9eCoi1;)VT+@m-F6j6z}%wwWHJ1TPOTFI=*(}=-J~u
zkhdfcd`<m@qi3E!{?Y@#`VHmX+TFnJ26i{FyMf&e>~7#ccmtn1eEhLj{*ABxEwTQu
z0rj8U+uzT&zg2pi`j2$~hUlvP`p^1J4dF-k)M&@M8`#~z?gn-@u)BfX4eV}UcLTc{
z*xkVH26i{FyMf&e{O@j{y%>UUr5Hj_{2gGtg724lqXt6J?a>?6(X7b>9KZx3xpjN2
zFYu}c^mNvMdyF8V*S*bKphwN##Q!59mRwD^^r;6h0rh%rYjfP@>oHHD2hP-IkJ<=A
ziJhTGy^-;Xf!H2ry7bs%Z;ac_NW2o?ey);FaDE1AbygtG^Qm=UCfDc87A)kWdC>=A
z^GLn|b7Om*-50ihM$f&()*~=a1)(hU77XglNc8D(u25gpsQF9{@P3>bz)URwJ;tfP
zdJkGK0sGY&V86LN)*`HC&HYw#S>1Yo)hb&P31l6PLQjc%*m<((zyz$jw|&AsYYnKH
z^+ZkY0L)!_y@OUQZnXstKweAiK6)a#J=SN?8v~zJJ>Nsot7e~PF&}|@%M<8<`@5Q=
z)%R>Dxqa?ed*lx2OP~jOJdbBsV6|ip+|QcxMBb0wIRV&X?2H!F^Vn;wZl7p?^Q;A+
zrpJD@3GTs4ya1PR0s+0wGIvfwv)7)w^r$;a&R#fz0Q3v%E_Kh~KK81sMe@7{+>_g@
zrzy1-);|J!1`w$0i=YENux@Rsy~O$=$aiJVa>g7t%|7!ExQF@*5>Rs)FQD#cZeI(W
z>)EZ1U;@_EjU!(5&KS((_EmqQj+S8O46x9@qI;$uxb%d$X~wm0B3{tlv-TP1KJ)}a
z$=zctz81F6ISsMsfSwhtFS)Va2{<SB!0r)20(u8ve^X-IIUmU<;G6(_rWW+TxC46R
z`n=|O;h%wi`%^(fu5ORIdk1TrU5_<;L<H)_>h=uk#MYMLO+^c=RBHO{@mv93fqT1_
zxXm$lroI-QKqOXkU-bZba<#@f(3=-E^9ktJBfGK|H(p@p+Mn;<oLEi&Ks*zBR(Sz|
z*j%5>KDFELha!0oT5Ivn856NJ=gb8Ix%=z0=DxKa(K@uK4)j?|U?jHJv*_u;1U!c`
z%&X=cXEz1*)cRWH&9HiaN4P$ZntkTR3w381+hZ-l5$=knCxMpSy7>(1jA)%R3hOJm
z^YYnW5!){&pjWP5^$6^@W-dFsvj*#x_1PQ1Og_S?KwV9r94ZqoC3jvAWcP46(^&0x
z5Amc9^k+}p>Usw>m%7he*BbBKV?b{~)yCqT@!02H?qS@34(QMGV3%3~ODyMwU7kTW
zD*=6ahQh`HjN}biiQPw^ae%GQ#w^)+`g3euuu|OS_N5ldTVm(=IS;_TfNswM=g+^?
z@>$W`+gbXY6^k~jkz03;9^qOde*kMP_tfhh)jMd3Mec!}ZO!Gp2IRiDJ^Ks~XwFr)
zPya-m=Md*Q`9e)Z;2i7jZ5+^C#tE$P)%Rc?O3gm~_V&i=o)Jyfn?MA{GjQ%w*d8@&
zxsQAT9SCUF8!)~C1976ZfQeW{Vn3U;^whdup(lTKoXJ_%gCOn;X7c=wA{*DamAm4z
zHxb)!tnTc(|AID>%e7W}^XE$~QmcDKbFiY>W6dS2Hy{wtpe1&e?0(j~H+>_}mtgC1
zerB}3#I@I&u-}?`1Pfky1ld#OImg-hBRs)<iK*QFZ0+lcKI5~;=X9oeA~$x1b!X_A
ztbqpT7Ylm6BjOe$a#@VPntjgEYrjkGjKLbWJ-nmZV?BFed-cgJtY-i{h@fhDUhz!m
zB9d31_3BT=H6MsWsoB$l+(WN=eNJcTUEtgc>oxc6VkUM*0u8WN&04-E;uSbYPXwW8
z`WE2cLQlOv`~7^)?eAc{#?CO;qn2lq&%~kl8o163=&gRBSBya2e&ZFkp5PW1)+e}A
z7wi|*fx4c0Mr(NntZt9}`5i630TvPTAQX=HTq%}w8B=p!t?4yyVedqq$rsQNr&1e@
zfqs4Nuh(8-&D`_W`%{~My#bFsdWWLxUrMefT<#lzb!XY{p4O}8tR4iAKxgcVcixvs
zY>(`6FK4NVslYwVXX^3_>vd)e<VY@z69`}?9}6329bUnJE*5p>H^g!z?}_sw4#1gN
zBlbBb&;WbgN3CKmek!@Pk~<?7*yp}#UFkEfn!W`%*Vw%hY+nTW#=<jtUvfI~8O#${
zsdcbD#`btdYx&(oYl~)&ORqIO12A{S5_37nEqNfHfi-8Fiv>8#zDb`8cHiuQ>}iS?
z<MwQ|MtOUGsL#~&Sxdk<=4;7~-6xVyuzRR=g=ciN1-92bfK)s~@g+2$S)aY;JzNo3
z6M01U{cxr=S<N%pJ6k8#vlee)jYV_6qz+vAIyC!@^S#Gh+Rln@Zv*OHc?P*X0q+bV
zXvtT2f`wY08;D&AJw*aw9DuXk+n&+5)VmT}?}5vjXN<5jhN2tm^NjW_<P&V4?6T%Q
zvVMIo>#@MP@fu;^UbU{@-mz$PFTFk50?#0oSj~9}EfCk-xc10m(hFJ;$%n$mxes1I
zqYh+y^mSmSHp1#FaRTbr?Q?k+edgBms2Tg5WAR3H;t9z5A{>feZwnfrUhg6I;jvEy
zFcvoNK{79M)S3d%v6kL?7M%Hf_UUsUW1%PEcaItF;02CgBG%i05#&9w>W}2sUCt0{
z&Z&3d9Q)g%o7?LwXWF0lhxHk+ddmIgS>!Hf+h@Oh&eSWc4MoG6&uP7S^jo(-g1VX#
zcW}M8zIbBX&NRaTO??E$!t=Vfx}K(J3vu>99ohs2kP7Q>3!I@R&@;jfuwNDv7(oa0
z<hv?feb${TjMc;pBIwZqXh8ma(2T`Y;2Dg4e&<;0&7DzlX9e@3Ss%er>MOc^?%_OP
zU0$dqVC?)3%;ff33qalel~`>C`pjJ^ZrX%)duHjeZjWB)1h~FOl_R<}bLR#S@yR3b
z+0FGizbUZCc>`=;U-AV_J;|&I-5zy45jJ08V`q+_1J>Qwv$&tR^Yw{ECU?o%7q|Wu
zZqVcwp5X{4Vqv}%O?^aj##;1xj+&>^W3OwFfq8Enw|NWP6x~?eSu-rGb)W?+xv{7}
z$3BCzdl0Elg#&9Ow$FKTqF!g_caqOWk39*5;u+u`*wcWTJJUXM_g$E0t|x&GxZK0o
zef0$3tn7#Fap`l8{go4%^JCHMu|9!Oojkzy2B2Q&b;R!Da&L22f}LaE3OlEwDIRAz
zXEG<YKZ04k!~^U()x-*%*#cu{b|99Tv(*-I`<(9#XS;eh8po}_?r)zn)n{O@UiARf
zTq$nzou_{+s2+1;+1dId@dAs9*x4Oe$@MvFCN?*g2ROm{oR?&ld7cYTV2Rs2{q|WO
GF8%|wsBXLf

literal 0
HcmV?d00001


From 1a09a57528e7834188c3ebc8269881afeb189346 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 16:42:44 +0000
Subject: [PATCH 30/37] Add test for auto-finding NCAS GENERAL specs

---
 tests/test_ncas_file_proc.py                   |  17 +++++++++++++++++
 ...ument_platform_20230101_surface-met_v1.0.nc | Bin 0 -> 6469 bytes
 2 files changed, 17 insertions(+)
 create mode 100644 tests/test_ncas_file_proc.py
 create mode 100644 tests/testdata/netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc

diff --git a/tests/test_ncas_file_proc.py b/tests/test_ncas_file_proc.py
new file mode 100644
index 00000000..51e59638
--- /dev/null
+++ b/tests/test_ncas_file_proc.py
@@ -0,0 +1,17 @@
+from click.testing import CliRunner
+from checksit import cli
+import os
+from .common import TESTDATA_DIR
+
+
+def test_ncas_general_specs():
+    """Test checksit finds correct specs for NCAS GENERAL file."""
+    nc_file = os.path.join(TESTDATA_DIR, "netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc")
+    output = CliRunner().invoke(cli.check, ["-p", nc_file]).output
+    # Guard the split()[1] lookups below: without this, a missing summary line
+    # surfaces as a bare IndexError instead of showing what the CLI printed.
+    assert "Template: " in output and "Spec Files: " in output, output
+    templ_used = output.split("Template: ")[1].split("\n")[0]
+    specs_used = output.split("Spec Files: ")[1].split("\n")[0]
+    assert templ_used == "OFF"
+    assert specs_used == "['ncas-amof-2.0.0/amof-file-name', 'ncas-amof-2.0.0/amof-common-land', 'ncas-amof-2.0.0/amof-surface-met', 'ncas-amof-2.0.0/amof-global-attrs']"
\ No newline at end of file
diff --git a/tests/testdata/netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc b/tests/testdata/netcdf/ncas-instrument_platform_20230101_surface-met_v1.0.nc
new file mode 100644
index 0000000000000000000000000000000000000000..b121f1bba0af6ac14f03f86fbdb7dad2d0c3ba4d
GIT binary patch
literal 6469
zcmeH|&ubGw6vy9YZAs&{O>1jVgs#$q)^<s1tOy1YHXBn+Hnr)=OW3+wx{~geWG#vm
zy?PNZo;-N)4-mZg2lzjTo<zZu(3>E5sPD~>)`o&d5$2K1%+9>`oj0F(**vXRYonR7
znN&Ir()NWpXZTEBEbP7c^|Mv4uBJvuwA2MQsi(x6j2Y}RDbUCzo2B-)aIew>*)TOc
z#<)gf9Slgr2>?5ec7iAg<ERg(aN#t5DcY_r6s)Cr)3qz@g-WxwuxRD2ym1L|A04th
z*blb5@xv{Q?`*|>zywf}33R>4Hyj%4Fvi_GuD#NWw}W0127EU<9qqOg^!k`WX>mRZ
zlDqy!sc2od7UwtpjjN@CRVZ3lzWvyJ$DnASX;T=bX<7y&$z-ojfi`h|hK_}1LM{!t
z$2b~7IC_TAybxfT%!A{a<L>ZG%o&#n&#=JLsI=ScTm+V)tIJKtZ7;XnPTQ_D95C*X
z`w(|`D@})amxcG*X4p6J<C%CcyIM2-5OL92&Gq|<8O6!r(vH^)z4dNjnZgQK5fdmh
zXkR|Oo@v?jThr&*QFUEh?O;*t@AGol)49TpKZM@CT=`BK=}AmqlaUvN2WK#bMS^fT
zF1wRrHQWk)$YKFYz=9@>-+TV~ua{$hMzOs9%P=&Cmoi6BLgmTPR8~YP)04EFOi=bl
zg{V(qf>;YOK|79nei(TPR@ad<u~yix10`io>PCY^#<7}F<nUe_%k4Fw-y~gm4!<nP
zV>C$18NmjP=~6RW&g%qr7&UQP6c>&XN9-+E#3N`Az6t~<KNWY`#U6j=E>Aa4>8?68
zXVr0Sr^C`+LIq0K_Y$ukB!_t))ElH}{ov+~IgZSN@+O;o^!^nesa6qC1QdaPOyH*7
zYOt3ky~)tz3zYw*;Ja8(@MZWoE(WSo1QY>9KoL*`6ahs*5l{pa0YyL&Py`f#|AxSC
Dc6aqw

literal 0
HcmV?d00001


From 4bd790cc353b57e6129ecd981b7a22cf2fd58e60 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 16:43:33 +0000
Subject: [PATCH 31/37] Added test_ncas_file_proc to workflow

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3301eea5..b415b1f3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -44,4 +44,4 @@ jobs:
     - name: Test with pytest
       run: |
         export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
-        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py
+        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py

From 7c262ea8189ba5cffa3baf5a8c846037e722a49c Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 16:47:42 +0000
Subject: [PATCH 32/37] Removed check for exiftool in workflow

---
 .github/workflows/main.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b415b1f3..63db8b34 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -38,9 +38,6 @@ jobs:
         pip install flake8 black pytest
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
-    - name: Look for exiftool
-      run: |
-        which exiftool
     - name: Test with pytest
       run: |
         export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml

From f5545aff67d68d40bcb06528d359a2e80d27f186 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Tue, 23 Jan 2024 16:52:01 +0000
Subject: [PATCH 33/37] Tidy up import of rule_funcs

---
 tests/test_rules.py | 168 ++++++++++++++++++++++----------------------
 1 file changed, 84 insertions(+), 84 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 5050ed1d..feb0da0e 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -4,204 +4,204 @@
 from numbers import Number
 
 from checksit.rules import rules as r
-from checksit.rules.rule_funcs import match_file_name, string_of_length, match_one_of, match_one_or_more_of, validate_image_date_time, validate_orcid_ID, list_of_names, headline, title_check, url_checker, relation_url_checker, latitude, longitude
+import checksit.rules.rule_funcs as crf
 
 # rule_funcs.py
 def test_match_file_name():
     file_path = "happy_netcdf"
     value = "happy_NetCDF.nc"
     context = {"file_path": file_path}
-    assert len(match_file_name(value, context)) == 1
-    assert len(match_file_name(value, context, ["lowercase"])) == 1
-    assert len(match_file_name(value, context, ["uppercase"])) == 1
-    assert len(match_file_name(value, context, ["lowercase", "no_extension"])) == 0
-    assert len(match_file_name(value, context, ["uppercase", "no_extension"])) == 1
+    assert len(crf.match_file_name(value, context)) == 1
+    assert len(crf.match_file_name(value, context, ["lowercase"])) == 1
+    assert len(crf.match_file_name(value, context, ["uppercase"])) == 1
+    assert len(crf.match_file_name(value, context, ["lowercase", "no_extension"])) == 0
+    assert len(crf.match_file_name(value, context, ["uppercase", "no_extension"])) == 1
 
 
 def test_string_of_length():
     # Test that the function correctly handles strings of the minimum length
-    assert string_of_length('abc', {}, ['3'], 'Test') == []
-    assert string_of_length('abcd', {}, ['3+'], 'Test') == []
+    assert crf.string_of_length('abc', {}, ['3'], 'Test') == []
+    assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == []
 
     # Test that the function correctly handles strings shorter than the minimum length
-    assert string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"]
-    assert string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"]
+    assert crf.string_of_length('ab', {}, ['3'], 'Test') == ["Test 'ab' must be exactly 3 characters"]
+    assert crf.string_of_length('ab', {}, ['3+'], 'Test') == ["Test 'ab' must be at least 3 characters"]
 
     # Test that the function correctly handles strings longer than the minimum length
-    assert string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"]
-    assert string_of_length('abcd', {}, ['3+'], 'Test') == []
+    assert crf.string_of_length('abcd', {}, ['3'], 'Test') == ["Test 'abcd' must be exactly 3 characters"]
+    assert crf.string_of_length('abcd', {}, ['3+'], 'Test') == []
 
     # Test that the function correctly handles empty strings
-    assert string_of_length('', {}, ['0'], 'Test') == []
-    assert string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"]
-    assert string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"]
+    assert crf.string_of_length('', {}, ['0'], 'Test') == []
+    assert crf.string_of_length('', {}, ['1'], 'Test') == ["Test '' must be exactly 1 characters"]
+    assert crf.string_of_length('', {}, ['1+'], 'Test') == ["Test '' must be at least 1 characters"]
 
 
 def test_match_one_of():
     # Test that the function correctly handles valid inputs
-    assert match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == []
+    assert crf.match_one_of('apple', {}, ['apple|banana|orange'], 'Test') == []
 
     # Test that the function correctly handles invalid inputs
-    assert match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one of: '['apple', 'banana', 'orange']'"]
 
     # Test that the function correctly handles empty strings
-    assert match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one of: '['apple', 'banana', 'orange']'"]
 
 
 def test_match_one_or_more_of():
     # Test that the function correctly handles valid inputs
-    assert match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == []
-    assert match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == []
+    assert crf.match_one_or_more_of('apple,banana', {}, ['apple|banana|orange'], 'Test') == []
+    assert crf.match_one_or_more_of('apple', {}, ['apple|banana|orange'], 'Test') == []
 
     # Test that the function correctly handles invalid inputs
-    assert match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
-    assert match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_or_more_of('apple,kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'apple,kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_or_more_of('kiwi', {}, ['apple|banana|orange'], 'Test') == ["Test 'kiwi' must be one or more of: '['apple', 'banana', 'orange']'"]
 
     # Test that the function correctly handles empty strings
-    assert match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"]
+    assert crf.match_one_or_more_of('', {}, ['apple|banana|orange'], 'Test') == ["Test '' must be one or more of: '['apple', 'banana', 'orange']'"]
 
 
 def test_validate_image_date_time():
     # Test that the function correctly handles valid date-time strings
-    assert validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == []
-    assert validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == []
+    assert crf.validate_image_date_time('2022:01:01 12:00:00', {}, label = 'Test') == []
+    assert crf.validate_image_date_time('2022:01:01 12:00:00.000000', {}, label = 'Test') == []
 
     # Test that the function correctly handles invalid date-time strings
-    assert validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
-    assert validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
-    assert validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
-    assert validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time('2022-01-01 12:00:00', {}, label = 'Test') == ["Test '2022-01-01 12:00:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time('2022:01:01 12:00', {}, label = 'Test') == ["Test '2022:01:01 12:00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time('2022:01:01', {}, label = 'Test') == ["Test '2022:01:01' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time('2022:01:01 12:00:00.00', {}, label = 'Test') == ["Test '2022:01:01 12:00:00.00' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
 
     # Test that the function correctly handles empty strings
-    assert validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
+    assert crf.validate_image_date_time('', {}, label = 'Test') == ["Test '' needs to be of the format YYYY:MM:DD hh:mm:ss or YYYY:MM:DD hh:mm:ss.s"]
 
 
 def test_validate_orcid_ID():
     # Test that the function correctly handles valid ORCID IDs
-    assert validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == []
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == []
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == []
+    assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-0097', {}, label='Test') == []
+    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456', {}, label='Test') == []
+    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-345X', {}, label='Test') == []
 
     # Test that the function correctly handles ORCID IDs with incorrect lengths
-    assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34567', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34567' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
 
     # Test that the function correctly handles ORCID IDs with incorrect formats
-    assert validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
-    assert validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('https://orcid.org/0000-0002-1825-009Z', {}, label='Test') == ["Test 'https://orcid.org/0000-0002-1825-009Z' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-34X5', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-34X5' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456-', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456-' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('https://orcid.org/1234-5678-9012-3456X', {}, label='Test') == ["Test 'https://orcid.org/1234-5678-9012-3456X' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
 
     # Test that the function correctly handles empty strings
-    assert validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
+    assert crf.validate_orcid_ID('', {}, label='Test') == ["Test '' needs to be of the format https://orcid.org/XXXX-XXXX-XXXX-XXXX"]
 
 
 def test_list_of_names():
     # Test that the function correctly handles valid names
-    assert list_of_names('Doe, John', {}, label='Test') == []
-    assert list_of_names('Doe, John J.', {}, label='Test') == []
-    assert list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == []
+    assert crf.list_of_names('Doe, John', {}, label='Test') == []
+    assert crf.list_of_names('Doe, John J.', {}, label='Test') == []
+    assert crf.list_of_names(['Doe, John', 'Smith, Jane'], {}, label='Test') == []
 
     # Test that the function correctly handles names with incorrect formats
-    assert list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
-    assert list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
-    assert list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+    assert crf.list_of_names('John Doe', {}, label='Test') == ["Test 'John Doe' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+    assert crf.list_of_names('Doe John', {}, label='Test') == ["Test 'Doe John' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
+    assert crf.list_of_names(['Doe, John', 'Jane Smith'], {}, label='Test') == ["Test '['Doe, John', 'Jane Smith']' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate"]
 
     # Test that the function correctly handles names with invalid characters
-    assert list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"]
-    assert list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"]
-    assert list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert crf.list_of_names('Doe, J0hn', {}, label='Test') == ["Test 'Doe, J0hn' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert crf.list_of_names('Doe, John!', {}, label='Test') == ["Test 'Doe, John!' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert crf.list_of_names(['Doe, John', 'Smith, J@ne'], {}, label='Test') == ["Test '['Doe, John', 'Smith, J@ne']' - please use characters A-Z, a-z, À-ÿ where appropriate"]
 
     # Test that the function correctly handles empty strings
-    assert list_of_names('', {}, label='Test') == ["Test '' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"]
-    assert list_of_names([], {}, label='Test') == []
+    assert crf.list_of_names('', {}, label='Test') == ["Test '' should be of the format <last name>, <first name> <middle initials(s)> or <last name>, <first name> <middle name(s)> where appropriate", "Test '' - please use characters A-Z, a-z, À-ÿ where appropriate"]
+    assert crf.list_of_names([], {}, label='Test') == []
 
 
 def test_headline():
     # Test that the function correctly handles valid headlines
-    assert headline('This is a valid headline.', {}, label='Test') == []
-    assert headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == []
-    assert headline('This headline is exactly 10 characters.', {}, label='Test') == []
+    assert crf.headline('This is a valid headline.', {}, label='Test') == []
+    assert crf.headline('This headline is exactly 150 characters long ' + 'a' * 105, {}, label='Test') == []
+    assert crf.headline('This headline is exactly 10 characters.', {}, label='Test') == []
 
     # Test that the function correctly handles headlines longer than 150 characters
-    assert headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"]
+    assert crf.headline('This headline is longer than 150 characters.' + 'a' * 120, {}, label='Test') == ["Test 'This headline is longer than 150 characters." + "a" * 120 + "' should contain no more than one sentence"]
 
     # Test that the function correctly handles headlines with more than one sentence
-    assert headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"]
+    assert crf.headline('This is a headline. It has two sentences.', {}, label='Test') == ["Test 'This is a headline. It has two sentences.' should contain no more than one sentence"]
 
     # Test that the function correctly handles headlines that do not start with a capital letter
-    assert headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"]
+    assert crf.headline('this headline does not start with a capital letter.', {}, label='Test') == ["Test 'this headline does not start with a capital letter.' should start with a capital letter"]
 
     # Test that the function correctly handles headlines shorter than 10 characters
-    assert headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"]
+    assert crf.headline('Too short', {}, label='Test') == ["Test 'Too short' should be at least 10 characters"]
 
     # Test that the function correctly handles empty strings
-    assert headline('', {}, label='Test') == ["Test '' should not be empty"]
+    assert crf.headline('', {}, label='Test') == ["Test '' should not be empty"]
 
 
 def test_title_check():
     # Test that the function correctly handles titles that match the filename
-    assert title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == []
-    assert title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == []
+    assert crf.title_check('happy_netcdf', "/path/to/file/happy_netcdf", label='Test') == []
+    assert crf.title_check('happy_NetCDF.nc', "/path/to/file/happy_NetCDF.nc", label='Test') == []
 
     # Test that the function correctly handles titles that do not match the filename
-    assert title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"]
-    assert title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"]
+    assert crf.title_check('sad_netcdf', "/path/to/file/happy_netcdf", label='Test') == ["Test 'sad_netcdf' must match the name of the file"]
+    assert crf.title_check('happy_NetCDF.nc', "/path/to/file/sad_NetCDF.nc", label='Test') == ["Test 'happy_NetCDF.nc' must match the name of the file"]
 
     # Test that the function correctly handles empty titles
-    assert title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"]
+    assert crf.title_check('', "/path/to/file/happy_netcdf", label='Test') == ["Test '' must match the name of the file"]
 
 
 def test_url_checker():
     # Test that the function correctly handles a reachable URL
-    assert url_checker("https://www.example.com", {}, label="Test") == []
+    assert crf.url_checker("https://www.example.com", {}, label="Test") == []
 
     # Test that the function correctly handles an unreachable URL
-    assert url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"]
+    assert crf.url_checker("https://www.nonexistenturl.com", {}, label="Test") == ["Test 'https://www.nonexistenturl.com' is not a reachable url"]
 
     # Test that the function correctly handles an existing but unreachable URL
-    assert url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"]
+    assert crf.url_checker("https://www.example.com/nonexistentpage", {}, label="Test") == ["Test 'https://www.example.com/nonexistentpage' is not a reachable url"]
 
     # Test that the function correctly handles an empty URL
-    assert url_checker("", {}, label="Test") == ["Test '' is not a reachable url"]
+    assert crf.url_checker("", {}, label="Test") == ["Test '' is not a reachable url"]
 
 
 def test_relation_url_checker():
     # Test that the function correctly handles valid inputs
-    assert relation_url_checker('relation https://example.com', {}, label='Test') == []
+    assert crf.relation_url_checker('relation https://example.com', {}, label='Test') == []
 
     # Test that the function correctly handles inputs without a space
-    assert relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"]
+    assert crf.relation_url_checker('relationhttps://example.com', {}, label='Test') == ["Test 'relationhttps://example.com' should contain a space before the url"]
 
     # Test that the function correctly handles inputs with an invalid URL
-    assert relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"]
+    assert crf.relation_url_checker('relation https://', {}, label='Test') == ["Test 'https://' is not a reachable url"]
 
     # Test that the function correctly handles empty strings
-    assert relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"]
+    assert crf.relation_url_checker('', {}, label='Test') == ["Test '' should contain a space before the url"]
 
 
 def test_latitude():
     # Test that the function correctly handles valid latitudes
-    assert latitude('45.1234', {}, label='Test') == []
-    assert latitude('-90.0000', {}, label='Test') == []
-    assert latitude('90.0000', {}, label='Test') == []
+    assert crf.latitude('45.1234', {}, label='Test') == []
+    assert crf.latitude('-90.0000', {}, label='Test') == []
+    assert crf.latitude('90.0000', {}, label='Test') == []
 
     # Test that the function correctly handles invalid latitudes
-    assert latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "]
-    assert latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "]
-    assert latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "]
+    assert crf.latitude('90.0001', {}, label='Test') == ["Test '90.0001' must be within -90 and +90 "]
+    assert crf.latitude('-90.0001', {}, label='Test') == ["Test '-90.0001' must be within -90 and +90 "]
+    assert crf.latitude('100.0000', {}, label='Test') == ["Test '100.0000' must be within -90 and +90 "]
 
 
 def test_longitude():
     # Test that the function correctly handles valid longitudes
-    assert longitude('45.1234', {}, label='Test') == []
-    assert longitude('-180.0000', {}, label='Test') == []
-    assert longitude('180.0000', {}, label='Test') == []
+    assert crf.longitude('45.1234', {}, label='Test') == []
+    assert crf.longitude('-180.0000', {}, label='Test') == []
+    assert crf.longitude('180.0000', {}, label='Test') == []
 
     # Test that the function correctly handles invalid longitudes
-    assert longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "]
-    assert longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "]
-    assert longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "]
+    assert crf.longitude('180.0001', {}, label='Test') == ["Test '180.0001' must be within -180 and +180 "]
+    assert crf.longitude('-180.0001', {}, label='Test') == ["Test '-180.0001' must be within -180 and +180 "]
+    assert crf.longitude('200.0000', {}, label='Test') == ["Test '200.0000' must be within -180 and +180 "]
 
 
 # rules.py

From e7ad6c46ec7c545fc19bc80e9c103605724587d2 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Fri, 26 Jan 2024 13:42:51 +0000
Subject: [PATCH 34/37] Added checks on optional variables

---
 tests/test_generic.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_generic.py b/tests/test_generic.py
index b004368e..a651a407 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -301,6 +301,13 @@ def test_check_var():
     assert errors == ["[variable**************:var2]: Attribute 'attr3' does not exist. "]
     assert warnings == []
 
+    # Repeat the missing-attribute case with the variable flagged __OPTIONAL__; the same error is expected
+    variable = "var2:__OPTIONAL__"
+    defined_attrs = ["long_name:Variable 2", "units:kg", "attr3:value 3"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[variable**************:var2]: Attribute 'attr3' does not exist. "]
+    assert warnings == []
+
     # Test that the function correctly identifies incorrect attributes
     variable = "var2"
     defined_attrs = ["long_name:Variable 2", "units:s"]
@@ -308,6 +315,13 @@ def test_check_var():
     assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."]
     assert warnings == []
 
+    # Repeat the incorrect-attribute case with the variable flagged __OPTIONAL__; the same error is expected
+    variable = "var2:__OPTIONAL__"
+    defined_attrs = ["long_name:Variable 2", "units:s"]
+    errors, warnings = cg.check_var(dct, variable, defined_attrs, skip_spellcheck=True)
+    assert errors == ["[variable**************:var2]: Attribute 'units' must have definition s, not kg."]
+    assert warnings == []
+
     # Test that the function correctly handles badly formatted flag_values
     variable = "var4:__OPTIONAL__"
     defined_attrs = ["flag_values:0b, 1b, 2b"]

From db4ac3d7506de50e5fc7a5658e182cfaad6c3c79 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Fri, 26 Jan 2024 13:43:19 +0000
Subject: [PATCH 35/37] Add tests on vocab checks

---
 tests/test_cvs.py | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/tests/test_cvs.py b/tests/test_cvs.py
index d3c2134a..f19669a5 100644
--- a/tests/test_cvs.py
+++ b/tests/test_cvs.py
@@ -1,22 +1,27 @@
-from checksit.cvs import vocabs, vc
+import pytest
+
+from checksit.cvs import vocabs
 
 
-lookups = {
-    'vocabs:ukcp18:variables:season_year':
-        {'dimensions': ['time'], 'units': '1', 'dtype': 'int', 'long_name': 'season_year'},
-    'vocabs:ukcp18:collection': 
-        ['land-cpm', 'land-derived', 'land-gcm', 'land-indices', 'land-prob', 'land-rcm', 'land-rcm-gwl', 'marine-sim'],
-    'vocabs:cf-netcdf:Conventions':
-        ["CF-1.5", "CF-1.6"]
-}
-
-
-for lookup, exp_value in lookups.items():
-    value = vocabs.lookup(lookup)
-    assert exp_value == value
-
-
-for lookup, exp_value in lookups.items():
-    value = vc._lookup(lookup)
-    assert exp_value == value
+def test_lookup():
+    """Check vocabs.lookup results for the tests/test_instruments vocabulary."""
+    assert vocabs.lookup("__vocabs__:tests/test_instruments:test_instruments") == {"inst1": {"instrument_id": "inst1"}, "inst2": {"instrument_id": "inst2"}}
+    assert vocabs.lookup("__vocabs__:tests/test_instruments:test_instruments:__all__") == ["inst1", "inst2"]
+    assert vocabs.lookup("__vocabs__:tests/test_instruments:test_instruments:inst1") == {"instrument_id": "inst1"}
+    assert vocabs.lookup("__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id") == ["inst1", "inst2"]
+    # A lookup with two consecutive __all__ components must raise ValueError
+    with pytest.raises(ValueError):
+        vocabs.lookup("__vocabs__:tests/test_instruments:test_instruments:__all__:__all__")
 
+
+def test_check():
+    """Check vocabs.check messages for the test_instruments and test_platforms vocabularies."""
+    assert vocabs.check("__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id", "inst1", label="Test") == []
+    assert vocabs.check(
+        "__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id", "inst3", label="Test",
+    ) == [
+        "Test 'inst3' not in vocab options: ['inst1', 'inst2'] (using: '__vocabs__:tests/test_instruments:test_instruments:__all__:instrument_id')"
+    ]
+    assert vocabs.check("__vocabs__:tests/test_platforms:test_platforms:plat1", {"platform_id": "plat1"}, label="Test") == ["Test does not have attribute 'description'"]
+    assert vocabs.check("__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id", "plat1", label="Test") == []
+    assert vocabs.check("__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id", "plat2", label="Test") == ["Test 'plat2' does not equal required vocab value: 'plat1' (using: '__vocabs__:tests/test_platforms:test_platforms:plat1:platform_id')"]

From 06f2f0389b8c9fdace5d0dc6e6fee2f46b9c60b5 Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Fri, 26 Jan 2024 13:43:54 +0000
Subject: [PATCH 36/37] Add test_cvs.py to main workflow

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 63db8b34..e6cfc7dd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -41,4 +41,4 @@ jobs:
     - name: Test with pytest
       run: |
         export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
-        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py
+        python -m pytest -v tests/test_readers.py tests/test_rules.py tests/test_show_specs.py tests/test_images.py tests/test_generic.py tests/test_utils.py tests/test_ncas_file_proc.py tests/test_cvs.py

From b05a001ae5a7dd26a5c23f25e16f96d8da69902a Mon Sep 17 00:00:00 2001
From: Joshua Hampton <joshua.hampton@ncas.ac.uk>
Date: Fri, 26 Jan 2024 15:06:21 +0000
Subject: [PATCH 37/37] Install netcdf in github action

---
 .github/workflows/main.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e6cfc7dd..e6e03c33 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -29,9 +29,11 @@ jobs:
         ./configure
         make all install
         ln -sf /opt/lib/* $LD_LIBRARY_PATH
-    - name: Install exiftool
+    - name: Install exiftool and netcdf
       run: |
-        sudo apt install libimage-exiftool-perl -y
+        # Refresh the package index first; apt-get (not apt) has the stable CLI for scripts
+        sudo apt-get update
+        sudo apt-get install -y libimage-exiftool-perl netcdf-bin
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip