
Commit

Merge pull request #36 from cedadev/ncas-image
Ncas image v1.0
joshua-hampton authored Jan 15, 2024
2 parents 8658e65 + f0b4304 commit 28b32af
Showing 31 changed files with 509 additions and 55 deletions.
18 changes: 17 additions & 1 deletion .github/workflows/main.yml
@@ -20,12 +20,28 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install udunits
run: |
cd /opt
curl -O https://downloads.unidata.ucar.edu/udunits/2.2.28/udunits-2.2.28.tar.gz
tar -xzvf udunits-2.2.28.tar.gz
cd udunits-2.2.28
./configure
make all install
ln -sf /opt/lib/* $LD_LIBRARY_PATH
- name: Install exiftool
run: |
sudo apt install libimage-exiftool-perl -y
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 black pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
- name: Look for exiftool
run: |
which exiftool
- name: Test with pytest
run: |
python -m pytest -v tests/test_readers.py
export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
python -m pytest -v tests/test_readers.py tests/test_images.py
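
The job above installs two external dependencies before the image tests run: UDUNITS-2 built from source (with UDUNITS2_XML_PATH exported for pytest) and exiftool from apt. A minimal local sanity check for the same prerequisites might look like this Python sketch; the XML path is the one the workflow uses, not a checksit requirement:

import os
import shutil

# exiftool must be on PATH for checksit's image reader to call it.
print("exiftool:", shutil.which("exiftool") or "NOT FOUND")

# The workflow points UDUNITS2_XML_PATH at the XML database installed
# under /opt/share/udunits; adjust the default for a local build.
xml_path = os.environ.get("UDUNITS2_XML_PATH", "/opt/share/udunits/udunits2.xml")
print("udunits2.xml present:", os.path.exists(xml_path))
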
48 changes: 39 additions & 9 deletions checksit/check.py
@@ -10,13 +10,14 @@

from .cvs import vocabs, vocabs_prefix
from .rules import rules, rules_prefix
from .readers import pp, badc_csv, cdl, yml
from .readers import pp, badc_csv, cdl, yml, image
from .specs import SpecificationChecker
from .utils import get_file_base, extension, UNDEFINED
from .config import get_config
from .make_specs import make_amof_specs

AMOF_CONVENTIONS = ['"CF-1.6, NCAS-AMF-2.0.0"']
IMAGE_EXTENSIONS = ["png", "jpg", "jpeg"]
conf = get_config()


@@ -229,12 +230,16 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None

# tmpl = self.parse_file_header(template, auto_cache=auto_cache, verbose=verbose)

### Check for AMOF netCDF file and gather specs ###
if template == "auto" and file_path.split('.')[-1] == 'nc':
# Look for AMOF Convention string in Conventions global attr, if it exists
if ':Conventions' in file_content.cdl:
conventions = file_content.cdl.split(':Conventions =')[1].split(';')[0].strip()
if "NCAS-AMOF" in conventions or "NCAS-GENERAL" in conventions or "NCAS-AMF" in conventions:
### Check for NCAS data files and gather specs ###
# if template and specs are "default" values, check to see if
# file is an ncas file (assuming file name starts with instrument name)
if (template == "auto" and specs == None and
file_path.split("/")[-1].startswith("ncas-")):
# find appropriate specs depending on convention
if file_path.split(".")[-1] == "nc" and ":Conventions" in file_content.cdl:
conventions = file_content.cdl.split(":Conventions =")[1].split(";")[0].strip()
# NCAS-GENERAL file
if any(name in conventions for name in ["NCAS-GENERAL", "NCAS-AMF", "NCAS-AMOF"]):
if verbose:
print("\nNCAS-AMOF file detected, finding correct spec files")
print("Finding correct AMOF version...")
@@ -245,7 +250,7 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None
# check specs exist for that version
specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}")
if not os.path.exists(specs_dir):
if verbose: print(f"Specs for version {version_number} not found, attempting download...")
if verbose: print(f"Specs for version NCAS-GENERAL-{version_number} not found, attempting download...")
try:
vocabs_dir = os.path.join(conf["settings"].get("vocabs_dir", "./checksit/vocabs"), f"AMF_CVs/{version_number}")
cvs = urllib.request.urlopen(f"https://github.com/ncasuk/AMF_CVs/tree/v{version_number}/AMF_CVs")
@@ -280,7 +285,6 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None
sys.exit()
except:
raise


# get deployment mode and data product, to then get specs
deployment_mode = file_content.cdl.split(':deployment_mode =')[1].split(';')[0].strip().strip('"')
@@ -291,6 +295,30 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None
# don't need to do template check
template = "off"

# NCAS-RADAR (coming soon...)
# if "NCAS-Radar" in conventions

elif (file_path.split(".")[-1].lower() in IMAGE_EXTENSIONS and
"XMP-photoshop:Instructions" in file_content.global_attrs.keys()):
conventions = file_content.global_attrs["XMP-photoshop:Instructions"]
if "National Centre for Atmospheric Science Image Metadata Standard" in file_content.global_attrs["XMP-photoshop:Instructions"].replace("\n"," "):
if verbose:
print("\nNCAS-IMAGE file detected, finding correct spec files")
print("Finding correct IMAGE version...")
version_number = conventions.replace("\n"," ").split("Metadata Standard ")[1].split(":")[0]
spec_folder = f"ncas-image-{version_number}"
if verbose: print(f" {version_number}")
specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}")
if not os.path.exists(specs_dir):
print(f"[ERROR] specs for NCAS-IMAGE {version_number} can not be found.")
print("Aborting...")
sys.exit()
product = file_path.split('/')[-1].split('_')[3]
product_spec = f"{spec_folder}/amof-{product}"
specs = [product_spec, f"{spec_folder}/amof-image-global-attrs"]
template = "off"



if template == "off":
tmpl = template
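
A worked example of the image branch above, with a hypothetical file name and XMP-photoshop:Instructions value (neither comes from the repository), showing where the version and product strings are taken from:

# Hypothetical values, for illustration only.
file_path = "/data/ncas-cam-9_cao_20240101_photo_v1.0.jpg"
instructions = "National Centre for Atmospheric Science Image Metadata Standard 1.0: follow the guidance at ..."

# Version: the text between "Metadata Standard " and the next ":".
version_number = instructions.replace("\n", " ").split("Metadata Standard ")[1].split(":")[0]
# version_number == "1.0", so spec_folder == "ncas-image-1.0"

# Product: the fourth underscore-separated field of the file name.
product = file_path.split("/")[-1].split("_")[3]
# product == "photo", giving specs == ["ncas-image-1.0/amof-photo",
#                                      "ncas-image-1.0/amof-image-global-attrs"]
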
@@ -404,6 +432,8 @@ def parse_file_header(self, file_path, auto_cache=False, verbose=False):
reader = badc_csv
elif ext in ("yml"):
reader = yml
elif ext.lower() in IMAGE_EXTENSIONS:
reader = image
else:
raise Exception(f"No known reader for file with extension: {ext}")

6 changes: 4 additions & 2 deletions checksit/generic.py
@@ -99,7 +99,7 @@ def check_global_attrs(dct, defined_attrs=None, vocab_attrs=None, regex_attrs=No
errors.append(f"[global-attributes:**************:{attr}]: No value defined for attribute '{attr}'.")
else:
errors.extend(vocabs.check(vocab_attrs[attr], dct["global_attributes"].get(attr), label=f"[global-attributes:******:{attr}]***"))

for attr in regex_attrs:
if attr not in dct['global_attributes']:
errors.append(
@@ -123,7 +123,9 @@ def check_global_attrs(dct, defined_attrs=None, vocab_attrs=None, regex_attrs=No
elif is_undefined(dct['global_attributes'].get(attr)):
errors.append(f"[global-attributes:**************:{attr}]: No value defined for attribute '{attr}'.")
else:
errors.extend(rules.check(rules_attrs[attr], dct['global_attributes'].get(attr), label=f"[global-attributes:******:{attr}]***"))
rules_check_output = rules.check(rules_attrs[attr], dct['global_attributes'].get(attr), context=dct['inpt'], label=f"[global-attributes:******:{attr}]***")
warnings.extend(rules_check_output[1])
errors.extend(rules_check_output[0])


return errors, warnings
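
The change above assumes rules.check now returns a pair of lists, errors first and warnings second, and accepts the raw input as context. A small self-contained sketch of that calling pattern, with a stand-in for the real rules.check (names and behaviour here are illustrative, not checksit's):

from typing import List, Tuple

def check_rule(rule: str, value, context=None, label: str = "") -> Tuple[List[str], List[str]]:
    # Stand-in for checksit's rules.check: returns (errors, warnings).
    errors, warnings = [], []
    if value is None:
        errors.append(f"{label} no value found for rule '{rule}'")
    return errors, warnings

errors, warnings = [], []
rule_errors, rule_warnings = check_rule("some-rule", None, context="file.nc", label="[global-attributes:source]")
errors.extend(rule_errors)      # matches rules_check_output[0] above
warnings.extend(rule_warnings)  # matches rules_check_output[1] above
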
7 changes: 5 additions & 2 deletions checksit/readers/cdl.py
@@ -2,6 +2,7 @@
import re
import yaml
import subprocess as sp
import sys

from ..cvs import vocabs, vocabs_prefix

@@ -40,7 +41,8 @@ def _parse(self, inpt):

for s in self.CDL_SPLITTERS:
if s not in cdl_lines:
raise Exception(f"Invalid file or CDL contents provided: '{inpt[:100]}...'")
print(f"Please check your command - invalid file or CDL contents provided: '{inpt[:100]}...'")
sys.exit(1)

sections = self._get_sections(cdl_lines, split_patterns=self.CDL_SPLITTERS, start_at=1)

@@ -188,7 +190,8 @@ def to_yaml(self):
def to_dict(self):
return {"dimensions": self.dimensions,
"variables": self.variables,
"global_attributes": self.global_attrs}
"global_attributes": self.global_attrs,
"inpt": self.inpt}


def read(fpath, verbose=False):
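
With the addition above, the CDL reader's to_dict output carries the original input alongside the parsed header, matching the "inpt" key the new image reader (below) also provides and which check.py passes to rules.check as context. A hedged sketch, assuming cdl.read returns the parser object just as image.read does and using a made-up file name:

from checksit.readers import cdl

parsed = cdl.read("ncas-instrument_site_20240101_product_v1.0.nc").to_dict()  # hypothetical file
print(sorted(parsed.keys()))
# -> ['dimensions', 'global_attributes', 'inpt', 'variables']
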
60 changes: 60 additions & 0 deletions checksit/readers/image.py
@@ -0,0 +1,60 @@
import subprocess as sp
import yaml

def get_output(cmd):
subp = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
return subp.stdout.read().decode("charmap"), subp.stderr.read().decode("charmap")


class ImageParser:

def __init__(self, inpt, verbose=False):
self.inpt = inpt
self.verbose = verbose
self.base_exiftool_arguments = ["exiftool", "-G1", "-j", "-c", "%+.6f"]
self._find_exiftool()
self._parse(inpt)

def _parse(self, inpt):
if self.verbose: print(f"[INFO] Parsing input: {inpt[:100]}...")
self.global_attrs = {}
exiftool_arguments = self.base_exiftool_arguments + [inpt]
exiftool_return_string = sp.check_output(exiftool_arguments)
raw_global_attrs = yaml.load(exiftool_return_string, Loader=yaml.SafeLoader)[0]
for tag_name in raw_global_attrs.keys():
value_type = type(raw_global_attrs[tag_name])
if value_type == list:
self.global_attrs[tag_name] = str(raw_global_attrs[tag_name][0])
else:
self.global_attrs[tag_name] = str(raw_global_attrs[tag_name])

def _find_exiftool(self):
if self.verbose: print("[INFO] Searching for exiftool...")
which_output, which_error = get_output("which exiftool")
if which_error.startswith("which: no exiftool in"):
msg = (
f"'exiftool' required to read image file metadata but cannot be found.\n"
f" Visit https://exiftool.org/ for information on 'exiftool'."
)
raise RuntimeError(msg)
else:
self.exiftool_location = which_output.strip()
if self.verbose: print(f"[INFO] Found exiftool at {self.exiftool_location}.")

def _attrs_dict(self,content_lines):
attr_dict = {}
for line in content_lines:
if self.verbose: print(f"WORKING ON LINE: {line}")
key_0 = line.split("=",1)[0].strip()
key = key_0[1:] #removes first character - unwanted quotation marks
value = line.split("=",1)[1].strip()
attr_dict[key] = value
return attr_dict

def to_dict(self):
return {"global_attributes": self.global_attrs, "inpt": self.inpt}


def read(fpath, verbose=False):
return ImageParser(fpath, verbose=verbose)
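
A usage sketch for the new reader, with a made-up file name: it shells out to exiftool with the -G1 -j -c "%+.6f" arguments above, so every key in global_attributes is a group-qualified tag name and every value is stored as a string.

from checksit.readers import image

# Roughly equivalent to: exiftool -G1 -j -c "%+.6f" <file>
parsed = image.read("ncas-cam-9_cao_20240101_photo_v1.0.jpg", verbose=True).to_dict()

# Keys follow exiftool's "Group1:TagName" form, e.g. the attribute
# check.py uses to detect the NCAS image metadata standard.
print(parsed["global_attributes"].get("XMP-photoshop:Instructions"))
print(parsed["inpt"])  # the original file path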
