
Commit

Merge pull request #36 from cedadev/ncas-image
Ncas image v1.0
joshua-hampton authored Jan 15, 2024
2 parents 8658e65 + f0b4304 commit 28b32af
Showing 31 changed files with 509 additions and 55 deletions.
18 changes: 17 additions & 1 deletion .github/workflows/main.yml
@@ -20,12 +20,28 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install udunits
run: |
cd /opt
curl -O https://downloads.unidata.ucar.edu/udunits/2.2.28/udunits-2.2.28.tar.gz
tar -xzvf udunits-2.2.28.tar.gz
cd udunits-2.2.28
./configure
make all install
ln -sf /opt/lib/* $LD_LIBRARY_PATH
- name: Install exiftool
run: |
sudo apt install libimage-exiftool-perl -y
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 black pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
- name: Look for exiftool
run: |
which exiftool
- name: Test with pytest
run: |
python -m pytest -v tests/test_readers.py
export UDUNITS2_XML_PATH=/opt/share/udunits/udunits2.xml
python -m pytest -v tests/test_readers.py tests/test_images.py
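
The job above installs two external dependencies before the image tests run: UDUNITS-2 built from source (with UDUNITS2_XML_PATH exported for pytest) and exiftool from apt. A minimal local sanity check for the same prerequisites might look like this Python sketch; the XML path is the one the workflow uses, not a checksit requirement:

import os
import shutil

# exiftool must be on PATH for checksit's image reader to call it.
print("exiftool:", shutil.which("exiftool") or "NOT FOUND")

# The workflow points UDUNITS2_XML_PATH at the XML database installed
# under /opt/share/udunits; adjust the default for a local build.
xml_path = os.environ.get("UDUNITS2_XML_PATH", "/opt/share/udunits/udunits2.xml")
print("udunits2.xml present:", os.path.exists(xml_path))
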
48 changes: 39 additions & 9 deletions checksit/check.py
@@ -10,13 +10,14 @@

from .cvs import vocabs, vocabs_prefix
from .rules import rules, rules_prefix
from .readers import pp, badc_csv, cdl, yml
from .readers import pp, badc_csv, cdl, yml, image
from .specs import SpecificationChecker
from .utils import get_file_base, extension, UNDEFINED
from .config import get_config
from .make_specs import make_amof_specs

AMOF_CONVENTIONS = ['"CF-1.6, NCAS-AMF-2.0.0"']
IMAGE_EXTENSIONS = ["png", "jpg", "jpeg"]
conf = get_config()


@@ -229,12 +230,16 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None

# tmpl = self.parse_file_header(template, auto_cache=auto_cache, verbose=verbose)

### Check for AMOF netCDF file and gather specs ###
if template == "auto" and file_path.split('.')[-1] == 'nc':
# Look for AMOF Convention string in Conventions global attr, if it exists
if ':Conventions' in file_content.cdl:
conventions = file_content.cdl.split(':Conventions =')[1].split(';')[0].strip()
if "NCAS-AMOF" in conventions or "NCAS-GENERAL" in conventions or "NCAS-AMF" in conventions:
### Check for NCAS data files and gather specs ###
# if template and specs are "default" values, check to see if
# file is an ncas file (assuming file name starts with instrument name)
if (template == "auto" and specs == None and
file_path.split("/")[-1].startswith("ncas-")):
# find appropriate specs depending on convention
if file_path.split(".")[-1] == "nc" and ":Conventions" in file_content.cdl:
conventions = file_content.cdl.split(":Conventions =")[1].split(";")[0].strip()
# NCAS-GENERAL file
if any(name in conventions for name in ["NCAS-GENERAL", "NCAS-AMF", "NCAS-AMOF"]):
if verbose:
print("\nNCAS-AMOF file detected, finding correct spec files")
print("Finding correct AMOF version...")
@@ -245,7 +250,7 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None
# check specs exist for that version
specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}")
if not os.path.exists(specs_dir):
if verbose: print(f"Specs for version {version_number} not found, attempting download...")
if verbose: print(f"Specs for version NCAS-GENERAL-{version_number} not found, attempting download...")
try:
vocabs_dir = os.path.join(conf["settings"].get("vocabs_dir", "./checksit/vocabs"), f"AMF_CVs/{version_number}")
cvs = urllib.request.urlopen(f"https://github.com/ncasuk/AMF_CVs/tree/v{version_number}/AMF_CVs")
@@ -280,7 +285,6 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None
sys.exit()
except:
raise


# get deployment mode and data product, to then get specs
deployment_mode = file_content.cdl.split(':deployment_mode =')[1].split(';')[0].strip().strip('"')
@@ -291,6 +295,30 @@ def check_file(self, file_path, template="auto", mappings=None, extra_rules=None
# don't need to do template check
template = "off"

# NCAS-RADAR (coming soon...)
# if "NCAS-Radar" in conventions

elif (file_path.split(".")[-1].lower() in IMAGE_EXTENSIONS and
"XMP-photoshop:Instructions" in file_content.global_attrs.keys()):
conventions = file_content.global_attrs["XMP-photoshop:Instructions"]
if "National Centre for Atmospheric Science Image Metadata Standard" in file_content.global_attrs["XMP-photoshop:Instructions"].replace("\n"," "):
if verbose:
print("\nNCAS-IMAGE file detected, finding correct spec files")
print("Finding correct IMAGE version...")
version_number = conventions.replace("\n"," ").split("Metadata Standard ")[1].split(":")[0]
spec_folder = f"ncas-image-{version_number}"
if verbose: print(f" {version_number}")
specs_dir = os.path.join(conf["settings"].get("specs_dir", "./specs"), f"groups/{spec_folder}")
if not os.path.exists(specs_dir):
print(f"[ERROR] specs for NCAS-IMAGE {version_number} can not be found.")
print("Aborting...")
sys.exit()
product = file_path.split('/')[-1].split('_')[3]
product_spec = f"{spec_folder}/amof-{product}"
specs = [product_spec, f"{spec_folder}/amof-image-global-attrs"]
template = "off"



if template == "off":
tmpl = template
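
A worked example of the image branch above, with a hypothetical file name and XMP-photoshop:Instructions value (neither comes from the repository), showing where the version and product strings are taken from:

# Hypothetical values, for illustration only.
file_path = "/data/ncas-cam-9_cao_20240101_photo_v1.0.jpg"
instructions = "National Centre for Atmospheric Science Image Metadata Standard 1.0: follow the guidance at ..."

# Version: the text between "Metadata Standard " and the next ":".
version_number = instructions.replace("\n", " ").split("Metadata Standard ")[1].split(":")[0]
# version_number == "1.0", so spec_folder == "ncas-image-1.0"

# Product: the fourth underscore-separated field of the file name.
product = file_path.split("/")[-1].split("_")[3]
# product == "photo", giving specs == ["ncas-image-1.0/amof-photo",
#                                      "ncas-image-1.0/amof-image-global-attrs"]
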
@@ -404,6 +432,8 @@ def parse_file_header(self, file_path, auto_cache=False, verbose=False):
reader = badc_csv
elif ext in ("yml"):
reader = yml
elif ext.lower() in IMAGE_EXTENSIONS:
reader = image
else:
raise Exception(f"No known reader for file with extension: {ext}")

6 changes: 4 additions & 2 deletions checksit/generic.py
@@ -99,7 +99,7 @@ def check_global_attrs(dct, defined_attrs=None, vocab_attrs=None, regex_attrs=No
errors.append(f"[global-attributes:**************:{attr}]: No value defined for attribute '{attr}'.")
else:
errors.extend(vocabs.check(vocab_attrs[attr], dct["global_attributes"].get(attr), label=f"[global-attributes:******:{attr}]***"))

for attr in regex_attrs:
if attr not in dct['global_attributes']:
errors.append(
@@ -123,7 +123,9 @@ def check_global_attrs(dct, defined_attrs=None, vocab_attrs=None, regex_attrs=No
elif is_undefined(dct['global_attributes'].get(attr)):
errors.append(f"[global-attributes:**************:{attr}]: No value defined for attribute '{attr}'.")
else:
errors.extend(rules.check(rules_attrs[attr], dct['global_attributes'].get(attr), label=f"[global-attributes:******:{attr}]***"))
rules_check_output = rules.check(rules_attrs[attr], dct['global_attributes'].get(attr), context=dct['inpt'], label=f"[global-attributes:******:{attr}]***")
warnings.extend(rules_check_output[1])
errors.extend(rules_check_output[0])


return errors, warnings
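
The change above assumes rules.check now returns a pair of lists, errors first and warnings second, and accepts the raw input as context. A small self-contained sketch of that calling pattern, with a stand-in for the real rules.check (names and behaviour here are illustrative, not checksit's):

from typing import List, Tuple

def check_rule(rule: str, value, context=None, label: str = "") -> Tuple[List[str], List[str]]:
    # Stand-in for checksit's rules.check: returns (errors, warnings).
    errors, warnings = [], []
    if value is None:
        errors.append(f"{label} no value found for rule '{rule}'")
    return errors, warnings

errors, warnings = [], []
rule_errors, rule_warnings = check_rule("some-rule", None, context="file.nc", label="[global-attributes:source]")
errors.extend(rule_errors)      # matches rules_check_output[0] above
warnings.extend(rule_warnings)  # matches rules_check_output[1] above
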
7 changes: 5 additions & 2 deletions checksit/readers/cdl.py
@@ -2,6 +2,7 @@
import re
import yaml
import subprocess as sp
import sys

from ..cvs import vocabs, vocabs_prefix

@@ -40,7 +41,8 @@ def _parse(self, inpt):

for s in self.CDL_SPLITTERS:
if s not in cdl_lines:
raise Exception(f"Invalid file or CDL contents provided: '{inpt[:100]}...'")
print(f"Please check your command - invalid file or CDL contents provided: '{inpt[:100]}...'")
sys.exit(1)

sections = self._get_sections(cdl_lines, split_patterns=self.CDL_SPLITTERS, start_at=1)

@@ -188,7 +190,8 @@ def to_yaml(self):
def to_dict(self):
return {"dimensions": self.dimensions,
"variables": self.variables,
"global_attributes": self.global_attrs}
"global_attributes": self.global_attrs,
"inpt": self.inpt}


def read(fpath, verbose=False):
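
With the addition above, the CDL reader's to_dict output carries the original input alongside the parsed header, matching the "inpt" key the new image reader (below) also provides and which check.py passes to rules.check as context. A hedged sketch, assuming cdl.read returns the parser object just as image.read does and using a made-up file name:

from checksit.readers import cdl

parsed = cdl.read("ncas-instrument_site_20240101_product_v1.0.nc").to_dict()  # hypothetical file
print(sorted(parsed.keys()))
# -> ['dimensions', 'global_attributes', 'inpt', 'variables']
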
60 changes: 60 additions & 0 deletions checksit/readers/image.py
@@ -0,0 +1,60 @@
import subprocess as sp
import yaml

def get_output(cmd):
subp = sp.Popen(cmd, shell=True, stdout=sp.PIPE, stderr=sp.PIPE)
return subp.stdout.read().decode("charmap"), subp.stderr.read().decode("charmap")


class ImageParser:

def __init__(self, inpt, verbose=False):
self.inpt = inpt
self.verbose = verbose
self.base_exiftool_arguments = ["exiftool", "-G1", "-j", "-c", "%+.6f"]
self._find_exiftool()
self._parse(inpt)

def _parse(self, inpt):
if self.verbose: print(f"[INFO] Parsing input: {inpt[:100]}...")
self.global_attrs = {}
exiftool_arguments = self.base_exiftool_arguments + [inpt]
exiftool_return_string = sp.check_output(exiftool_arguments)
raw_global_attrs = yaml.load(exiftool_return_string, Loader=yaml.SafeLoader)[0]
for tag_name in raw_global_attrs.keys():
value_type = type(raw_global_attrs[tag_name])
if value_type == list:
self.global_attrs[tag_name] = str(raw_global_attrs[tag_name][0])
else:
self.global_attrs[tag_name] = str(raw_global_attrs[tag_name])

def _find_exiftool(self):
if self.verbose: print("[INFO] Searching for exiftool...")
which_output, which_error = get_output("which exiftool")
if which_error.startswith("which: no exiftool in"):
msg = (
f"'exiftool' required to read image file metadata but cannot be found.\n"
f" Visit https://exiftool.org/ for information on 'exiftool'."
)
raise RuntimeError(msg)
else:
self.exiftool_location = which_output.strip()
if self.verbose: print(f"[INFO] Found exiftool at {self.exiftool_location}.")

def _attrs_dict(self,content_lines):
attr_dict = {}
for line in content_lines:
if self.verbose: print(f"WORKING ON LINE: {line}")
key_0 = line.split("=",1)[0].strip()
key = key_0[1:] #removes first character - unwanted quotation marks
value = line.split("=",1)[1].strip()
attr_dict[key] = value
return attr_dict

def to_dict(self):
return {"global_attributes": self.global_attrs, "inpt": self.inpt}


def read(fpath, verbose=False):
return ImageParser(fpath, verbose=verbose)
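
A usage sketch for the new reader, with a made-up file name: it shells out to exiftool with the -G1 -j -c "%+.6f" arguments above, so every key in global_attributes is a group-qualified tag name and every value is stored as a string.

from checksit.readers import image

# Roughly equivalent to: exiftool -G1 -j -c "%+.6f" <file>
parsed = image.read("ncas-cam-9_cao_20240101_photo_v1.0.jpg", verbose=True).to_dict()

# Keys follow exiftool's "Group1:TagName" form, e.g. the attribute
# check.py uses to detect the NCAS image metadata standard.
print(parsed["global_attributes"].get("XMP-photoshop:Instructions"))
print(parsed["inpt"])  # the original file path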
