From d3ee48267fdf05cbdfefe06d799f5b1c36b0552a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 24 Jun 2024 15:25:08 -0700 Subject: [PATCH 1/5] Simplify the version number to version underscored string logic No need to use a regex and store the value since it's used twice. --- python/eups/stack/ProductStack.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/eups/stack/ProductStack.py b/python/eups/stack/ProductStack.py index 0896a5e2..32389fae 100644 --- a/python/eups/stack/ProductStack.py +++ b/python/eups/stack/ProductStack.py @@ -18,7 +18,7 @@ # considered a global tag. userPrefix = "user:" -dotre = re.compile(r'\.') +persistVersionNameNoDot = persistVersionName.replace(".", "_") who = utils.getUserName() class ProductStack: @@ -47,13 +47,13 @@ class ProductStack: persistVersion = persistVersionName # static variable: name of file extension to use to persist data - persistFileExt = "pickleDB%s" % dotre.sub('_', persistVersionName) + persistFileExt = f"pickleDB{persistVersionNameNoDot}" # static variable: regexp for cache file names - persistFileRe = re.compile(r'^(\w\S*)\.%s$' % persistFileExt) + persistFileRe = re.compile(rf'^(\w\S*)\.{persistFileExt}$') # static variable: name of file extension to use to persist data - userTagFileExt = "pickleTag%s" % dotre.sub('_', persistVersionName) + userTagFileExt = f"pickleTag{persistVersionNameNoDot}" def __init__(self, dbpath, persistDir=None, autosave=True): """ From dc7e472b5ad645ecb65dbe920c5cd958c1ef75d3 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 24 Jun 2024 15:29:57 -0700 Subject: [PATCH 2/5] Rewrite AtomicFile to be a context manager Allows it to easily be used in a with block. --- python/eups/app.py | 6 +-- python/eups/stack/ProductStack.py | 7 ++- python/eups/utils.py | 80 ++++++++++++++++--------------- 3 files changed, 46 insertions(+), 47 deletions(-) diff --git a/python/eups/app.py b/python/eups/app.py index cae3fdb0..ee4e59fd 100644 --- a/python/eups/app.py +++ b/python/eups/app.py @@ -338,9 +338,8 @@ def printUses(outstrm, productName, versionName=None, eupsenv=None, else: usesInfo = eupsenv.uses() if pickleFile: - fd = utils.AtomicFile(pickleFile, "wb") - pickle.dump(usesInfo, fd, protocol=2) - fd.close() + with utils.AtomicFile(pickleFile, "wb") as fd: + pickle.dump(usesInfo, fd, protocol=2) userList = eupsenv.uses(productName, versionName, depth, usesInfo=usesInfo) @@ -967,4 +966,3 @@ def enableLocking(enableLocking=True): """Enable or disable the use of lock files""" from . import lock lock.disableLocking = not enableLocking - diff --git a/python/eups/stack/ProductStack.py b/python/eups/stack/ProductStack.py index 32389fae..29512778 100644 --- a/python/eups/stack/ProductStack.py +++ b/python/eups/stack/ProductStack.py @@ -300,9 +300,9 @@ def persist(self, flavor, file=None): self.lookup[flavor] = {} flavorData = self.lookup[flavor] - fd = utils.AtomicFile(file, "wb") - pickle.dump(flavorData, fd, protocol=2) - fd.close() + with utils.AtomicFile(file, "wb") as fd: + pickle.dump(flavorData, fd, protocol=2) + # This could fail if another process deleted the file immediately. self.modtimes[file] = os.stat(file).st_mtime def export(self): @@ -886,4 +886,3 @@ def __init__(self, files=None, flavors=None, maxsave=None, msg=None): self.files = files self.flavors = flavors self.maxsave = maxsave - diff --git a/python/eups/utils.py b/python/eups/utils.py index 89f68a01..d82353d5 100644 --- a/python/eups/utils.py +++ b/python/eups/utils.py @@ -1,6 +1,7 @@ """ Utility functions used across EUPS classes. """ +import contextlib import time import os import sys @@ -883,54 +884,55 @@ def cmp_prods_and_none(a, b): raise RuntimeError("A cyclic dependency exists amongst %s" % " ".join(sorted([name([x for x in p]) for p in graph.keys()]))) -class AtomicFile: - """ - A file to which all the changes (writes) are committed all at once, - or not at all. Useful for avoiding race conditions where a reader - may be trying to read from a file that's still being written to. - This is accomplished by creating a temporary file into which all the - writes are directed, and then renaming it to the destination - filename on close(). On POSIX-compliant filesystems, the rename is - guaranteed to be atomic. +@contextlib.contextmanager +def AtomicFile(fn: str, mode: str): + """Guarantee atomic writes to a file. + + Parameters + ---------- + fn : `str` + Name of file to write. + mode : `str` + Write mode. + + Returns + ------- + fd : `typing.IO` + File handle to use for writing. + + Notes + ----- + A file to which all the changes (writes) are committed all at once, + or not at all. Useful for avoiding race conditions where a reader + may be trying to read from a file that's still being written to. + + This is accomplished by creating a temporary file into which all the + writes are directed, and then renaming it to the destination + filename on close(). On POSIX-compliant filesystems, the rename is + guaranteed to be atomic. - Presents a file-like object interface. + Should be used as a context manager. - Constructor arguments: - fn: filename (string) - mode: the read/write mode (string), must be equal to - "w" or "wb", for now + .. code-block:: python - Return value: - file object + with AtomicFile("myfile.txt", "w") as fd: + print("Some text", file=fd) """ - def __init__(self, fn, mode): - assert(mode in ["w", "wb"]) # no other modes are currently implemented + dir = os.path.dirname(fn) - self._fn = fn - dir = os.path.dirname(fn) + fh, tmpfn = tempfile.mkstemp(suffix='.tmp', dir=dir) + fp = os.fdopen(fh, mode) - (self._fh, self._tmpfn) = tempfile.mkstemp(suffix='.tmp', dir=dir) - self._fp = os.fdopen(self._fh, mode) + yield fp - def __getattr__(self, name): - try: - return object.__getattr__(self, name) - except AttributeError: - return getattr(self._fp, name) + # Needed because fclose() doesn't guarantee fsync() + # in POSIX, which may lead to interesting issues (e.g., see + # http://thunk.org/tytso/blog/2009/03/12/delayed-allocation-and-the-zero-length-file-problem/ ) + os.fsync(fh) + fp.close() + os.rename(tmpfn, fn) - def __setattr__(self, name, value): - if name.startswith('_'): - return object.__setattr__(self, name, value) - else: - return setattr(self._fp, name, value) - - def close(self): - os.fsync(self._fh) # Needed because fclose() doesn't guarantee fsync() - # in POSIX, which may lead to interesting issues (e.g., see - # http://thunk.org/tytso/blog/2009/03/12/delayed-allocation-and-the-zero-length-file-problem/ ) - self._fp.close() - os.rename(self._tmpfn, self._fn) def isSubpath(path, root): """!Return True if path is root or in root From 07da1c7085ea917939563b3c733e33fc7b8426ed Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 24 Jun 2024 15:30:25 -0700 Subject: [PATCH 3/5] Use with block for opening the file --- python/eups/stack/ProductStack.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/eups/stack/ProductStack.py b/python/eups/stack/ProductStack.py index 29512778..f032f32a 100644 --- a/python/eups/stack/ProductStack.py +++ b/python/eups/stack/ProductStack.py @@ -690,9 +690,8 @@ def reload(self, flavors=None, persistDir=None, verbose=0): for flavor in flavors: fileName = self._persistPath(flavor,persistDir) self.modtimes[fileName] = os.stat(fileName).st_mtime - fd = open(fileName, "rb") - lookup = pickle.load(fd) - fd.close() + with open(fileName, "rb") as fd: + lookup = pickle.load(fd) self.lookup[flavor] = lookup From d832a927649dd55f16b787f9d7b9a33fd1945391 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 24 Jun 2024 15:30:46 -0700 Subject: [PATCH 4/5] Allow for the scenario where the pickle db file has been removed Some other EUPS process could remove the db file that was created by this process. --- python/eups/stack/ProductStack.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/eups/stack/ProductStack.py b/python/eups/stack/ProductStack.py index f032f32a..44cd66ed 100644 --- a/python/eups/stack/ProductStack.py +++ b/python/eups/stack/ProductStack.py @@ -252,8 +252,15 @@ def save(self, flavors=None, dir=None): raise CacheOutOfSync(outofsync) def _cacheFileIsInSync(self, file): - return (file not in self.modtimes or - os.stat(file).st_mtime <= self.modtimes[file]) + if file not in self.modtimes: + return True + try: + older = os.stat(file).st_mtime <= self.modtimes[file] + except FileNotFoundError: + # File must have been deleted by other eups process. + del self.modtimes[file] + return True + return older def cacheIsInSync(self, flavors=None): """ From 9b91d84f66a922da249e178792d21997863cd2fe Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 25 Jun 2024 08:56:11 -0700 Subject: [PATCH 5/5] Add two more try/except blocks around cache file access --- python/eups/stack/ProductStack.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/eups/stack/ProductStack.py b/python/eups/stack/ProductStack.py index 44cd66ed..432e46c6 100644 --- a/python/eups/stack/ProductStack.py +++ b/python/eups/stack/ProductStack.py @@ -634,7 +634,10 @@ def cacheIsUpToDate(self, flavor, cacheDir=None): return False # get the modification time of the cache file - cache_mtime = os.stat(cache).st_mtime + try: + cache_mtime = os.stat(cache).st_mtime + except FileNotFoundError: + return False # check for user tag updates if cacheDir != self.dbpath and \ @@ -667,7 +670,11 @@ def clearCache(self, flavors=None, cachedir=None, verbose=0): if os.path.exists(fileName): if verbose > 0: print("Deleting %s" % (fileName), file=sys.stderr) - os.remove(fileName) + try: + os.remove(fileName) + except FileNotFoundError: + # Some other process deleted the file. + pass def reload(self, flavors=None, persistDir=None, verbose=0): """