Skip to content

Commit

Permalink
Reworked the way singularity and podman caches their images.
Browse files Browse the repository at this point in the history
Previously, cached layers were in the shared cache.

Now, the layers reside inside each containers subdirectory from each working directory. This method could have some nasty effects in encrypted directories, so they are going to be tested later.

Also, this method makes podman working directories a bit more portable.

Last, pending work around Docker, so it behaves a bit more like podman, using the cached image as primary source (with its obvious limitations).
  • Loading branch information
jmfernandez committed Aug 12, 2023
1 parent eb6d542 commit cf3bc71
Show file tree
Hide file tree
Showing 7 changed files with 446 additions and 184 deletions.
1 change: 1 addition & 0 deletions wfexs_backend/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ class StagedSetup(NamedTuple):
extrapolated_inputs_dir: "Optional[AbsPath]"
outputs_dir: "Optional[AbsPath]"
intermediate_dir: "Optional[AbsPath]"
containers_dir: "Optional[AbsPath]"
meta_dir: "Optional[AbsPath]"
temp_dir: "AbsPath"
secure_exec: "bool"
Expand Down
27 changes: 23 additions & 4 deletions wfexs_backend/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

if TYPE_CHECKING:
from typing import (
Any,
Mapping,
MutableMapping,
MutableSequence,
Expand All @@ -48,7 +49,10 @@
Union,
)

from typing_extensions import Final
from typing_extensions import (
TypedDict,
Final,
)

from .common import (
AbsPath,
Expand All @@ -59,10 +63,17 @@
ContainerLocalConfig,
ContainerOperatingSystem,
ContainerTaggedName,
Fingerprint,
ProcessorArchitecture,
RelPath,
)

class DockerManifestMetadata(TypedDict):
image_signature: "Fingerprint"
manifests_signature: "Fingerprint"
manifests: "Sequence[Mapping[str, Any]]"


from . import common


Expand Down Expand Up @@ -99,6 +110,7 @@ class ContainerFactory(abc.ABC):
def __init__(
self,
cacheDir: "Optional[AnyPath]" = None,
stagedContainersDir: "Optional[AnyPath]" = None,
local_config: "Optional[ContainerLocalConfig]" = None,
engine_name: "str" = "unset",
tempDir: "Optional[AnyPath]" = None,
Expand Down Expand Up @@ -143,9 +155,14 @@ def __init__(
self.tempDir = tempDir
# But, for materialized containers, we should use common directories
# This for the containers themselves
self.containersCacheDir = os.path.join(
cacheDir, "containers", self.__class__.__name__
self.containersCacheDir = cast(
"AnyPath", os.path.join(cacheDir, "containers", self.__class__.__name__)
)
# stagedContainersDir
if stagedContainersDir is None:
stagedContainersDir = self.containersCacheDir
self.stagedContainersDir = stagedContainersDir

# This for the symlinks to the containers, following the engine convention
self.engineContainersSymlinkDir = cast(
"AbsPath", os.path.join(self.containersCacheDir, engine_name)
Expand Down Expand Up @@ -260,7 +277,7 @@ def materializeContainers(
self,
tagList: "Sequence[ContainerTaggedName]",
simpleFileNameMethod: "ContainerFileNamingMethod",
containers_dir: "Optional[Union[RelPath, AbsPath]]" = None,
containers_dir: "Optional[AnyPath]" = None,
offline: "bool" = False,
force: "bool" = False,
) -> "Sequence[Container]":
Expand All @@ -270,6 +287,8 @@ def materializeContainers(
materialized_containers: "MutableSequence[Container]" = []
not_found_containers: "MutableSequence[str]" = []

if containers_dir is None:
containers_dir = self.stagedContainersDir
for tag in tagList:
if self.AcceptsContainer(tag):
container = self.materializeSingleContainer(
Expand Down
66 changes: 49 additions & 17 deletions wfexs_backend/docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,14 @@ class DockerContainerFactory(AbstractDockerContainerFactory):
def __init__(
self,
cacheDir: "Optional[AnyPath]" = None,
stagedContainersDir: "Optional[AnyPath]" = None,
local_config: "Optional[ContainerLocalConfig]" = None,
engine_name: "str" = "unset",
tempDir: "Optional[AnyPath]" = None,
):
super().__init__(
cacheDir=cacheDir,
stagedContainersDir=stagedContainersDir,
local_config=local_config,
engine_name=engine_name,
tempDir=tempDir,
Expand Down Expand Up @@ -164,6 +166,39 @@ def _rmi(

return cast("ExitVal", d_retval), d_out_v, d_err_v

def _load(
self,
archivefile: "AbsPath",
dockerTag: "str",
matEnv: "Mapping[str, str]",
) -> "Tuple[ExitVal, str, str]":
with lzma.open(
archivefile, mode="rb"
) as d_in, tempfile.NamedTemporaryFile() as d_out, tempfile.NamedTemporaryFile() as d_err:
self.logger.debug(f"loading docker container {dockerTag}")
with subprocess.Popen(
[self.runtime_cmd, "load"],
env=matEnv,
stdin=d_in,
stdout=d_out,
stderr=d_err,
) as sp:
d_retval = sp.wait()

self.logger.debug(f"docker load {dockerTag} retval: {d_retval}")

with open(d_out.name, "r") as c_stF:
d_out_v = c_stF.read()

self.logger.debug(f"docker load stdout: {d_out_v}")

with open(d_err.name, "r") as c_stF:
d_err_v = c_stF.read()

self.logger.debug(f"docker load stderr: {d_err_v}")

return cast("ExitVal", d_retval), d_out_v, d_err_v

def _save(
self,
dockerTag: "str",
Expand All @@ -181,7 +216,7 @@ def _save(
stderr=d_err,
) as sp:
if sp.stdout is not None:
shutil.copyfileobj(sp.stdout, d_out)
shutil.copyfileobj(sp.stdout, d_out, 1024 * 1024)
d_retval = sp.wait()

self.logger.debug(f"docker save {dockerTag} retval: {d_retval}")
Expand Down Expand Up @@ -289,7 +324,7 @@ def materializeSingleContainer(
if force:
if offline:
raise ContainerFactoryException(
f"Banned remove podman containers in offline mode from {tag_name}"
f"Banned remove docker containers in offline mode from {tag_name}"
)

# Blindly remove
Expand All @@ -302,7 +337,7 @@ def materializeSingleContainer(
if d_retval != 0:
if offline:
raise ContainerFactoryException(
f"Banned pull podman containers in offline mode from {tag_name}"
f"Banned pull docker containers in offline mode from {tag_name}"
)

d_retval, d_out_v, d_err_v = self._pull(dockerTag, matEnv)
Expand Down Expand Up @@ -479,21 +514,18 @@ def materializeSingleContainer(
)

# Last, hardlink or copy the container and its metadata
if containers_dir is not None:
containerPath = cast(
"AbsPath", os.path.join(containers_dir, containerFilename)
)
containerPathMeta = cast(
"AbsPath", os.path.join(containers_dir, containerFilenameMeta)
)
if containers_dir is None:
containers_dir = self.stagedContainersDir
containerPath = cast("AbsPath", os.path.join(containers_dir, containerFilename))
containerPathMeta = cast(
"AbsPath", os.path.join(containers_dir, containerFilenameMeta)
)

# Do not allow overwriting in offline mode
if not offline or not os.path.exists(containerPath):
link_or_copy(localContainerPath, containerPath)
if not offline or not os.path.exists(containerPathMeta):
link_or_copy(localContainerPathMeta, containerPathMeta)
else:
containerPath = localContainerPath
# Do not allow overwriting in offline mode
if not offline or not os.path.exists(containerPath):
link_or_copy(localContainerPath, containerPath)
if not offline or not os.path.exists(containerPathMeta):
link_or_copy(localContainerPathMeta, containerPathMeta)

# Learning about the intended processor architecture and variant
architecture = manifest.get("Architecture")
Expand Down
15 changes: 14 additions & 1 deletion wfexs_backend/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def __init__(
outputMetaDir: "Optional[AnyPath]" = None,
intermediateDir: "Optional[AnyPath]" = None,
tempDir: "Optional[AnyPath]" = None,
stagedContainersDir: "Optional[AnyPath]" = None,
secure_exec: "bool" = False,
allowOther: "bool" = False,
config_directory: "Optional[AnyPath]" = None,
Expand Down Expand Up @@ -295,6 +296,16 @@ def __init__(
atexit.register(shutil.rmtree, tempDir)
self.tempDir = tempDir

# This directory will hold the staged containers to be used
if stagedContainersDir is None:
stagedContainersDir = cast(
"AbsPath", os.path.join(workDir, WORKDIR_CONTAINERS_RELDIR)
)
elif not os.path.isabs(stagedContainersDir):
stagedContainersDir = cast("AbsPath", os.path.abspath(stagedContainersDir))
os.makedirs(stagedContainersDir, exist_ok=True)
self.stagedContainersDir = cast("AbsPath", stagedContainersDir)

# Setting up common properties
self.docker_cmd = local_config.get("tools", {}).get(
"dockerCommand", DEFAULT_DOCKER_CMD
Expand Down Expand Up @@ -332,6 +343,7 @@ def __init__(
self.logger.debug(f"Container type {container_type}")
self.container_factory = containerFactory(
cacheDir=cacheDir,
stagedContainersDir=stagedContainersDir,
local_config=local_config,
engine_name=self.__class__.__name__,
tempDir=self.tempDir,
Expand Down Expand Up @@ -412,6 +424,7 @@ def FromStagedSetup(
cacheDir=cache_dir,
cacheWorkflowDir=cache_workflow_dir,
cacheWorkflowInputsDir=cache_workflow_inputs_dir,
stagedContainersDir=staged_setup.containers_dir,
local_config=local_config,
config_directory=config_directory,
)
Expand Down Expand Up @@ -532,7 +545,7 @@ def simpleContainerFileName(self, imageUrl: "URIType") -> "RelPath":
def materialize_containers(
self,
listOfContainerTags: "Sequence[ContainerTaggedName]",
containersDir: "AnyPath",
containersDir: "Optional[AnyPath]" = None,
offline: "bool" = False,
) -> "Tuple[ContainerEngineVersionStr, Sequence[Container], ContainerOperatingSystem, ProcessorArchitecture]":
return (
Expand Down
Loading

0 comments on commit cf3bc71

Please sign in to comment.