From fdac9cb2f893bc20d3fe7eae0cc62611ef4df767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=C2=AA=20Fern=C3=A1ndez?= Date: Sat, 29 Jul 2023 20:18:00 +0200 Subject: [PATCH] cacheWorkflow was silently removing any reference to already guessed web_url --- wfexs_backend/wfexs_backend.py | 57 +++++++++++++--------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/wfexs_backend/wfexs_backend.py b/wfexs_backend/wfexs_backend.py index 7eba1470..29c710ef 100644 --- a/wfexs_backend/wfexs_backend.py +++ b/wfexs_backend/wfexs_backend.py @@ -1456,9 +1456,7 @@ def cacheWorkflow( engineDesc: "Optional[WorkflowType]" = None guessedRepo: "Optional[RemoteRepo]" = None repoDir: "Optional[AbsPath]" = None - repoURL: "Optional[RepoURL]" = None putative: "bool" = False - repoRelPath: "Optional[str]" = None if parsedRepoURL.scheme == "": if (trs_endpoint is not None) and len(trs_endpoint) > 0: i_workflow, repoDir = self.getWorkflowRepoFromTRS( @@ -1475,8 +1473,6 @@ def cacheWorkflow( else: raise WFException("trs_endpoint was not provided") else: - engineDesc = None - # Trying to be smarter guessedRepo = self.guess_repo_params(parsedRepoURL, fail_ok=True) @@ -1487,6 +1483,7 @@ def cacheWorkflow( tag=cast("RepoTag", version_id), rel_path=guessedRepo.rel_path, repo_type=guessedRepo.repo_type, + web_url=guessedRepo.web_url, ) else: ( @@ -1498,11 +1495,10 @@ def cacheWorkflow( offline=offline, ignoreCache=ignoreCache, ) - # This can be incorrect, but let it be for now - if i_workflow is not None: - self.cacheROCrateFilename = cached_putative_path - else: + + if i_workflow is None: repoDir = cached_putative_path + repoRelPath: "Optional[str]" = None if os.path.isdir(repoDir): if len(parsedRepoURL.fragment) > 0: frag_qs = urllib.parse.parse_qs(parsedRepoURL.fragment) @@ -1513,38 +1509,36 @@ def cacheWorkflow( # Let's try getting a pretty filename # when the workflow is a single file repoRelPath = metadata_array[0].preferredName + + # It can be either a relative path to a directory or to a file + # It could be even empty! + if repoRelPath == "": + repoRelPath = None + # raise WFException('Unable to guess repository from RO-Crate manifest') + guessedRepo = RemoteRepo( + repo_url=cast("RepoURL", workflow_id), + tag=cast("RepoTag", version_id), + rel_path=cast("Optional[RelPath]", repoRelPath), + ) putative = True + # This can be incorrect, but let it be for now if i_workflow is not None: guessedRepo = i_workflow.remote_repo engineDesc = i_workflow.workflow_type + self.cacheROCrateFilename = cached_putative_path - if guessedRepo is None: - # raise WFException('Unable to guess repository from RO-Crate manifest') - guessedRepo = RemoteRepo( - repo_url=cast("RepoURL", workflow_id), - tag=cast("RepoTag", version_id), - rel_path=cast("RelPath", repoRelPath), - ) - - if repoURL is None: - repoURL = guessedRepo.repo_url - repoTag = guessedRepo.tag - repoRelPath = guessedRepo.rel_path - repoType = guessedRepo.repo_type + assert guessedRepo is not None + assert guessedRepo.repo_url is not None repoEffectiveCheckout: "Optional[RepoTag]" = None # A putative workflow is one which is already materialized # but we can only guess if repoDir is None: - parsedRepoURL = urllib.parse.urlparse(repoURL) + parsedRepoURL = urllib.parse.urlparse(guessedRepo.repo_url) assert ( len(parsedRepoURL.scheme) > 0 - ), f"Repository id {repoURL} should be a parsable URI" - # It can be either a relative path to a directory or to a file - # It could be even empty! - if repoRelPath == "": - repoRelPath = None + ), f"Repository id {guessedRepo.repo_url} should be a parsable URI" repoDir, repoEffectiveCheckout = self.doMaterializeRepo( guessedRepo, @@ -1552,14 +1546,7 @@ def cacheWorkflow( registerInCache=registerInCache, ) - repo = RemoteRepo( - repo_url=repoURL, - tag=repoTag, - rel_path=repoRelPath, - repo_type=repoType, - ) - - return repoDir, repo, engineDesc, repoEffectiveCheckout + return repoDir, guessedRepo, engineDesc, repoEffectiveCheckout TRS_METADATA_FILE: "Final[RelPath]" = cast("RelPath", "trs_metadata.json") TRS_QUERY_CACHE_FILE: "Final[RelPath]" = cast("RelPath", "trs_result.json")