Skip to content

Commit

Permalink
feat: add a new job management mechanism in the PushJobAgent
Browse files Browse the repository at this point in the history
  • Loading branch information
aldbr committed Apr 25, 2024
1 parent f877d14 commit a7f2833
Show file tree
Hide file tree
Showing 11 changed files with 1,098 additions and 245 deletions.
4 changes: 4 additions & 0 deletions docs/source/AdministratorGuide/Resources/supercomputers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ To leverage the Push model, one has to add the :mod:`~DIRAC.WorkloadManagementSy
# Control the number of jobs handled on the machine
MaxJobsToSubmit = 100
Module = PushJobAgent
# SubmissionPolicy can be "Application" or "JobWrapper"
# - Application (soon deprecated): the agent will submit a workflow to a PoolCE, the workflow is responsible for interacting with the remote site
# - JobWrapper: the agent will submit a JobWrapper directly to the remote site, it is responsible of the remote execution
SubmissionPolicy = <SubmissionPolicy>
}

One has also to authorize the machine hosting the :mod:`~DIRAC.WorkloadManagementSystem.Agent.PushJobAgent` to process jobs via the ``Registry/Hosts/<Host>`` CS section::
Expand Down
2 changes: 1 addition & 1 deletion src/DIRAC/Resources/Computing/ARCComputingElement.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def _writeXRSL(self, executableFile, inputs, outputs):

def _bundlePreamble(self, executableFile):
"""Bundle the preamble with the executable file"""
wrapperContent = f"{self.preamble}\n./{executableFile}"
wrapperContent = f"{self.preamble}\n{os.path.join('.', os.path.basename(executableFile))}"

# We need to make sure the executable file can be executed by the wrapper
# By adding the execution mode to the file, the file will be processed as an "executable" in the XRSL
Expand Down
4 changes: 4 additions & 0 deletions src/DIRAC/Resources/Computing/AREXComputingElement.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,10 @@ def submitJob(self, executableFile, proxy, numberOfJobs=1, inputs=None, outputs=
f"{jobReference} to CE {self.ceHost}",
)

# Remove the bundled preamble
if self.preamble:
os.remove(executableFile)

if batchIDList:
result = S_OK(batchIDList)
result["PilotStampDict"] = stampDict
Expand Down
24 changes: 7 additions & 17 deletions src/DIRAC/Workflow/Modules/Script.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from DIRAC import gLogger, gConfig
from DIRAC.Core.Utilities.Subprocess import systemCall
from DIRAC.WorkloadManagementSystem.Utilities.RemoteRunner import RemoteRunner
from DIRAC.Workflow.Modules.ModuleBase import ModuleBase


Expand Down Expand Up @@ -83,22 +82,13 @@ def _executeCommand(self):
"""execute the self.command (uses systemCall)"""
failed = False

# Check whether the execution should be done remotely
if gConfig.getValue("/LocalSite/RemoteExecution", False):
remoteRunner = RemoteRunner(
gConfig.getValue("/LocalSite/Site"),
gConfig.getValue("/LocalSite/GridCE"),
gConfig.getValue("/LocalSite/CEQueue"),
)
retVal = remoteRunner.execute(self.command)
else:
retVal = systemCall(
timeout=0,
cmdSeq=shlex.split(self.command),
env=self.environment,
callbackFunction=self.callbackFunction,
bufferLimit=self.bufferLimit,
)
retVal = systemCall(
timeout=0,
cmdSeq=shlex.split(self.command),
env=self.environment,
callbackFunction=self.callbackFunction,
bufferLimit=self.bufferLimit,
)

if not retVal["OK"]:
failed = True
Expand Down
Loading

0 comments on commit a7f2833

Please sign in to comment.