Skip to content

Commit

Permalink
fix: sadly, remove MJF
Browse files Browse the repository at this point in the history
  • Loading branch information
fstagni committed Oct 11, 2023
1 parent 9d357f3 commit 1968c85
Show file tree
Hide file tree
Showing 8 changed files with 11 additions and 451 deletions.
198 changes: 0 additions & 198 deletions src/DIRAC/Core/Utilities/MJF.py

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,6 @@ def __getBatchSystemPlugin(self):
name = batchSystem
break

if name is None and "MACHINEFEATURES" in os.environ and "JOBFEATURES" in os.environ:
# Only use MJF if legacy batch system information not available for now
name = "MJF"

if name is None:
self.log.warn(f"Batch system type for site {DIRAC.siteName()} is not currently supported")
return S_ERROR("Current batch system is not supported")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
" 00:00:60.00 6267 40713 25469,14249 12/31-20:52:00 -"
)

MJF_OUT = "0"

SLURM_OUT_0 = "12345,86400,24,3600,03:00:00"
SLURM_OUT_1 = "12345,86400,24,3600,4-03:00:00"
Expand All @@ -72,7 +71,6 @@
[
("LSF", {}, LSF_OUT, 0.0),
("LSF", {"bin": "/usr/bin", "hostNorm": 10.0}, LSF_OUT, 0.0),
("MJF", {}, MJF_OUT, 0.0),
("SGE", {}, SGE_OUT, 300.0),
("SLURM", {}, SLURM_OUT_0, 432000.0),
("SLURM", {}, SLURM_OUT_1, 432000.0),
Expand Down
31 changes: 0 additions & 31 deletions src/DIRAC/WorkloadManagementSystem/JobWrapper/Watchdog.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from DIRAC import S_ERROR, S_OK, gLogger
from DIRAC.ConfigurationSystem.Client.Config import gConfig
from DIRAC.ConfigurationSystem.Client.PathFinder import getSystemInstance
from DIRAC.Core.Utilities import MJF
from DIRAC.Core.Utilities.Os import getDiskSpace
from DIRAC.Core.Utilities.Profiler import Profiler
from DIRAC.Resources.Computing.BatchSystems.TimeLeft.TimeLeft import TimeLeft
Expand Down Expand Up @@ -217,7 +216,6 @@ def execute(self):
and (time.time() - self.initialValues["StartTime"]) > self.wallClockCheckSeconds * self.wallClockCheckCount
):
self.wallClockCheckCount += 1
self._performWallClockChecks()

if self.littleTimeLeft:
# if we have gone over enough iterations query again
Expand All @@ -242,35 +240,6 @@ def execute(self):
# self.log.debug('Application thread is alive: checking count is %s' %(self.checkCount))
return S_OK()

#############################################################################
def _performWallClockChecks(self):
"""Perform the Watchdog wall clock check using MJF information.

An MJF helper is queried for the wall clock seconds left and for the
"jobstart_secs" job feature. If either value cannot be obtained the check
is silently skipped. When the current time is later than
jobstart_secs + self.stopSigStartSeconds and the wall clock time left is
below self.stopSigFinishSeconds + self.wallClockCheckSeconds, the stop is
logged and self.stopSigSent is set to True.

NOTE(review): indentation is lost in this view; the kill_proc_tree() call
presumably belongs to that same branch -- confirm against the repository.

:return: S_OK() on every path; problems are never reported to the caller
"""
mjf = MJF.MJF()

try:
wallClockSecondsLeft = mjf.getWallClockSecondsLeft()
except Exception:
# Best effort: any failure to get the wall clock seconds left ends the check without an error
return S_OK()

jobstartSeconds = mjf.getIntJobFeature("jobstart_secs")
if jobstartSeconds is None:
# Without a job start time the first half of the condition below cannot be evaluated
return S_OK()

# Both conditions must hold: the job has run for more than stopSigStartSeconds,
# and the time left is less than stopSigFinishSeconds plus one wall clock check period
if (int(time.time()) > jobstartSeconds + self.stopSigStartSeconds) and (
wallClockSecondsLeft < self.stopSigFinishSeconds + self.wallClockCheckSeconds
):
# Need to send the signal! Assume it works to avoid sending the signal more than once
# NOTE(review): the log line reports self.stopSigNumber, but no signal number is
# passed to kill_proc_tree below -- confirm which signal is actually delivered
self.log.info("Sending signal to JobWrapper children", f"({self.stopSigNumber})")
self.stopSigSent = True

# includeParent=False: the process with PID self.wrapperPID itself is excluded
kill_proc_tree(self.wrapperPID, includeParent=False)

return S_OK()

#############################################################################
def _performChecks(self):
"""The Watchdog checks are performed at a different period to the checking of the
Expand Down
Loading

0 comments on commit 1968c85

Please sign in to comment.