From 48758fb4da1f6516b4216e7054ee49188a8e23c6 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 12:49:40 +0200 Subject: [PATCH 01/78] fix: only skip cores when the number is strictly under the requirement. Let it pass when they match. --- cuebot/src/main/java/com/imageworks/spcue/SortableShow.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java b/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java index f13fbaae2..0b81a0791 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java +++ b/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java @@ -54,8 +54,8 @@ public boolean isSkipped(String tags, long cores, long memory) { try { if (failed.containsKey(tags)) { long [] mark = failed.get(tags); - if (cores <= mark[0]) { - logger.info("skipped due to not enough cores " + cores + " <= " + mark[0]); + if (cores < mark[0]) { + logger.info("skipped due to not enough cores " + cores + " < " + mark[0]); return true; } else if (memory <= mark[1]) { From 8f802404b2df5d29feec1cb5609886cf3b47cb3b Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 15:50:20 +0200 Subject: [PATCH 02/78] fix: only skip memory when the amount is strictly under the requirement. Let it pass when it matches. 
--- cuebot/src/main/java/com/imageworks/spcue/SortableShow.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java b/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java index 0b81a0791..83798f079 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java +++ b/cuebot/src/main/java/com/imageworks/spcue/SortableShow.java @@ -58,8 +58,8 @@ public boolean isSkipped(String tags, long cores, long memory) { logger.info("skipped due to not enough cores " + cores + " < " + mark[0]); return true; } - else if (memory <= mark[1]) { - logger.info("skipped due to not enough memory " + memory + " <= " + mark[1]); + else if (memory < mark[1]) { + logger.info("skipped due to not enough memory " + memory + " < " + mark[1]); return true; } } From 7a3917de6c31472277f127ad1da2f783282ba5ec Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 17:36:43 +0200 Subject: [PATCH 03/78] feat: handle negative cores request --- .../main/java/com/imageworks/spcue/DispatchHost.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 495d0a9b1..35f87e8e8 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -24,9 +24,14 @@ import com.imageworks.spcue.grpc.host.LockState; import com.imageworks.spcue.util.CueUtil; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.LogManager; + public class DispatchHost extends Entity implements HostInterface, FacilityInterface, ResourceContainer { + private static final Logger logger = LogManager.getLogger(DispatchHost.class); + public String facilityId; public String allocationId; public LockState lockState; @@ -78,7 +83,12 @@ public String 
getFacilityId() { @Override public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) { - + if (minCores <= 0) { + int requestedCores = Math.max(idleCores + minCores, 1); + logger.info("Requested core number is " + minCores + " <= 0, + matching up to max number with difference " + idleCores + " > " + requestedCores); + minCores = requestedCores + } if (idleCores < minCores) { return false; } From da57b2e8af40da140ca3578962ff1b01ad21eb14 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 17:37:51 +0200 Subject: [PATCH 04/78] fix: missing semicolon --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 35f87e8e8..b66d0aa21 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -87,7 +87,7 @@ public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, int requestedCores = Math.max(idleCores + minCores, 1); logger.info("Requested core number is " + minCores + " <= 0, matching up to max number with difference " + idleCores + " > " + requestedCores); - minCores = requestedCores + minCores = requestedCores; } if (idleCores < minCores) { return false; From 78fb71de36e34a6b477697bcd120cc6dbc3dce66 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 17:47:38 +0200 Subject: [PATCH 05/78] fix: error unclosed string literal --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 
b66d0aa21..bca81f35b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -85,8 +85,8 @@ public String getFacilityId() { public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) { if (minCores <= 0) { int requestedCores = Math.max(idleCores + minCores, 1); - logger.info("Requested core number is " + minCores + " <= 0, - matching up to max number with difference " + idleCores + " > " + requestedCores); + logger.info("Requested core number is " + minCores + " <= 0, " + + "matching up to max number with difference " + idleCores + " > " + requestedCores); minCores = requestedCores; } if (idleCores < minCores) { From fd4a040fc779e50e00afb44747127e2f94d3de3f Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 18:26:56 +0200 Subject: [PATCH 06/78] feat: handle negative frame cores requirement in a public function, to be used during dispatch --- .../java/com/imageworks/spcue/DispatchHost.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index bca81f35b..4cf07e2bc 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -81,14 +81,19 @@ public String getFacilityId() { return facilityId; } + public int handleNegativeCoresRequirement(int cores) { + if (cores > 0) { + return cores; + } + int requestedCores = Math.max(idleCores + cores, 1); + logger.debug("Requested core number is " + cores + " <= 0, " + + "matching up to max number with difference " + idleCores + " > " + requestedCores); + return requestedCores; + } + @Override public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) { - if 
(minCores <= 0) { - int requestedCores = Math.max(idleCores + minCores, 1); - logger.info("Requested core number is " + minCores + " <= 0, " + - "matching up to max number with difference " + idleCores + " > " + requestedCores); - minCores = requestedCores; - } + minCores = handleNegativeCoresRequirement(minCores); if (idleCores < minCores) { return false; } From 263b2485366511971adb33208fee1e0483a743f4 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 18:28:56 +0200 Subject: [PATCH 07/78] fix: handle negative cores requirement during dispatch --- .../com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index a9a8b918a..a89f36e36 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -262,7 +262,7 @@ public List dispatchHost(DispatchHost host, JobInterface job) { VirtualProc proc = VirtualProc.build(host, frame); - if (host.idleCores < frame.minCores || + if (host.idleCores < host.handleNegativeCoresRequirement(frame.minCores) || host.idleMemory < frame.minMemory || host.idleGpus < frame.minGpus || host.idleGpuMemory < frame.minGpuMemory) { From 00f911185cf390ef44eebe781bb4f232ea27d5ba Mon Sep 17 00:00:00 2001 From: Kern Attila Germain <5556461+KernAttila@users.noreply.github.com> Date: Wed, 31 May 2023 18:31:13 +0200 Subject: [PATCH 08/78] fix: handle negative cores requirement during reservation --- cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 
a9032eb43..daaf55111 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -94,7 +94,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { proc.unbooked = false; proc.isLocalDispatch = host.isLocalDispatch; - proc.coresReserved = frame.minCores; + proc.coresReserved = host.handleNegativeCoresRequirement(frame.minCores); proc.memoryReserved = frame.minMemory; proc.gpusReserved = frame.minGpus; proc.gpuMemoryReserved = frame.minGpuMemory; From 5c9dcf4cb9c1aac9a8913c3987d9820d7d0a2729 Mon Sep 17 00:00:00 2001 From: Kern Attila Germain <5556461+KernAttila@users.noreply.github.com> Date: Wed, 31 May 2023 18:34:37 +0200 Subject: [PATCH 09/78] fix: handle negative cores requirement during report --- .../com/imageworks/spcue/dispatcher/HostReportHandler.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index d763cce53..7d6a705a6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -220,10 +220,11 @@ public void handleHostReport(HostReport report, boolean isBoot) { bookingManager.removeInactiveLocalHostAssignment(lca); } } - - if (host.idleCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { + // TODO: handle negative + int cores_to_reserve = host.handleNegativeCoresRequirement(Dispatcher.CORE_POINTS_RESERVED_MIN); + if (host.idleCores < cores_to_reserve) { msg = String.format("%s doesn't have enough idle cores, %d needs %d", - host.name, host.idleCores, Dispatcher.CORE_POINTS_RESERVED_MIN); + host.name, host.idleCores, cores_to_reserve); } else if (host.idleMemory < Dispatcher.MEM_RESERVED_MIN) { msg = String.format("%s doesn't have enough idle memory, %d needs %d", 
From fd1720672f0ad7ea4c48e11d060d5bea66e4b1f9 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 21:38:58 +0200 Subject: [PATCH 10/78] feat: handle negative frame cores requirement in a public function, to be used during dispatch (LocalHost) --- .../imageworks/spcue/LocalHostAssignment.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index 3e073fa73..80f08264c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -22,6 +22,9 @@ import com.imageworks.spcue.dispatcher.ResourceContainer; import com.imageworks.spcue.grpc.renderpartition.RenderPartitionType; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.LogManager; + /** * Contains information about local desktop cores a user has * assigned to the given job. 
@@ -33,6 +36,8 @@ public class LocalHostAssignment extends Entity implements ResourceContainer { + private static final Logger logger = LogManager.getLogger(DispatchHost.class); + private int idleCoreUnits; private long idleMemory; private int idleGpuUnits; @@ -62,9 +67,20 @@ public LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpu this.maxGpuMemory = maxGpuMemory; } + public int handleNegativeCoresRequirement(int cores) { + if (cores > 0) { + return cores; + } + int requestedCores = Math.max(idleCoreUnits + cores, 1); + logger.debug("LocalHostAssignment"); + logger.debug("Requested core number is " + cores + " <= 0, " + + "matching up to max number with difference " + idleCoreUnits + " > " + requestedCores); + return requestedCores; + } + @Override public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, long minGpuMemory) { - + minCores = handleNegativeCoresRequirement(minCores); if (idleCoreUnits < minCores) { return false; } From 627274a45253231ab16ed24642c529d64da7797f Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 23:09:41 +0200 Subject: [PATCH 11/78] dev: add debug to find out how cores are reserved with negative value. 
To be removed --- .../java/com/imageworks/spcue/VirtualProc.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index daaf55111..ee92204c3 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -22,7 +22,11 @@ import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.grpc.host.ThreadMode; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.LogManager; + public class VirtualProc extends FrameEntity implements ProcInterface { + private static final Logger logger = LogManager.getLogger(VirtualProc.class); public String hostId; public String allocationId; @@ -108,11 +112,12 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { */ if (host.strandedCores > 0) { + logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } if (proc.coresReserved >= 100) { - + logger.debug("proc.coresReserved >= 100 : " + proc.coresReserved); int originalCores = proc.coresReserved; /* @@ -129,18 +134,22 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { // if (host.threadMode == ThreadMode.Variable.value() && // CueUtil.isDayTime()) { if (host.threadMode == ThreadMode.ALL_VALUE) { + logger.debug("host.threadMode == ThreadMode.ALL_VALUE"); proc.coresReserved = wholeCores * 100; } else { if (frame.threadable) { if (host.idleMemory - frame.minMemory <= Dispatcher.MEM_STRANDED_THRESHHOLD) { + logger.debug("host.idleMemory - frame.minMemory <= Dispatcher.MEM_STRANDED_THRESHHOLD"); proc.coresReserved = wholeCores * 100; } else { + logger.debug("!!!host.idleMemory - frame.minMemory <= Dispatcher.MEM_STRANDED_THRESHHOLD"); proc.coresReserved = getCoreSpan(host, frame.minMemory); } if 
(host.threadMode == ThreadMode.VARIABLE_VALUE && proc.coresReserved <= 200) { + logger.debug("host.threadMode == ThreadMode.VARIABLE_VALUE && proc.coresReserved <= 200"); proc.coresReserved = 200; if (proc.coresReserved > host.idleCores) { // Do not allow threadable frame running on 1 core. @@ -155,6 +164,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * Sanity checks to ensure coreUnits are not to high or to low. */ if (proc.coresReserved < 100) { + logger.debug("proc.coresReserved < 100 : " + proc.coresReserved); proc.coresReserved = 100; } @@ -163,6 +173,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * original. */ if (proc.coresReserved < originalCores) { + logger.debug("proc.coresReserved < originalCores : " + proc.coresReserved + " < " + originalCores); proc.coresReserved = originalCores; } @@ -170,6 +181,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * Check to ensure we haven't exceeded max cores. */ if (frame.maxCores > 0 && proc.coresReserved >= frame.maxCores) { + logger.debug("frame.maxCores > 0 && proc.coresReserved >= frame.maxCores : " + frame.maxCores); proc.coresReserved = frame.maxCores; } @@ -179,6 +191,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { throw new JobDispatchException( "Do not allow threadable frame running one core on a ThreadMode.Variable host."); } + logger.debug("proc.coresReserved > host.idleCores"); proc.coresReserved = wholeCores * 100; } } @@ -188,6 +201,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * number of cores. 
*/ if (!frame.threadable && proc.coresReserved > 100) { + logger.debug("!frame.threadable && proc.coresReserved > 100"); proc.coresReserved = 100; } @@ -212,6 +226,7 @@ public static final VirtualProc build(DispatchHost host, proc.isLocalDispatch = host.isLocalDispatch; proc.coresReserved = lja.getThreads() * 100; + logger.debug("proc.coresReserved = lja.getThreads() * 100 : " + proc.coresReserved); proc.memoryReserved = frame.minMemory; proc.gpusReserved = frame.minGpus; proc.gpuMemoryReserved = frame.minGpuMemory; @@ -224,6 +239,7 @@ public static final VirtualProc build(DispatchHost host, } if (proc.coresReserved > host.idleCores) { + logger.debug("proc.coresReserved > host.idleCores"); proc.coresReserved = wholeCores * 100; } From 7e8c48ed48c5db0214174fb1b8055b3dadf23eeb Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 31 May 2023 23:45:01 +0200 Subject: [PATCH 12/78] fix: cores are counted by 100 --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 4cf07e2bc..4876c6e54 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -85,7 +85,7 @@ public int handleNegativeCoresRequirement(int cores) { if (cores > 0) { return cores; } - int requestedCores = Math.max(idleCores + cores, 1); + int requestedCores = Math.max(idleCores + cores, 100); logger.debug("Requested core number is " + cores + " <= 0, " + "matching up to max number with difference " + idleCores + " > " + requestedCores); return requestedCores; From e5a77fe7fe51b22f63326ca9da4bc0616080d100 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 01:31:42 +0200 Subject: [PATCH 13/78] fix: do not set 
minimum if below threshold. we now accept negative values. --- .../java/com/imageworks/spcue/service/JobManagerService.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java index c1ca1bdfc..175f56234 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java @@ -275,7 +275,9 @@ public JobDetail createJob(BuildableJob buildableJob) { } if (layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { - layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; + logger.debug("layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN" + + layer.minimumCores + " < " + Dispatcher.CORE_POINTS_RESERVED_MIN); + // layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; } logger.info("creating layer " + layer.name + " range: " + layer.range); From ef2b6277b5d7bfb734d1805711b42bc9f4e0e0b2 Mon Sep 17 00:00:00 2001 From: Kern Attila Germain <5556461+KernAttila@users.noreply.github.com> Date: Thu, 1 Jun 2023 01:35:50 +0200 Subject: [PATCH 14/78] fix: threadable only if core != 100 --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 269a9f4af..968b3d7d5 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -651,7 +651,7 @@ private void determineChunkSize(Element layerTag, LayerDetail layer) { */ private void determineThreadable(Element layerTag, LayerDetail layer) { // Must have at least 1 core to thread. 
- if (layer.minimumCores < 100) { + if (layer.minimumCores == 100) { layer.isThreadable = false; } else if (layerTag.getChildTextTrim("threadable") != null) { From 902d30ae79d5ab44d6f864e0d140c00aca6194d4 Mon Sep 17 00:00:00 2001 From: Kern Attila Germain <5556461+KernAttila@users.noreply.github.com> Date: Thu, 1 Jun 2023 01:48:49 +0200 Subject: [PATCH 15/78] fix: test if minimumCores is between 0 and 100 (excluded) to mark it as not threadable. --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 968b3d7d5..210dfb1a6 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -651,7 +651,7 @@ private void determineChunkSize(Element layerTag, LayerDetail layer) { */ private void determineThreadable(Element layerTag, LayerDetail layer) { // Must have at least 1 core to thread. 
- if (layer.minimumCores == 100) { + if (layer.minimumCores > 0 && layer.minimumCores < 100) { layer.isThreadable = false; } else if (layerTag.getChildTextTrim("threadable") != null) { From f82485ea52599523792c3bd6f70bd162f7da3c2b Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:25:46 +0200 Subject: [PATCH 16/78] dbg: do not override minimumCores --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 210dfb1a6..82fe761a5 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -618,7 +618,8 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { } if (corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { - corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; + logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN") + //corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } layer.minimumCores = corePoints; From c3d2c24106223da225f8c789185b35793d36ddbd Mon Sep 17 00:00:00 2001 From: Kern Attila Germain <5556461+KernAttila@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:27:45 +0200 Subject: [PATCH 17/78] fix: syntax error, missing semicolon --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 82fe761a5..4fe65bb69 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -618,7 +618,7 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { } if 
(corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { - logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN") + logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); //corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } From 3a793b7ac90f4c66be09d590bb6cda49e9617b75 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:46:16 +0200 Subject: [PATCH 18/78] fix: handle negative cores during db insertion, in determineMinimumCores() --- .../main/java/com/imageworks/spcue/service/JobSpec.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 4fe65bb69..c821b1cf5 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -612,10 +612,16 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { int corePoints = layer.minimumCores; if (cores.contains(".")) { - corePoints = (int) (Double.valueOf(cores) * 100 + .5); + if (cores.contains("-")) { + corePoints = (int) (Double.valueOf(cores) * 100 - .5); + } else { + corePoints = (int) (Double.valueOf(cores) * 100 + .5); + } } else { corePoints = Integer.valueOf(cores); } + logger.debug("cores : " + cores) + logger.debug("corePoints : " + corePoints) if (corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); From 7e10f646ec9393dae614d03040920188b0088c83 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:47:26 +0200 Subject: [PATCH 19/78] fix: syntax error, missing semicolon --- .../src/main/java/com/imageworks/spcue/service/JobSpec.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index c821b1cf5..1ee4b5565 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -620,8 +620,8 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { } else { corePoints = Integer.valueOf(cores); } - logger.debug("cores : " + cores) - logger.debug("corePoints : " + corePoints) + logger.debug("cores : " + cores); + logger.debug("corePoints : " + corePoints); if (corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); From 145d14e8d3cd16e1d8f9a0312b1c8e55b0f46297 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:58:23 +0200 Subject: [PATCH 20/78] log: add debug logs to determine threadable during db insertion. --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 1ee4b5565..dfa058880 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -660,11 +660,14 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { // Must have at least 1 core to thread. 
if (layer.minimumCores > 0 && layer.minimumCores < 100) { layer.isThreadable = false; + logger.debug("not threadable : " + layer.minimumCores); } else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); + logger.debug("layerTag.getChildTextTrim("threadable") : " + layerTag.getChildTextTrim("threadable")); } + logger.debug("layer.isThreadable : " + layer.isThreadable); } private void determineResourceDefaults(Element layerTag, From 7ca29186ebc913290370efd9129a2933e8902119 Mon Sep 17 00:00:00 2001 From: Kern Attila Germain <5556461+KernAttila@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:59:53 +0200 Subject: [PATCH 21/78] fix: syntax error, did not pay attention --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index dfa058880..2ab911430 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -665,7 +665,7 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); - logger.debug("layerTag.getChildTextTrim("threadable") : " + layerTag.getChildTextTrim("threadable")); + logger.debug("layerTag.getChildTextTrim('threadable') : " + layerTag.getChildTextTrim("threadable")); } logger.debug("layer.isThreadable : " + layer.isThreadable); } From 7c3f61c2b70ae2e8c6322debd999821b154c3b8e Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:59:28 +0200 Subject: [PATCH 22/78] fix: missing logger --- .../com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java | 6 +++++- 1 
file changed, 5 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java index 15941a196..34b19ba8d 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java @@ -51,8 +51,12 @@ import com.imageworks.spcue.util.CueUtil; import com.imageworks.spcue.util.SqlUtil; -public class LayerDaoJdbc extends JdbcDaoSupport implements LayerDao { +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.LogManager; + +public class LayerDaoJdbc extends JdbcDaoSupport implements LayerDao { + private static final Logger logger = LogManager.getLogger(LayerDaoJdbc.class); private static final String INSERT_OUTPUT_PATH = "INSERT INTO " + "layer_output " + From 3b94240ea412fb756381702fbef63ad2338eba1c Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:00:06 +0200 Subject: [PATCH 23/78] doc: fix typo --- cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java index 9343c3aa0..cdcda5d03 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java @@ -282,7 +282,7 @@ public interface LayerDao { /** * Update all layers of the set type in the specified job - * with the new min cores requirement. + * with the new max cores requirement. 
* * @param job * @param cores From 9baba0bedbd65cb408958437c9a13bcf56476124 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:06:00 +0200 Subject: [PATCH 24/78] feat: handle negative core requests (+ remove debug logs) --- .../java/com/imageworks/spcue/service/JobSpec.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 2ab911430..40b3edbd2 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -620,12 +620,12 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { } else { corePoints = Integer.valueOf(cores); } - logger.debug("cores : " + cores); - logger.debug("corePoints : " + corePoints); - if (corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { + if (corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { + logger.debug("cores : " + cores); + logger.debug("corePoints : " + corePoints); logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); - //corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; + corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } layer.minimumCores = corePoints; @@ -660,14 +660,11 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { // Must have at least 1 core to thread. 
if (layer.minimumCores > 0 && layer.minimumCores < 100) { layer.isThreadable = false; - logger.debug("not threadable : " + layer.minimumCores); } else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); - logger.debug("layerTag.getChildTextTrim('threadable') : " + layerTag.getChildTextTrim("threadable")); } - logger.debug("layer.isThreadable : " + layer.isThreadable); } private void determineResourceDefaults(Element layerTag, From 19521d96b42c8f1606a48e2e50de9f5e0baedae2 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:07:38 +0200 Subject: [PATCH 25/78] feat: handle negative core requests (+ remove debug logs) --- .../com/imageworks/spcue/service/JobManagerService.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java index 175f56234..27dc82021 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java @@ -274,10 +274,8 @@ public JobDetail createJob(BuildableJob buildableJob) { } } - if (layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { - logger.debug("layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN" - + layer.minimumCores + " < " + Dispatcher.CORE_POINTS_RESERVED_MIN); - // layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; + if (layer.minimumCores > 0 && layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { + layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; } logger.info("creating layer " + layer.name + " range: " + layer.range); From 2cd6945c3f111cc9eb60480eebe5fa5f35284fe7 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 
23:17:02 +0200 Subject: [PATCH 26/78] fix: do not default to 100 cores, return false when the processed request is <= 0 --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 4876c6e54..b54535028 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -85,7 +85,7 @@ public int handleNegativeCoresRequirement(int cores) { if (cores > 0) { return cores; } - int requestedCores = Math.max(idleCores + cores, 100); + int requestedCores = idleCores + cores; logger.debug("Requested core number is " + cores + " <= 0, " + "matching up to max number with difference " + idleCores + " > " + requestedCores); return requestedCores; @@ -97,6 +97,9 @@ public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, if (idleCores < minCores) { return false; } + if (minCores <= 0) { + return false; + } else if (idleMemory < minMemory) { return false; } From ca89e05983af95c55ec378ff5d9c26b91addc461 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:17:47 +0200 Subject: [PATCH 27/78] fix: same for local dispatch --- .../main/java/com/imageworks/spcue/LocalHostAssignment.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index 80f08264c..8a9227080 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -71,8 +71,7 @@ public int handleNegativeCoresRequirement(int cores) { if (cores > 0) { return cores; } - int requestedCores = Math.max(idleCoreUnits + cores, 
1); - logger.debug("LocalHostAssignment"); + int requestedCores = idleCoreUnits + cores; logger.debug("Requested core number is " + cores + " <= 0, " + "matching up to max number with difference " + idleCoreUnits + " > " + requestedCores); return requestedCores; @@ -84,6 +83,9 @@ public boolean hasAdditionalResources(int minCores, long minMemory, int minGpus, if (idleCoreUnits < minCores) { return false; } + if (minCores <= 0) { + return false; + } else if (idleMemory < minMemory) { return false; } From b442fe7911e2558fed1b85f0775530cd1bc9546c Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:28:06 +0200 Subject: [PATCH 28/78] doc: remove all logs --- .../java/com/imageworks/spcue/VirtualProc.java | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index ee92204c3..daaf55111 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -22,11 +22,7 @@ import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.grpc.host.ThreadMode; -import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.LogManager; - public class VirtualProc extends FrameEntity implements ProcInterface { - private static final Logger logger = LogManager.getLogger(VirtualProc.class); public String hostId; public String allocationId; @@ -112,12 +108,11 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { */ if (host.strandedCores > 0) { - logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } if (proc.coresReserved >= 100) { - logger.debug("proc.coresReserved >= 100 : " + proc.coresReserved); + int originalCores = proc.coresReserved; /* @@ -134,22 +129,18 @@ public 
static final VirtualProc build(DispatchHost host, DispatchFrame frame) { // if (host.threadMode == ThreadMode.Variable.value() && // CueUtil.isDayTime()) { if (host.threadMode == ThreadMode.ALL_VALUE) { - logger.debug("host.threadMode == ThreadMode.ALL_VALUE"); proc.coresReserved = wholeCores * 100; } else { if (frame.threadable) { if (host.idleMemory - frame.minMemory <= Dispatcher.MEM_STRANDED_THRESHHOLD) { - logger.debug("host.idleMemory - frame.minMemory <= Dispatcher.MEM_STRANDED_THRESHHOLD"); proc.coresReserved = wholeCores * 100; } else { - logger.debug("!!!host.idleMemory - frame.minMemory <= Dispatcher.MEM_STRANDED_THRESHHOLD"); proc.coresReserved = getCoreSpan(host, frame.minMemory); } if (host.threadMode == ThreadMode.VARIABLE_VALUE && proc.coresReserved <= 200) { - logger.debug("host.threadMode == ThreadMode.VARIABLE_VALUE && proc.coresReserved <= 200"); proc.coresReserved = 200; if (proc.coresReserved > host.idleCores) { // Do not allow threadable frame running on 1 core. @@ -164,7 +155,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * Sanity checks to ensure coreUnits are not to high or to low. */ if (proc.coresReserved < 100) { - logger.debug("proc.coresReserved < 100 : " + proc.coresReserved); proc.coresReserved = 100; } @@ -173,7 +163,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * original. */ if (proc.coresReserved < originalCores) { - logger.debug("proc.coresReserved < originalCores : " + proc.coresReserved + " < " + originalCores); proc.coresReserved = originalCores; } @@ -181,7 +170,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * Check to ensure we haven't exceeded max cores. 
*/ if (frame.maxCores > 0 && proc.coresReserved >= frame.maxCores) { - logger.debug("frame.maxCores > 0 && proc.coresReserved >= frame.maxCores : " + frame.maxCores); proc.coresReserved = frame.maxCores; } @@ -191,7 +179,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { throw new JobDispatchException( "Do not allow threadable frame running one core on a ThreadMode.Variable host."); } - logger.debug("proc.coresReserved > host.idleCores"); proc.coresReserved = wholeCores * 100; } } @@ -201,7 +188,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * number of cores. */ if (!frame.threadable && proc.coresReserved > 100) { - logger.debug("!frame.threadable && proc.coresReserved > 100"); proc.coresReserved = 100; } @@ -226,7 +212,6 @@ public static final VirtualProc build(DispatchHost host, proc.isLocalDispatch = host.isLocalDispatch; proc.coresReserved = lja.getThreads() * 100; - logger.debug("proc.coresReserved = lja.getThreads() * 100 : " + proc.coresReserved); proc.memoryReserved = frame.minMemory; proc.gpusReserved = frame.minGpus; proc.gpuMemoryReserved = frame.minGpuMemory; @@ -239,7 +224,6 @@ public static final VirtualProc build(DispatchHost host, } if (proc.coresReserved > host.idleCores) { - logger.debug("proc.coresReserved > host.idleCores"); proc.coresReserved = wholeCores * 100; } From 33c295232807f67ab27629a5b5f6f39f417f7bbd Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:35:15 +0200 Subject: [PATCH 29/78] fis: better management of negative cores --- .../com/imageworks/spcue/dispatcher/HostReportHandler.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index 7d6a705a6..8a31d0ce0 100644 --- 
a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -220,11 +220,10 @@ public void handleHostReport(HostReport report, boolean isBoot) { bookingManager.removeInactiveLocalHostAssignment(lca); } } - // TODO: handle negative int cores_to_reserve = host.handleNegativeCoresRequirement(Dispatcher.CORE_POINTS_RESERVED_MIN); - if (host.idleCores < cores_to_reserve) { + if (cores_to_reserve <= 0 || host.idleCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { msg = String.format("%s doesn't have enough idle cores, %d needs %d", - host.name, host.idleCores, cores_to_reserve); + host.name, host.idleCores, Dispatcher.CORE_POINTS_RESERVED_MIN); } else if (host.idleMemory < Dispatcher.MEM_RESERVED_MIN) { msg = String.format("%s doesn't have enough idle memory, %d needs %d", From 866d738eb93800ec7718df3c830aef518c179115 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:37:59 +0200 Subject: [PATCH 30/78] feat: handle zero/negative cores request when launching frame --- rqd/rqd/rqcore.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/rqd/rqd/rqcore.py b/rqd/rqd/rqcore.py index 224485f2b..541947182 100644 --- a/rqd/rqd/rqcore.py +++ b/rqd/rqd/rqcore.py @@ -896,10 +896,21 @@ def launchFrame(self, runFrame): log.warning(err) raise rqd.rqexceptions.InvalidUserException(err) + # Handle zero/negative cores request if runFrame.num_cores <= 0: - err = "Not launching, numCores must be > 0" - log.warning(err) - raise rqd.rqexceptions.CoreReservationFailureException(err) + cores_to_reserve = self.cores.idle_cores + runFrame.num_cores + if cores_to_reserve <= 0: + err = "Not launching, numCores must be > 0, got {}".format(cores_to_reserve) + log.warning(err) + raise rqd.rqexceptions.CoreReservationFailureException(err) + + log.info("Requested core number is negative {}, " + 
"matching up to max number with difference {} > {}".format( + runFrame.num_cores, + self.cores.idle_cores, + cores_to_reserve) + ) + runFrame.num_cores = cores_to_reserve # See if all requested cores are available self.__threadLock.acquire() From b96dbe14371cb44a036905597fe20ac95da4f582 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:39:20 +0200 Subject: [PATCH 31/78] feat!: make layer threadable when requested minCores is <= 0 --- cuesubmit/cuesubmit/Submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuesubmit/cuesubmit/Submission.py b/cuesubmit/cuesubmit/Submission.py index 94a1f97b9..a5967e971 100644 --- a/cuesubmit/cuesubmit/Submission.py +++ b/cuesubmit/cuesubmit/Submission.py @@ -87,7 +87,7 @@ def buildLayer(layerData, command, lastLayer=None): @type lastLayer: outline.layer.Layer @param lastLayer: layer that this new layer should be dependent on if dependType is set. """ - threadable = float(layerData.cores) >= 2 + threadable = float(layerData.cores) >= 2 or float(layerData.cores) <= 0 layer = outline.modules.shell.Shell( layerData.name, command=command.split(), chunk=layerData.chunk, threads=float(layerData.cores), range=str(layerData.layerRange), threadable=threadable) From 7309701212ee3d7c5b85447b4e7d8d9e72153893 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:39:45 +0200 Subject: [PATCH 32/78] fix: matchNumbersOnly() handle negative values --- cuesubmit/cuesubmit/Validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuesubmit/cuesubmit/Validators.py b/cuesubmit/cuesubmit/Validators.py index 540f92e21..0b0bfb6f8 100644 --- a/cuesubmit/cuesubmit/Validators.py +++ b/cuesubmit/cuesubmit/Validators.py @@ -53,7 +53,7 @@ def matchNoSpaces(value): def matchNumbersOnly(value): """Matches strings with numbers and '.' 
only.""" - if re.match(r'^[0-9.]+$', value): + if re.match(r'^-?[0-9.]+$', value): return True return False From 21794dc184ac29fa8d69234b37ea806429bf4c3c Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:40:52 +0200 Subject: [PATCH 33/78] add tst for negative numbers in validator --- cuesubmit/tests/Validators_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuesubmit/tests/Validators_tests.py b/cuesubmit/tests/Validators_tests.py index 0a5ef78eb..cbbf0b9cd 100644 --- a/cuesubmit/tests/Validators_tests.py +++ b/cuesubmit/tests/Validators_tests.py @@ -77,6 +77,7 @@ def testMatchNoSpaces(self): def testMatchNumbersOnly(self): self.assertTrue(matchNumbersOnly('0123')) self.assertTrue(matchNumbersOnly('3.14')) + self.assertTrue(matchNumbersOnly('-3.14')) # bit weird, but that's how the function is written self.assertTrue(matchNumbersOnly('800.555.555')) From 06b7f8cd61b136f9cacc68cbce9c42725453ceff Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Thu, 1 Jun 2023 23:42:32 +0200 Subject: [PATCH 34/78] doc: can assign negative core values in show filters --- cuegui/cuegui/FilterDialog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuegui/cuegui/FilterDialog.py b/cuegui/cuegui/FilterDialog.py index 479f1c6e5..1b60c1d1f 100644 --- a/cuegui/cuegui/FilterDialog.py +++ b/cuegui/cuegui/FilterDialog.py @@ -454,7 +454,7 @@ def createAction(self): "Create Action", "What value should this property be set to?", 0, - 0, + -8, 50000, 2) value = float(value) From adf59096d3e2b9ba871bf44542d7bba84ec61cf6 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 2 Jun 2023 18:05:36 +0200 Subject: [PATCH 35/78] fix: avoid double booking with negative requests --- .../java/com/imageworks/spcue/DispatchHost.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff 
--git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index b54535028..4a5479340 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -81,12 +81,17 @@ public String getFacilityId() { return facilityId; } - public int handleNegativeCoresRequirement(int cores) { - if (cores > 0) { - return cores; + public int handleNegativeCoresRequirement(int minCores) { + // Do not process positive requests + if (minCores > 0) { + return minCores; } - int requestedCores = idleCores + cores; - logger.debug("Requested core number is " + cores + " <= 0, " + + // If request is negative but cores are already used, return 0 + if (minCores <=0 && idleCores < cores) { + return 0; + } + int requestedCores = idleCores + minCores; + logger.debug("Requested core number is " + minCores + " <= 0, " + "matching up to max number with difference " + idleCores + " > " + requestedCores); return requestedCores; } From 2067abf9ad405a7d05a5bf84c2740b320dba0f2a Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 2 Jun 2023 18:42:37 +0200 Subject: [PATCH 36/78] doc: add debugs to find where we default to service core requirement. 
--- .../main/java/com/imageworks/spcue/service/JobSpec.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 40b3edbd2..a08392739 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -621,10 +621,12 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { corePoints = Integer.valueOf(cores); } + logger.debug("submission cores : " + cores); + logger.debug("layer.minimumCores : " + layer.minimumCores); + logger.debug("corePoints : " + corePoints); + logger.debug("Dispatcher.CORE_POINTS_RESERVED_MIN : " + Dispatcher.CORE_POINTS_RESERVED_MIN); + if (corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { - logger.debug("cores : " + cores); - logger.debug("corePoints : " + corePoints); - logger.debug("corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } From a392a3dabf93f17931cd69db047e0cfa95383666 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 2 Jun 2023 20:56:51 +0200 Subject: [PATCH 37/78] doc: add debug --- .../src/main/java/com/imageworks/spcue/service/JobSpec.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index a08392739..acf5c71c4 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -606,7 +606,8 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { String cores = layerTag.getChildTextTrim("cores"); if (cores == null) { - return; + logger.debug("cores == null"); + // return; } int corePoints = 
layer.minimumCores; @@ -629,6 +630,7 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { if (corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } + logger.debug("corePoints after : " + corePoints); layer.minimumCores = corePoints; } From 1dfd79b18cdbfe2d53a2e6e2c22b7faa02dece2a Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 2 Jun 2023 21:10:03 +0200 Subject: [PATCH 38/78] dbg: default to zero but continue --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index acf5c71c4..ced9d936a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -607,7 +607,7 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { String cores = layerTag.getChildTextTrim("cores"); if (cores == null) { logger.debug("cores == null"); - // return; + cores="0"; } int corePoints = layer.minimumCores; From de62e36b3ea116e53882af223a4356475bd34f2a Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 2 Jun 2023 21:31:53 +0200 Subject: [PATCH 39/78] dev: modify trigger function for test --- .../conf/ddl/postgres/migrations/V1__Initial_schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql index c9d77140b..c24f239ed 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql @@ 
-2661,7 +2661,7 @@ EXECUTE PROCEDURE trigger__before_insert_folder(); CREATE FUNCTION trigger__before_insert_proc() RETURNS TRIGGER AS $body$ BEGIN - IF NEW.int_cores_reserved <= 0 THEN + IF NEW.int_cores_reserved < 0 THEN RAISE EXCEPTION 'failed to allocate proc, tried to allocate 0 cores'; END IF; RETURN NEW; From 939bfe6f808d37147f13d8037623f0731aef5692 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 04:31:51 +0200 Subject: [PATCH 40/78] fix: return early --- .../src/main/java/com/imageworks/spcue/service/JobSpec.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index ced9d936a..b4fc37346 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -606,8 +606,8 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { String cores = layerTag.getChildTextTrim("cores"); if (cores == null) { - logger.debug("cores == null"); - cores="0"; + logger.debug("cores == null"); + return; } int corePoints = layer.minimumCores; From d64039bd085c885c72344a8062264f50c872b1f9 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 04:37:31 +0200 Subject: [PATCH 41/78] fix: avoid double booking with negative requests --- .../com/imageworks/spcue/LocalHostAssignment.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index 8a9227080..9feeed685 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -68,11 +68,16 @@ public 
LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpu } public int handleNegativeCoresRequirement(int cores) { - if (cores > 0) { - return cores; + // Do not process positive requests + if (minCores > 0) { + return minCores; } - int requestedCores = idleCoreUnits + cores; - logger.debug("Requested core number is " + cores + " <= 0, " + + // If request is negative but cores are already used, return 0 + if (minCores <=0 && idleCoreUnits < cores) { + return 0; + } + int requestedCores = idleCoreUnits + minCores; + logger.debug("Requested core number is " + minCores + " <= 0, " + "matching up to max number with difference " + idleCoreUnits + " > " + requestedCores); return requestedCores; } From 444253076c6d3339eaec6cc2b8e6189c02244d31 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 04:41:28 +0200 Subject: [PATCH 42/78] fix: forgot to change parameter name --- .../src/main/java/com/imageworks/spcue/LocalHostAssignment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index 9feeed685..a6ddcdb16 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -67,7 +67,7 @@ public LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpu this.maxGpuMemory = maxGpuMemory; } - public int handleNegativeCoresRequirement(int cores) { + public int handleNegativeCoresRequirement(int minCores) { // Do not process positive requests if (minCores > 0) { return minCores; From a2aa63130ac626cd4b90faa84f7365c3052129b0 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 04:57:35 +0200 Subject: [PATCH 43/78] fix: logger use proper class and fix core -> threads --- 
.../main/java/com/imageworks/spcue/LocalHostAssignment.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index a6ddcdb16..93371834b 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -36,7 +36,7 @@ public class LocalHostAssignment extends Entity implements ResourceContainer { - private static final Logger logger = LogManager.getLogger(DispatchHost.class); + private static final Logger logger = LogManager.getLogger(LocalHostAssignment.class); private int idleCoreUnits; private long idleMemory; @@ -73,7 +73,7 @@ public int handleNegativeCoresRequirement(int minCores) { return minCores; } // If request is negative but cores are already used, return 0 - if (minCores <=0 && idleCoreUnits < cores) { + if (minCores <=0 && idleCoreUnits < threads) { return 0; } int requestedCores = idleCoreUnits + minCores; From c4fc5537df47d577541fce4a706a6db355f88321 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 05:11:24 +0200 Subject: [PATCH 44/78] dbg: do not assign minimum --- .../java/com/imageworks/spcue/service/JobManagerService.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java index 27dc82021..5d49caa24 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java @@ -275,7 +275,9 @@ public JobDetail createJob(BuildableJob buildableJob) { } if (layer.minimumCores > 0 && layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { - layer.minimumCores = 
Dispatcher.CORE_POINTS_RESERVED_MIN; + logger.debug("layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN"); + logger.debug(layer.minimumCores + " < " +Dispatcher.CORE_POINTS_RESERVED_MIN); + // layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; } logger.info("creating layer " + layer.name + " range: " + layer.range); From e7022cc2f5eed3bbe3ca0bade536cecd15bd6b84 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:28:22 +0200 Subject: [PATCH 45/78] dbg: add debug info when not dispatching --- .../com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index a89f36e36..299752620 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -266,6 +266,8 @@ public List dispatchHost(DispatchHost host, JobInterface job) { host.idleMemory < frame.minMemory || host.idleGpus < frame.minGpus || host.idleGpuMemory < frame.minGpuMemory) { + logger.debug('Cannot dispatch, host.idleCores < host.handleNegativeCoresRequirement(frame.minCores)'); + logger.debug(host.idleCores + " < " + host.handleNegativeCoresRequirement(frame.minCores) + " : frame.minCores"); break; } From 2634735dacbd38ac00acbb75042c17e49148eac4 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:31:07 +0200 Subject: [PATCH 46/78] dbg: add debug info when setting corePoints to layer --- .../src/main/java/com/imageworks/spcue/service/JobSpec.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 
b4fc37346..6d2e8f323 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -614,11 +614,14 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { if (cores.contains(".")) { if (cores.contains("-")) { + logger.debug("cores is negative : " + cores); corePoints = (int) (Double.valueOf(cores) * 100 - .5); } else { + logger.debug("cores is positive : " + cores); corePoints = (int) (Double.valueOf(cores) * 100 + .5); } } else { + logger.debug("cores is an integer : " + cores); corePoints = Integer.valueOf(cores); } @@ -628,7 +631,8 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { logger.debug("Dispatcher.CORE_POINTS_RESERVED_MIN : " + Dispatcher.CORE_POINTS_RESERVED_MIN); if (corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { - corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; + logger.debug("corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); + //corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } logger.debug("corePoints after : " + corePoints); From f93754b7b4ef7bedd1366dbf747cc1df9e8dee51 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:34:55 +0200 Subject: [PATCH 47/78] dbg: debug when determining threadable --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 6d2e8f323..5737d0988 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -668,11 +668,14 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { // Must have at least 1 core to thread. 
if (layer.minimumCores > 0 && layer.minimumCores < 100) { layer.isThreadable = false; + logger.debug("not threadable : " + layer.minimumCores); } else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); + logger.debug("layerTag.getChildTextTrim("threadable") : " + layerTag.getChildTextTrim("threadable")); } + logger.debug("layer.isThreadable : " + layer.isThreadable); } private void determineResourceDefaults(Element layerTag, From 1c79a55d539454b1f7fb44231fa5f77613cedb49 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:47:02 +0200 Subject: [PATCH 48/78] fix: syntax --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 5737d0988..c0753f64a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -673,7 +673,7 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); - logger.debug("layerTag.getChildTextTrim("threadable") : " + layerTag.getChildTextTrim("threadable")); + logger.debug("layerTag.getChildTextTrim('threadable') : " + layerTag.getChildTextTrim('threadable')); } logger.debug("layer.isThreadable : " + layer.isThreadable); } From dd44a7aeb0fb3dd2520b92653540966a75b0d5ff Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:47:15 +0200 Subject: [PATCH 49/78] fix: syntax --- .../com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index 299752620..c311895f0 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -266,7 +266,7 @@ public List dispatchHost(DispatchHost host, JobInterface job) { host.idleMemory < frame.minMemory || host.idleGpus < frame.minGpus || host.idleGpuMemory < frame.minGpuMemory) { - logger.debug('Cannot dispatch, host.idleCores < host.handleNegativeCoresRequirement(frame.minCores)'); + logger.debug("Cannot dispatch, host.idleCores < host.handleNegativeCoresRequirement(frame.minCores)"); logger.debug(host.idleCores + " < " + host.handleNegativeCoresRequirement(frame.minCores) + " : frame.minCores"); break; } From 2081bf4e8f25fddfe151467513cd5ce5b30a0b25 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 13:48:39 +0200 Subject: [PATCH 50/78] fix: syntax --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index c0753f64a..51b53b161 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -673,7 +673,7 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); - logger.debug("layerTag.getChildTextTrim('threadable') : " + layerTag.getChildTextTrim('threadable')); + logger.debug("layerTag.getChildTextTrim('threadable') : " + 
layerTag.getChildTextTrim("threadable")); } logger.debug("layer.isThreadable : " + layer.isThreadable); } From 00e29993d0dfe8aeaa2f6816058d2c501011e1c8 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 17:42:45 +0200 Subject: [PATCH 51/78] dbg: add frame summary before dispatch --- .../com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index c311895f0..c8956e323 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -258,7 +258,12 @@ public List dispatchHost(DispatchHost host, JobInterface job) { host.getName() + " " + host.idleCores + "/" + host.idleMemory + " on job " + job.getName()); + logger.debug("Frames summary before dispatch:"); for (DispatchFrame frame: frames) { + logger.debug("frame.minCores: " + frame.minCores + ", frame.command: " + frame.command); + } + for (DispatchFrame frame: frames) { + VirtualProc proc = VirtualProc.build(host, frame); From 7f3e24c3a090ccf94ac3ed247821c6d68c847237 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:00:22 +0200 Subject: [PATCH 52/78] dbg: log requested minCores --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 1 + 1 file changed, 1 insertion(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 4a5479340..8e40b53ac 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -83,6 +83,7 @@ public String getFacilityId() { public int 
handleNegativeCoresRequirement(int minCores) { // Do not process positive requests + logger.debug("requested minCores:" + minCores); if (minCores > 0) { return minCores; } From 29282bdec99017dc70353d97bde40b05477d2de4 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:06:44 +0200 Subject: [PATCH 53/78] dbg: debug when allocating cores --- .../main/java/com/imageworks/spcue/VirtualProc.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index daaf55111..ab111bf20 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -22,8 +22,13 @@ import com.imageworks.spcue.dispatcher.Dispatcher; import com.imageworks.spcue.grpc.host.ThreadMode; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.LogManager; + public class VirtualProc extends FrameEntity implements ProcInterface { + private static final Logger logger = LogManager.getLogger(VirtualProc.class); + public String hostId; public String allocationId; public String frameId; @@ -108,6 +113,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { */ if (host.strandedCores > 0) { + logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } @@ -130,6 +136,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { // CueUtil.isDayTime()) { if (host.threadMode == ThreadMode.ALL_VALUE) { proc.coresReserved = wholeCores * 100; + logger.debug("host.threadMode == ThreadMode.ALL_VALUE : proc.coresReserved=" + proc.coresReserved); } else { if (frame.threadable) { if (host.idleMemory - frame.minMemory @@ -137,6 +144,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { 
proc.coresReserved = wholeCores * 100; } else { proc.coresReserved = getCoreSpan(host, frame.minMemory); + logger.debug("proc.coresReserved = getCoreSpan(host, frame.minMemory):" + proc.coresReserved); } if (host.threadMode == ThreadMode.VARIABLE_VALUE @@ -163,6 +171,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * original. */ if (proc.coresReserved < originalCores) { + logger.debug("proc.coresReserved < originalCores: " + proc.coresReserved + " < " + originalCores); proc.coresReserved = originalCores; } @@ -170,10 +179,12 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { * Check to ensure we haven't exceeded max cores. */ if (frame.maxCores > 0 && proc.coresReserved >= frame.maxCores) { + logger.debug("frame.maxCores > 0 && proc.coresReserved >= frame.maxCores"); proc.coresReserved = frame.maxCores; } if (proc.coresReserved > host.idleCores) { + logger.debug("proc.coresReserved > host.idleCores"); if (host.threadMode == ThreadMode.VARIABLE_VALUE && frame.threadable && wholeCores == 1) { throw new JobDispatchException( @@ -181,6 +192,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { } proc.coresReserved = wholeCores * 100; } + logger.debug("finally, proc.coresReserved = " + proc.coresReserved); } /* From 8a0f001a36b567c4a0041754f52548170424d5db Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:07:24 +0200 Subject: [PATCH 54/78] dbg: add debug info when dispatching --- .../java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 1 + 1 file changed, 1 insertion(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index c8956e323..f86684535 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ 
b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -288,6 +288,7 @@ public List dispatchHost(DispatchHost host, JobInterface job) { boolean success = new DispatchFrameTemplate(proc, job, frame, false) { public void wrapDispatchFrame() { + logger.debug("Dispatching frame with minCores: " + frame.minCores + " on proc with: " + proc.minCores); dispatch(frame, proc); dispatchSummary(proc, frame, "Booking"); return; From 85bab384cf3df0a453357d3572bfed5e406cd67a Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:09:22 +0200 Subject: [PATCH 55/78] dbg, change variable to display --- .../com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index f86684535..bfec35de0 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -288,7 +288,7 @@ public List dispatchHost(DispatchHost host, JobInterface job) { boolean success = new DispatchFrameTemplate(proc, job, frame, false) { public void wrapDispatchFrame() { - logger.debug("Dispatching frame with minCores: " + frame.minCores + " on proc with: " + proc.minCores); + logger.debug("Dispatching frame with minCores: " + frame.minCores + " on proc with coresReserved= " + proc.coresReserved); dispatch(frame, proc); dispatchSummary(proc, frame, "Booking"); return; From 2ccd2e4d536614bf3e8bb4e2593f2bc5feaf64f8 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:36:35 +0200 Subject: [PATCH 56/78] dbg: add logs and do not use getCoreSpan() --- cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java | 7 +++++++ 1 file changed, 7 
insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index ab111bf20..0eac1203a 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -145,6 +145,8 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { } else { proc.coresReserved = getCoreSpan(host, frame.minMemory); logger.debug("proc.coresReserved = getCoreSpan(host, frame.minMemory):" + proc.coresReserved); + proc.coresReserved = originalCores; + logger.debug("avoid getCoreSpan():" + proc.coresReserved); } if (host.threadMode == ThreadMode.VARIABLE_VALUE @@ -253,14 +255,19 @@ public static final VirtualProc build(DispatchHost host, */ public static int getCoreSpan(DispatchHost host, long minMemory) { int totalCores = (int) (Math.floor(host.cores / 100.0)); + logger.debug("getCoreSpan() -> totalCores = " + totalCores); int idleCores = (int) (Math.floor(host.idleCores / 100.0)); + logger.debug("getCoreSpan() -> idleCores = " + idleCores); if (idleCores < 1) { return 100; } long memPerCore = host.idleMemory / totalCores; + logger.debug("getCoreSpan() -> memPerCore = " + memPerCore); double procs = minMemory / (double) memPerCore; + logger.debug("getCoreSpan() -> procs = " + procs); int reserveCores = (int) (Math.round(procs)) * 100; + logger.debug("getCoreSpan() -> reserveCores = " + reserveCores); return reserveCores; } From 1f24058e067543ddfe530072187cad55765f2183 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:59:56 +0200 Subject: [PATCH 57/78] doc: remove debug and keep getCoreSpan() --- cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 
0eac1203a..8864a8e79 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -145,8 +145,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { } else { proc.coresReserved = getCoreSpan(host, frame.minMemory); logger.debug("proc.coresReserved = getCoreSpan(host, frame.minMemory):" + proc.coresReserved); - proc.coresReserved = originalCores; - logger.debug("avoid getCoreSpan():" + proc.coresReserved); } if (host.threadMode == ThreadMode.VARIABLE_VALUE From 359ed535e88176b87fd555d77a775bf4ceabcb5b Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 19:14:33 +0200 Subject: [PATCH 58/78] dbg: do not accept negative cores when the host is used. --- .../src/main/java/com/imageworks/spcue/VirtualProc.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 8864a8e79..08a2d7c91 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -99,7 +99,8 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { proc.unbooked = false; proc.isLocalDispatch = host.isLocalDispatch; - proc.coresReserved = host.handleNegativeCoresRequirement(frame.minCores); + // proc.coresReserved = host.handleNegativeCoresRequirement(frame.minCores); + proc.coresReserved = frame.minCores; proc.memoryReserved = frame.minMemory; proc.gpusReserved = frame.minGpus; proc.gpuMemoryReserved = frame.minGpuMemory; @@ -116,7 +117,11 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } - + if (proc.coresReserved < 0) { + if 
(host.cores > host.idleCores){ + throw new EntityException("Host has not enough resources to launch the frame"); + } + } if (proc.coresReserved >= 100) { int originalCores = proc.coresReserved; From cc4744b8f708f6140b0c4326ffe8a6c87844a01f Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 19:28:00 +0200 Subject: [PATCH 59/78] fix: handle negative and zero cores requests early --- .../main/java/com/imageworks/spcue/VirtualProc.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 08a2d7c91..11b390fe9 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -117,12 +117,21 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } - if (proc.coresReserved < 0) { + if (proc.coresReserved == 0) { if (host.cores > host.idleCores){ throw new EntityException("Host has not enough resources to launch the frame"); } + logger.debug("Reserving all cores"); + proc.coresReserved = host.cores; } - if (proc.coresReserved >= 100) { + else if (proc.coresReserved < 0) { + if (host.cores > host.idleCores){ + throw new EntityException("Host has not enough resources to launch the frame"); + } + logger.debug("Reserving all cores " + proc.coresReserved); + proc.coresReserved = host.cores + proc.coresReserved; + } + else if (proc.coresReserved >= 100) { int originalCores = proc.coresReserved; From 1f0d7d9ffc2708a0aafb8db3aa6d6ba0171c2f82 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:25:32 +0200 Subject: [PATCH 60/78] feat: add canHandleNegativeCoresRequirement() 
--- .../main/java/com/imageworks/spcue/DispatchHost.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 8e40b53ac..8cab7fef5 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -80,6 +80,18 @@ public String getAllocationId() { public String getFacilityId() { return facilityId; } + public bool canHandleNegativeCoresRequirement(int minCores) { + if (minCores > 0) { + logger.debug(host.name() + " can handle the job with " + minCores + " cores."); + return true; + } + if (host.cores == host.idleCores) { + logger.debug(host.name() + " can handle the job with " + minCores + " cores."); + return true; + } + logger.debug(host.name() + " cannot handle the job with " + minCores + " cores."); + return false; + } public int handleNegativeCoresRequirement(int minCores) { // Do not process positive requests From 5ff094a798f28a300c98dc8be60ebf79d07f1d7d Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:29:50 +0200 Subject: [PATCH 61/78] fix: early break without throwing exception --- .../com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index bfec35de0..c344681f4 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -264,9 +264,11 @@ public List dispatchHost(DispatchHost host, JobInterface job) { } for (DispatchFrame frame: frames) { - VirtualProc proc = VirtualProc.build(host, frame); - + if (frame.minCores <= 0 && 
!proc.canLaunch) { + logger.debug("Cannot dispatch job, host is busy."); + break; + } if (host.idleCores < host.handleNegativeCoresRequirement(frame.minCores) || host.idleMemory < frame.minMemory || host.idleGpus < frame.minGpus || From de80d1d59e8b314411a4e8375bbcd8b84de4698a Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:31:47 +0200 Subject: [PATCH 62/78] fix: typo --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 8cab7fef5..11c23e48e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -80,7 +80,7 @@ public String getAllocationId() { public String getFacilityId() { return facilityId; } - public bool canHandleNegativeCoresRequirement(int minCores) { + public boolean canHandleNegativeCoresRequirement(int minCores) { if (minCores > 0) { logger.debug(host.name() + " can handle the job with " + minCores + " cores."); return true; From 191bdca50ccd057fb76317e94474ccbacc6a1fa2 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:33:06 +0200 Subject: [PATCH 63/78] fix: do not throw exceptions, pass a state instead --- .../main/java/com/imageworks/spcue/VirtualProc.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 11b390fe9..4bf6ced1c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -36,6 +36,7 @@ public class VirtualProc extends FrameEntity implements ProcInterface { public String os; public 
byte[] childProcesses; + public boolean canLaunch; public int coresReserved; public long memoryReserved; public long memoryUsed; @@ -117,17 +118,14 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } + + proc.canLaunch = host.canHandleNegativeCoresRequirement(proc.coresReserved) + if (proc.coresReserved == 0) { - if (host.cores > host.idleCores){ - throw new EntityException("Host has not enough resources to launch the frame"); - } logger.debug("Reserving all cores"); proc.coresReserved = host.cores; } else if (proc.coresReserved < 0) { - if (host.cores > host.idleCores){ - throw new EntityException("Host has not enough resources to launch the frame"); - } logger.debug("Reserving all cores " + proc.coresReserved); proc.coresReserved = host.cores + proc.coresReserved; } From f17d7ae2ecf9d75632a993798ba865ed061a757b Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:34:20 +0200 Subject: [PATCH 64/78] fix: typo --- cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 4bf6ced1c..29d1df396 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -100,7 +100,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { proc.unbooked = false; proc.isLocalDispatch = host.isLocalDispatch; - // proc.coresReserved = host.handleNegativeCoresRequirement(frame.minCores); +// proc.canLaunch = host.canHandleNegativeCoresRequirement(frame.minCores); proc.coresReserved = frame.minCores; proc.memoryReserved = frame.minMemory; proc.gpusReserved = 
frame.minGpus; @@ -119,7 +119,7 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { proc.coresReserved = proc.coresReserved + host.strandedCores; } - proc.canLaunch = host.canHandleNegativeCoresRequirement(proc.coresReserved) + proc.canLaunch = host.canHandleNegativeCoresRequirement(proc.coresReserved); if (proc.coresReserved == 0) { logger.debug("Reserving all cores"); From 06195b77b3a4778e8d4ddd6ab263e0b7697f82f8 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:36:19 +0200 Subject: [PATCH 65/78] fix: use porper function to retrieve host name --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 11c23e48e..a22e3d040 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -82,14 +82,14 @@ public String getFacilityId() { } public boolean canHandleNegativeCoresRequirement(int minCores) { if (minCores > 0) { - logger.debug(host.name() + " can handle the job with " + minCores + " cores."); + logger.debug(host.getName() + " can handle the job with " + minCores + " cores."); return true; } if (host.cores == host.idleCores) { - logger.debug(host.name() + " can handle the job with " + minCores + " cores."); + logger.debug(host.getName() + " can handle the job with " + minCores + " cores."); return true; } - logger.debug(host.name() + " cannot handle the job with " + minCores + " cores."); + logger.debug(host.getName() + " cannot handle the job with " + minCores + " cores."); return false; } From 9b57fe988618a19cc838d936e887b4935663abd0 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:38:04 +0200 Subject: 
[PATCH 66/78] fix: no namespace --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index a22e3d040..ecbf93dec 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -85,11 +85,11 @@ public boolean canHandleNegativeCoresRequirement(int minCores) { logger.debug(host.getName() + " can handle the job with " + minCores + " cores."); return true; } - if (host.cores == host.idleCores) { - logger.debug(host.getName() + " can handle the job with " + minCores + " cores."); + if (cores == idleCores) { + logger.debug(getName() + " can handle the job with " + minCores + " cores."); return true; } - logger.debug(host.getName() + " cannot handle the job with " + minCores + " cores."); + logger.debug(getName() + " cannot handle the job with " + minCores + " cores."); return false; } From 8ff11e36987cc96aba75ea6e916d37f82fa4c1a0 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 20:41:02 +0200 Subject: [PATCH 67/78] fix: no namespace --- cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index ecbf93dec..6f55235bf 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -82,7 +82,7 @@ public String getFacilityId() { } public boolean canHandleNegativeCoresRequirement(int minCores) { if (minCores > 0) { - logger.debug(host.getName() + " can handle the job with " + minCores + " cores."); + logger.debug(getName() + " can handle the job with " + minCores 
+ " cores."); return true; } if (cores == idleCores) { From 79b6d1159ad7d01feacaf44f399095075e722f6b Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:20:16 +0200 Subject: [PATCH 68/78] config: lower thresholds, jobs are not distributed to all hosts --- cuebot/src/main/resources/opencue.properties | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuebot/src/main/resources/opencue.properties b/cuebot/src/main/resources/opencue.properties index a08522eb1..bfe2423ac 100644 --- a/cuebot/src/main/resources/opencue.properties +++ b/cuebot/src/main/resources/opencue.properties @@ -65,15 +65,15 @@ log.frame-log-root.default_os=${CUE_FRAME_LOG_DIR:/shots} dispatcher.job_query_max=20 # Number of seconds before waiting to book the same job from a different host. # "0" disables the job_lock -dispatcher.job_lock_expire_seconds=20 +dispatcher.job_lock_expire_seconds=10 # Concurrency level to allow on the job lock cache dispatcher.job_lock_concurrency_level=14 # Maximum number of frames to query from the DB to attempt to dispatch. dispatcher.frame_query_max=20 # Maximum number of frames to book at one time on the same host. -dispatcher.job_frame_dispatch_max=8 +dispatcher.job_frame_dispatch_max=4 # Maximum number of frames to dispatch from a host at one time. -dispatcher.host_frame_dispatch_max=12 +dispatcher.host_frame_dispatch_max=4 # Choose between different scheduling strategies: # - PRIORITY_ONLY: Sort by priority only # - FIFO: Whether or not to enable FIFO scheduling in the same priority. 
From 247327eb4c879d3d1b6c4cb35eea09f939cfcbf3 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Wed, 7 Jun 2023 11:15:54 +0200 Subject: [PATCH 69/78] pref: set frame_query_max=2 --- cuebot/src/main/resources/opencue.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/resources/opencue.properties b/cuebot/src/main/resources/opencue.properties index bfe2423ac..69e8ac892 100644 --- a/cuebot/src/main/resources/opencue.properties +++ b/cuebot/src/main/resources/opencue.properties @@ -69,7 +69,7 @@ dispatcher.job_lock_expire_seconds=10 # Concurrency level to allow on the job lock cache dispatcher.job_lock_concurrency_level=14 # Maximum number of frames to query from the DB to attempt to dispatch. -dispatcher.frame_query_max=20 +dispatcher.frame_query_max=2 # Maximum number of frames to book at one time on the same host. dispatcher.job_frame_dispatch_max=4 # Maximum number of frames to dispatch from a host at one time. 
From a97b4c0c5a17517333beaf9f50503df8b0427b3b Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 9 Jun 2023 10:53:41 +0200 Subject: [PATCH 70/78] feat: accept dash character in tags (-) --- cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 51b53b161..3ed18ac49 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -117,7 +117,7 @@ public class JobSpec { public JobSpec() { } - public static final String NAME_REGEX = "^([\\w\\.]{3,})$"; + public static final String NAME_REGEX = "^([\\w\\.-]{3,})$"; public static final Pattern NAME_PATTERN = Pattern.compile(NAME_REGEX); From cc0f3a969c7677bdf60dcef6e2462986175b7090 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+kernattila@users.noreply.github.com> Date: Fri, 25 Aug 2023 16:10:58 +0200 Subject: [PATCH 71/78] feat: Added function and property `canHandleNegativeCoresRequest` doc: added some comments doc: Added some documentation doc: fix docstrings and parameters doc: added debug message doc: explain why we allow negative value doc: update debug message for rqd --- .../com/imageworks/spcue/DispatchHost.java | 41 +++++++++++-------- .../imageworks/spcue/LocalHostAssignment.java | 26 +++++++----- .../com/imageworks/spcue/VirtualProc.java | 19 ++------- .../com/imageworks/spcue/dao/LayerDao.java | 29 +++++++------ .../spcue/dispatcher/CoreUnitDispatcher.java | 13 ++---- .../spcue/service/JobManagerService.java | 4 +- .../com/imageworks/spcue/service/JobSpec.java | 16 +------- cuebot/src/main/resources/opencue.properties | 8 ++-- cuegui/cuegui/FilterDialog.py | 2 +- rqd/rqd/rqcore.py | 4 +- 10 files changed, 72 insertions(+), 90 deletions(-) diff --git 
a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 6f55235bf..687e906ac 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -80,33 +80,42 @@ public String getAllocationId() { public String getFacilityId() { return facilityId; } - public boolean canHandleNegativeCoresRequirement(int minCores) { - if (minCores > 0) { - logger.debug(getName() + " can handle the job with " + minCores + " cores."); + + public boolean canHandleNegativeCoresRequest(int requestedCores) { + // Request is positive, no need to test further. + if (requestedCores > 0) { + logger.debug(getName() + " can handle the job with " + requestedCores + " cores."); return true; } + // All cores are available, validate the request. if (cores == idleCores) { - logger.debug(getName() + " can handle the job with " + minCores + " cores."); + logger.debug(getName() + " can handle the job with " + requestedCores + " cores."); return true; } - logger.debug(getName() + " cannot handle the job with " + minCores + " cores."); + // Some or all cores are busy, avoid booking again. + logger.debug(getName() + " cannot handle the job with " + requestedCores + " cores."); return false; } - public int handleNegativeCoresRequirement(int minCores) { - // Do not process positive requests - logger.debug("requested minCores:" + minCores); - if (minCores > 0) { - return minCores; + public int handleNegativeCoresRequirement(int requestedCores) { + // If we request a <=0 amount of cores, return positive core count. + + if (requestedCores > 0) { + // Do not process positive core requests. 
+ logger.debug("Requested " + requestedCores + " cores."); + return requestedCores; } - // If request is negative but cores are already used, return 0 - if (minCores <=0 && idleCores < cores) { + if (requestedCores <=0 && idleCores < cores) { + // If request is negative but cores are already used, return 0. + // We don't want to overbook the host. + logger.debug("Requested " + requestedCores + " cores, but the host is busy and cannot book more jobs."); return 0; } - int requestedCores = idleCores + minCores; - logger.debug("Requested core number is " + minCores + " <= 0, " + - "matching up to max number with difference " + idleCores + " > " + requestedCores); - return requestedCores; + // Book all cores minus the request + int totalCores = idleCores + requestedCores; + logger.debug("Requested " + requestedCores + " cores <= 0, " + + idleCores + " cores are free, booking " + totalCores + " cores"); + return totalCores; } @Override diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index 93371834b..aa944e297 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -67,19 +67,25 @@ public LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpu this.maxGpuMemory = maxGpuMemory; } - public int handleNegativeCoresRequirement(int minCores) { - // Do not process positive requests - if (minCores > 0) { - return minCores; + public int handleNegativeCoresRequirement(int requestedCores) { + // If we request a <=0 amount of cores, return positive core count. + + if (requestedCores > 0) { + // Do not process positive core requests. 
+ logger.debug("Requested " + requestedCores + " cores."); + return requestedCores; } - // If request is negative but cores are already used, return 0 - if (minCores <=0 && idleCoreUnits < threads) { + if (requestedCores <=0 && idleCoreUnits < threads) { + // If request is negative but cores are already used, return 0. + // We don't want to overbook the host. + logger.debug("Requested " + requestedCores + " cores, but the host is busy and cannot book more jobs."); return 0; } - int requestedCores = idleCoreUnits + minCores; - logger.debug("Requested core number is " + minCores + " <= 0, " + - "matching up to max number with difference " + idleCoreUnits + " > " + requestedCores); - return requestedCores; + // Book all cores minus the request + int totalCores = idleCoreUnits + requestedCores; + logger.debug("Requested " + requestedCores + " cores <= 0, " + + idleCoreUnits + " cores are free, booking " + totalCores + " cores"); + return totalCores; } @Override diff --git a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java index 29d1df396..b8dd11131 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/VirtualProc.java @@ -36,7 +36,7 @@ public class VirtualProc extends FrameEntity implements ProcInterface { public String os; public byte[] childProcesses; - public boolean canLaunch; + public boolean canHandleNegativeCoresRequest; public int coresReserved; public long memoryReserved; public long memoryUsed; @@ -100,7 +100,6 @@ public static final VirtualProc build(DispatchHost host, DispatchFrame frame) { proc.unbooked = false; proc.isLocalDispatch = host.isLocalDispatch; -// proc.canLaunch = host.canHandleNegativeCoresRequirement(frame.minCores); proc.coresReserved = frame.minCores; proc.memoryReserved = frame.minMemory; proc.gpusReserved = frame.minGpus; @@ -115,18 +114,17 @@ public static final VirtualProc build(DispatchHost host, 
DispatchFrame frame) { */ if (host.strandedCores > 0) { - logger.debug("host.strandedCores > 0 : " + host.strandedCores); proc.coresReserved = proc.coresReserved + host.strandedCores; } - proc.canLaunch = host.canHandleNegativeCoresRequirement(proc.coresReserved); + proc.canHandleNegativeCoresRequest = host.canHandleNegativeCoresRequest(proc.coresReserved); if (proc.coresReserved == 0) { logger.debug("Reserving all cores"); proc.coresReserved = host.cores; } else if (proc.coresReserved < 0) { - logger.debug("Reserving all cores " + proc.coresReserved); + logger.debug("Reserving all cores minus " + proc.coresReserved); proc.coresReserved = host.cores + proc.coresReserved; } else if (proc.coresReserved >= 100) { @@ -148,7 +146,6 @@ else if (proc.coresReserved >= 100) { // CueUtil.isDayTime()) { if (host.threadMode == ThreadMode.ALL_VALUE) { proc.coresReserved = wholeCores * 100; - logger.debug("host.threadMode == ThreadMode.ALL_VALUE : proc.coresReserved=" + proc.coresReserved); } else { if (frame.threadable) { if (host.idleMemory - frame.minMemory @@ -156,7 +153,6 @@ else if (proc.coresReserved >= 100) { proc.coresReserved = wholeCores * 100; } else { proc.coresReserved = getCoreSpan(host, frame.minMemory); - logger.debug("proc.coresReserved = getCoreSpan(host, frame.minMemory):" + proc.coresReserved); } if (host.threadMode == ThreadMode.VARIABLE_VALUE @@ -183,7 +179,6 @@ else if (proc.coresReserved >= 100) { * original. */ if (proc.coresReserved < originalCores) { - logger.debug("proc.coresReserved < originalCores: " + proc.coresReserved + " < " + originalCores); proc.coresReserved = originalCores; } @@ -191,12 +186,10 @@ else if (proc.coresReserved >= 100) { * Check to ensure we haven't exceeded max cores. 
*/ if (frame.maxCores > 0 && proc.coresReserved >= frame.maxCores) { - logger.debug("frame.maxCores > 0 && proc.coresReserved >= frame.maxCores"); proc.coresReserved = frame.maxCores; } if (proc.coresReserved > host.idleCores) { - logger.debug("proc.coresReserved > host.idleCores"); if (host.threadMode == ThreadMode.VARIABLE_VALUE && frame.threadable && wholeCores == 1) { throw new JobDispatchException( @@ -204,7 +197,6 @@ else if (proc.coresReserved >= 100) { } proc.coresReserved = wholeCores * 100; } - logger.debug("finally, proc.coresReserved = " + proc.coresReserved); } /* @@ -265,19 +257,14 @@ public static final VirtualProc build(DispatchHost host, */ public static int getCoreSpan(DispatchHost host, long minMemory) { int totalCores = (int) (Math.floor(host.cores / 100.0)); - logger.debug("getCoreSpan() -> totalCores = " + totalCores); int idleCores = (int) (Math.floor(host.idleCores / 100.0)); - logger.debug("getCoreSpan() -> idleCores = " + idleCores); if (idleCores < 1) { return 100; } long memPerCore = host.idleMemory / totalCores; - logger.debug("getCoreSpan() -> memPerCore = " + memPerCore); double procs = minMemory / (double) memPerCore; - logger.debug("getCoreSpan() -> procs = " + procs); int reserveCores = (int) (Math.round(procs)) * 100; - logger.debug("getCoreSpan() -> reserveCores = " + reserveCores); return reserveCores; } diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java index cdcda5d03..c4b07edf9 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java @@ -59,7 +59,7 @@ public interface LayerDao { public List getLayerDetails(JobInterface job); /** - * Returns true if supplied layer is compelte. + * Returns true if supplied layer is complete. 
* * @param layer * @return boolean @@ -82,7 +82,7 @@ public interface LayerDao { void insertLayerDetail(LayerDetail l); /** - * gets a layer detail from an object that implments layer + * gets a layer detail from an object that implements layer * * @param layer * @return LayerDetail @@ -167,7 +167,7 @@ public interface LayerDao { void updateLayerTags(LayerInterface layer, Set tags); /** - * Insert a key/valye pair into the layer environment + * Insert a key/value pair into the layer environment * * @param layer * @param key @@ -282,7 +282,7 @@ public interface LayerDao { /** * Update all layers of the set type in the specified job - * with the new max cores requirement. + * with the new min cores requirement. * * @param job * @param cores @@ -292,7 +292,7 @@ public interface LayerDao { /** * Update all layers of the set type in the specified job - * with the new min cores requirement. + * with the new min gpu requirement. * * @param job * @param gpus @@ -304,9 +304,8 @@ public interface LayerDao { * Update a layer's max cores value, which limits how * much threading can go on. * - * @param job - * @param cores - * @param type + * @param layer + * @param threadable */ void updateThreadable(LayerInterface layer, boolean threadable); @@ -314,7 +313,7 @@ public interface LayerDao { * Update a layer's timeout value, which limits how * much the frame can run on a host. * - * @param job + * @param layer * @param timeout */ void updateTimeout(LayerInterface layer, int timeout); @@ -323,8 +322,8 @@ public interface LayerDao { * Update a layer's LLU timeout value, which limits how * much the frame can run on a host without updates in the log file. * - * @param job - * @param timeout + * @param layer + * @param timeout_llu */ void updateTimeoutLLU(LayerInterface layer, int timeout_llu); @@ -341,7 +340,7 @@ public interface LayerDao { /** * Appends a tag to the current set of tags. If the tag - * already exists than nothing happens. + * already exists then nothing happens. 
* * @param layer * @param val @@ -363,8 +362,9 @@ public interface LayerDao { * Update layer usage with processor time usage. * This happens when the proc has completed or failed some work. * - * @param proc + * @param layer * @param newState + * @param exitStatus */ void updateUsage(LayerInterface layer, ResourceUsage usage, int exitStatus); @@ -387,6 +387,9 @@ public interface LayerDao { /** * Enable/disable memory optimizer. + * + * @param layer + * @param state */ void enableMemoryOptimizer(LayerInterface layer, boolean state); diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index c344681f4..8dc770131 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -258,14 +258,9 @@ public List dispatchHost(DispatchHost host, JobInterface job) { host.getName() + " " + host.idleCores + "/" + host.idleMemory + " on job " + job.getName()); - logger.debug("Frames summary before dispatch:"); for (DispatchFrame frame: frames) { - logger.debug("frame.minCores: " + frame.minCores + ", frame.command: " + frame.command); - } - for (DispatchFrame frame: frames) { - VirtualProc proc = VirtualProc.build(host, frame); - if (frame.minCores <= 0 && !proc.canLaunch) { + if (frame.minCores <= 0 && !proc.canHandleNegativeCoresRequest) { logger.debug("Cannot dispatch job, host is busy."); break; } @@ -273,8 +268,7 @@ public List dispatchHost(DispatchHost host, JobInterface job) { host.idleMemory < frame.minMemory || host.idleGpus < frame.minGpus || host.idleGpuMemory < frame.minGpuMemory) { - logger.debug("Cannot dispatch, host.idleCores < host.handleNegativeCoresRequirement(frame.minCores)"); - logger.debug(host.idleCores + " < " + host.handleNegativeCoresRequirement(frame.minCores) + " : frame.minCores"); + logger.debug("Cannot dispatch, insufficient 
resources."); break; } @@ -290,7 +284,8 @@ public List dispatchHost(DispatchHost host, JobInterface job) { boolean success = new DispatchFrameTemplate(proc, job, frame, false) { public void wrapDispatchFrame() { - logger.debug("Dispatching frame with minCores: " + frame.minCores + " on proc with coresReserved= " + proc.coresReserved); + logger.debug("Dispatching frame with " + frame.minCores + " minCores on proc with " + + proc.coresReserved + " coresReserved"); dispatch(frame, proc); dispatchSummary(proc, frame, "Booking"); return; diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java index 5d49caa24..27dc82021 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobManagerService.java @@ -275,9 +275,7 @@ public JobDetail createJob(BuildableJob buildableJob) { } if (layer.minimumCores > 0 && layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { - logger.debug("layer.minimumCores < Dispatcher.CORE_POINTS_RESERVED_MIN"); - logger.debug(layer.minimumCores + " < " +Dispatcher.CORE_POINTS_RESERVED_MIN); - // layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; + layer.minimumCores = Dispatcher.CORE_POINTS_RESERVED_MIN; } logger.info("creating layer " + layer.name + " range: " + layer.range); diff --git a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java index 3ed18ac49..c64afa5d3 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java +++ b/cuebot/src/main/java/com/imageworks/spcue/service/JobSpec.java @@ -606,7 +606,6 @@ private void determineMinimumCores(Element layerTag, LayerDetail layer) { String cores = layerTag.getChildTextTrim("cores"); if (cores == null) { - logger.debug("cores == null"); return; } @@ -614,27 +613,17 @@ private void 
determineMinimumCores(Element layerTag, LayerDetail layer) { if (cores.contains(".")) { if (cores.contains("-")) { - logger.debug("cores is negative : " + cores); corePoints = (int) (Double.valueOf(cores) * 100 - .5); } else { - logger.debug("cores is positive : " + cores); corePoints = (int) (Double.valueOf(cores) * 100 + .5); } } else { - logger.debug("cores is an integer : " + cores); corePoints = Integer.valueOf(cores); } - logger.debug("submission cores : " + cores); - logger.debug("layer.minimumCores : " + layer.minimumCores); - logger.debug("corePoints : " + corePoints); - logger.debug("Dispatcher.CORE_POINTS_RESERVED_MIN : " + Dispatcher.CORE_POINTS_RESERVED_MIN); - if (corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN) { - logger.debug("corePoints > 0 && corePoints < Dispatcher.CORE_POINTS_RESERVED_MIN"); - //corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; + corePoints = Dispatcher.CORE_POINTS_RESERVED_DEFAULT; } - logger.debug("corePoints after : " + corePoints); layer.minimumCores = corePoints; } @@ -668,14 +657,11 @@ private void determineThreadable(Element layerTag, LayerDetail layer) { // Must have at least 1 core to thread. 
if (layer.minimumCores > 0 && layer.minimumCores < 100) { layer.isThreadable = false; - logger.debug("not threadable : " + layer.minimumCores); } else if (layerTag.getChildTextTrim("threadable") != null) { layer.isThreadable = Convert.stringToBool( layerTag.getChildTextTrim("threadable")); - logger.debug("layerTag.getChildTextTrim('threadable') : " + layerTag.getChildTextTrim("threadable")); } - logger.debug("layer.isThreadable : " + layer.isThreadable); } private void determineResourceDefaults(Element layerTag, diff --git a/cuebot/src/main/resources/opencue.properties b/cuebot/src/main/resources/opencue.properties index 69e8ac892..a08522eb1 100644 --- a/cuebot/src/main/resources/opencue.properties +++ b/cuebot/src/main/resources/opencue.properties @@ -65,15 +65,15 @@ log.frame-log-root.default_os=${CUE_FRAME_LOG_DIR:/shots} dispatcher.job_query_max=20 # Number of seconds before waiting to book the same job from a different host. # "0" disables the job_lock -dispatcher.job_lock_expire_seconds=10 +dispatcher.job_lock_expire_seconds=20 # Concurrency level to allow on the job lock cache dispatcher.job_lock_concurrency_level=14 # Maximum number of frames to query from the DB to attempt to dispatch. -dispatcher.frame_query_max=2 +dispatcher.frame_query_max=20 # Maximum number of frames to book at one time on the same host. -dispatcher.job_frame_dispatch_max=4 +dispatcher.job_frame_dispatch_max=8 # Maximum number of frames to dispatch from a host at one time. -dispatcher.host_frame_dispatch_max=4 +dispatcher.host_frame_dispatch_max=12 # Choose between different scheduling strategies: # - PRIORITY_ONLY: Sort by priority only # - FIFO: Whether or not to enable FIFO scheduling in the same priority. 
diff --git a/cuegui/cuegui/FilterDialog.py b/cuegui/cuegui/FilterDialog.py index 1b60c1d1f..31d7a08d7 100644 --- a/cuegui/cuegui/FilterDialog.py +++ b/cuegui/cuegui/FilterDialog.py @@ -454,7 +454,7 @@ def createAction(self): "Create Action", "What value should this property be set to?", 0, - -8, + -8, # Allow minimum core value to be negative, booking all host cores minus this value. 50000, 2) value = float(value) diff --git a/rqd/rqd/rqcore.py b/rqd/rqd/rqcore.py index 541947182..e01bf81ce 100644 --- a/rqd/rqd/rqcore.py +++ b/rqd/rqd/rqcore.py @@ -903,9 +903,7 @@ def launchFrame(self, runFrame): err = "Not launching, numCores must be > 0, got {}".format(cores_to_reserve) log.warning(err) raise rqd.rqexceptions.CoreReservationFailureException(err) - - log.info("Requested core number is negative {}, " - "matching up to max number with difference {} > {}".format( + log.info("Requested {} cores <= 0, {} cores are free, booking {}} cores".format( runFrame.num_cores, self.cores.idle_cores, cores_to_reserve) From 9eddb03c04f230280071c2b77ae665e9f3e7332a Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+KernAttila@users.noreply.github.com> Date: Sat, 26 Aug 2023 16:18:09 +0200 Subject: [PATCH 72/78] fix: line too long --- cuegui/cuegui/FilterDialog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuegui/cuegui/FilterDialog.py b/cuegui/cuegui/FilterDialog.py index 31d7a08d7..48ff6080b 100644 --- a/cuegui/cuegui/FilterDialog.py +++ b/cuegui/cuegui/FilterDialog.py @@ -454,7 +454,7 @@ def createAction(self): "Create Action", "What value should this property be set to?", 0, - -8, # Allow minimum core value to be negative, booking all host cores minus this value. + -8, # Minimum core value can be <=0, booking all cores minus this value. 
50000, 2) value = float(value) From 1a93ca19753a2f9f1caee0e983ce98879af42c72 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+KernAttila@users.noreply.github.com> Date: Sat, 26 Aug 2023 16:28:16 +0200 Subject: [PATCH 73/78] revert: no need to do this anymore, cuebot handles calculation before inserting proc --- .../conf/ddl/postgres/migrations/V1__Initial_schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql index c24f239ed..c9d77140b 100644 --- a/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql +++ b/cuebot/src/main/resources/conf/ddl/postgres/migrations/V1__Initial_schema.sql @@ -2661,7 +2661,7 @@ EXECUTE PROCEDURE trigger__before_insert_folder(); CREATE FUNCTION trigger__before_insert_proc() RETURNS TRIGGER AS $body$ BEGIN - IF NEW.int_cores_reserved < 0 THEN + IF NEW.int_cores_reserved <= 0 THEN RAISE EXCEPTION 'failed to allocate proc, tried to allocate 0 cores'; END IF; RETURN NEW; From 962ce55ab17243c5cb4ad936cce6d4e13c123fa7 Mon Sep 17 00:00:00 2001 From: Kern Attila GERMAIN <5556461+KernAttila@users.noreply.github.com> Date: Sat, 26 Aug 2023 16:48:29 +0200 Subject: [PATCH 74/78] revert: no need to handle negative cores in rqd, cuebot handles the calculation --- rqd/rqd/rqcore.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/rqd/rqd/rqcore.py b/rqd/rqd/rqcore.py index e01bf81ce..224485f2b 100644 --- a/rqd/rqd/rqcore.py +++ b/rqd/rqd/rqcore.py @@ -896,19 +896,10 @@ def launchFrame(self, runFrame): log.warning(err) raise rqd.rqexceptions.InvalidUserException(err) - # Handle zero/negative cores request if runFrame.num_cores <= 0: - cores_to_reserve = self.cores.idle_cores + runFrame.num_cores - if cores_to_reserve <= 0: - err = "Not launching, numCores must be > 0, got {}".format(cores_to_reserve) - 
log.warning(err) - raise rqd.rqexceptions.CoreReservationFailureException(err) - log.info("Requested {} cores <= 0, {} cores are free, booking {}} cores".format( - runFrame.num_cores, - self.cores.idle_cores, - cores_to_reserve) - ) - runFrame.num_cores = cores_to_reserve + err = "Not launching, numCores must be > 0" + log.warning(err) + raise rqd.rqexceptions.CoreReservationFailureException(err) # See if all requested cores are available self.__threadLock.acquire() From 302a67ff6d843715969bce6532d85fce46f86a07 Mon Sep 17 00:00:00 2001 From: KernAttila <5556461+KernAttila@users.noreply.github.com> Date: Mon, 26 Aug 2024 00:16:31 +0200 Subject: [PATCH 75/78] chores: fix variable name, add comment --- .../src/main/java/com/imageworks/spcue/DispatchHost.java | 1 + .../java/com/imageworks/spcue/LocalHostAssignment.java | 1 + .../imageworks/spcue/dispatcher/CoreUnitDispatcher.java | 6 +++--- .../imageworks/spcue/dispatcher/HostReportHandler.java | 8 ++++---- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java index 687e906ac..f01724e17 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java +++ b/cuebot/src/main/java/com/imageworks/spcue/DispatchHost.java @@ -99,6 +99,7 @@ public boolean canHandleNegativeCoresRequest(int requestedCores) { public int handleNegativeCoresRequirement(int requestedCores) { // If we request a <=0 amount of cores, return positive core count. + // Request -2 on a 24 core machine will return 22. if (requestedCores > 0) { // Do not process positive core requests. 
diff --git a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java index aa944e297..65ce05c7e 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java +++ b/cuebot/src/main/java/com/imageworks/spcue/LocalHostAssignment.java @@ -69,6 +69,7 @@ public LocalHostAssignment(int maxCores, int threads, long maxMemory, int maxGpu public int handleNegativeCoresRequirement(int requestedCores) { // If we request a <=0 amount of cores, return positive core count. + // Request -2 on a 24 core machine will return 22. if (requestedCores > 0) { // Do not process positive core requests. diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java index 48d31fe15..226d9466c 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/CoreUnitDispatcher.java @@ -261,14 +261,14 @@ public List dispatchHost(DispatchHost host, JobInterface job) { String[] selfishServices = env.getProperty("dispatcher.frame.selfish.services", "").split(","); for (DispatchFrame frame: frames) { - + VirtualProc proc = VirtualProc.build(host, frame, selfishServices); - + if (frame.minCores <= 0 && !proc.canHandleNegativeCoresRequest) { logger.debug("Cannot dispatch job, host is busy."); break; } - + if (host.idleCores < host.handleNegativeCoresRequirement(frame.minCores) || host.idleMemory < frame.minMemory || host.idleGpus < frame.minGpus || diff --git a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java index 71488d3d7..12a584d16 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dispatcher/HostReportHandler.java @@ -244,7 
+244,7 @@ public void handleHostReport(HostReport report, boolean isBoot) { */ String msg = null; boolean hasLocalJob = bookingManager.hasLocalHostAssignment(host); - int cores_to_reserve = host.handleNegativeCoresRequirement(Dispatcher.CORE_POINTS_RESERVED_MIN); + int coresToReserve = host.handleNegativeCoresRequirement(Dispatcher.CORE_POINTS_RESERVED_MIN); if (hasLocalJob) { List lcas = @@ -256,10 +256,10 @@ public void handleHostReport(HostReport report, boolean isBoot) { if (!isTempDirStorageEnough(report.getHost().getTotalMcp(), report.getHost().getFreeMcp(), host.os)) { msg = String.format( - "%s doens't have enough free space in the temporary directory (mcp), %dMB", + "%s doesn't have enough free space in the temporary directory (mcp), %dMB", host.name, (report.getHost().getFreeMcp()/1024)); } - else if (cores_to_reserve <= 0 || host.idleCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { + else if (coresToReserve <= 0 || host.idleCores < Dispatcher.CORE_POINTS_RESERVED_MIN) { msg = String.format("%s doesn't have enough idle cores, %d needs %d", host.name, host.idleCores, Dispatcher.CORE_POINTS_RESERVED_MIN); } @@ -268,7 +268,7 @@ else if (host.idleMemory < Dispatcher.MEM_RESERVED_MIN) { host.name, host.idleMemory, Dispatcher.MEM_RESERVED_MIN); } else if (report.getHost().getFreeMem() < CueUtil.MB512) { - msg = String.format("%s doens't have enough free system mem, %d needs %d", + msg = String.format("%s doesn't have enough free system mem, %d needs %d", host.name, report.getHost().getFreeMem(), Dispatcher.MEM_RESERVED_MIN); } else if(!host.hardwareState.equals(HardwareState.UP)) { From 1f7235f8236eaf95c5bcd3d83860aa7ebdb53e2c Mon Sep 17 00:00:00 2001 From: KernAttila <5556461+KernAttila@users.noreply.github.com> Date: Sat, 21 Sep 2024 01:26:04 +0200 Subject: [PATCH 76/78] feat: change the layer "cores" label for zero or negative values, stating "ALL" or "ALL (-x)". 
--- cuegui/cuegui/LayerMonitorTree.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cuegui/cuegui/LayerMonitorTree.py b/cuegui/cuegui/LayerMonitorTree.py index 6ddd6cf18..281681ae1 100644 --- a/cuegui/cuegui/LayerMonitorTree.py +++ b/cuegui/cuegui/LayerMonitorTree.py @@ -67,7 +67,7 @@ def __init__(self, parent): data=lambda layer: displayRange(layer), tip="The range of frames that the layer should render.") self.addColumn("Cores", 45, id=6, - data=lambda layer: "%.2f" % layer.data.min_cores, + data=lambda layer: self.labelCoresColumn(layer.data.min_cores), sort=lambda layer: layer.data.min_cores, tip="The number of cores that the frames in this layer\n" "will reserve as a minimum.") @@ -181,6 +181,14 @@ def updateRequest(self): since last updated""" self.ticksWithoutUpdate = 9999 + def labelCoresColumn(self, reserved_cores): + """Returns the reserved cores for a job""" + if reserved_cores > 0: + return "%.2f" % reserved_cores + if reserved_cores == 0: + return "ALL" + return "ALL (%.2f)" % reserved_cores + # pylint: disable=inconsistent-return-statements def setJob(self, job): """Sets the current job. From c1cfbdc23e24d9753842ffa5720847bcf2923c6d Mon Sep 17 00:00:00 2001 From: KernAttila <5556461+KernAttila@users.noreply.github.com> Date: Sat, 21 Sep 2024 01:26:44 +0200 Subject: [PATCH 77/78] doc: update the layer "cores" column tooltip to inform the user about the new negative cores feature. 
--- cuegui/cuegui/LayerMonitorTree.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuegui/cuegui/LayerMonitorTree.py b/cuegui/cuegui/LayerMonitorTree.py index 281681ae1..0f110f874 100644 --- a/cuegui/cuegui/LayerMonitorTree.py +++ b/cuegui/cuegui/LayerMonitorTree.py @@ -70,7 +70,9 @@ def __init__(self, parent): data=lambda layer: self.labelCoresColumn(layer.data.min_cores), sort=lambda layer: layer.data.min_cores, tip="The number of cores that the frames in this layer\n" - "will reserve as a minimum.") + "will reserve as a minimum.\n" + "A zero or negative value indicates that the layer will use\n" + "all available cores on the machine, minus this value.") self.addColumn("Memory", 60, id=7, data=lambda layer: cuegui.Utils.memoryToString(layer.data.min_memory), sort=lambda layer: layer.data.min_memory, From 318021c0e63077652471201ec88c73c89130de19 Mon Sep 17 00:00:00 2001 From: KernAttila <5556461+KernAttila@users.noreply.github.com> Date: Sat, 21 Sep 2024 01:29:05 +0200 Subject: [PATCH 78/78] fix: missing space in sql statement. --- .../java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java index f4720ecce..78753f578 100644 --- a/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java +++ b/cuebot/src/main/java/com/imageworks/spcue/dao/postgres/LayerDaoJdbc.java @@ -81,7 +81,7 @@ public void insertLayerOutput(LayerInterface layer, String filespec) { "FROM " + "layer_output " + "WHERE " + - "pk_layer = ?" + + "pk_layer = ? " + "ORDER BY " + "ser_order";