-
Notifications
You must be signed in to change notification settings - Fork 7
/
Patch-for-gpu-GRES.patch
178 lines (158 loc) · 4.69 KB
/
Patch-for-gpu-GRES.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
>From 5a82ed4512eb26e94288e1a78e0f3f9f7e52b1f2 Mon Sep 17 00:00:00 2001
From: Jonathan Michalon <[email protected]>
Date: Wed, 18 Jan 2012 11:41:55 +0100
Subject: [PATCH 1/2] =?UTF-8?q?Patch=20from=20the=20Internet=20(Mariusz=20Mamo=C5=84ski=20<mamonski=20at=20man.poznan.pl>)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch parses GRES lines, adjusts available ressources on nodes
and puts MAX_MGRES from 4 to 16.
Signed-off-by: Jonathan Michalon <[email protected]>
---
include/moab.h | 3 +-
include/msched-common.h | 2 +-
src/moab/MConst.c | 1 +
src/moab/MJob.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++
src/moab/MNode.c | 6 +++++
5 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/include/moab.h b/include/moab.h
index dccc349..d68b462 100644
--- a/include/moab.h
+++ b/include/moab.h
@@ -1692,7 +1692,8 @@ enum MXAttrType {
mxaSID,
mxaSJID,
mxaTPN,
- mxaTRL };
+ mxaTRL,
+ mxaGRes };
enum MauiAppSimCommandEnum {
mascNONE,
diff --git a/src/moab/MConst.c b/src/moab/MConst.c
index b80776f..c66238f 100644
--- a/src/moab/MConst.c
+++ b/src/moab/MConst.c
@@ -938,6 +938,7 @@ const char *MRMXAttr[] = {
"SJID",
"TPN",
"TRL",
+ "GRES",
NULL };
const char *MJobFlags[] = {
diff --git a/src/moab/MJob.c b/src/moab/MJob.c
index a98cff9..e2c0956 100644
--- a/src/moab/MJob.c
+++ b/src/moab/MJob.c
@@ -4072,7 +4072,55 @@ int MJobProcessExtensionString(
RQ->TaskRequestList[2]);
break;
+ case mxaGRes:
+ MUStrCpy(tmpLine,Value,sizeof(tmpLine));
+
+ /* FORMAT: GRES:<RESTYPE>[@<COUNT>][:<RESTYPE>[@<COUNT>]]
+ GRES:tape:matlab@2
+ */
+
+ ptr = MUStrTok(tmpLine,":",&TokPtr2);
+
+ while (ptr != NULL)
+ {
+ char *gresName = NULL;
+ char *p = NULL;
+ int gresCount = 1;
+ int rIndex = 0;
+
+ if ((p = strchr(ptr,'@')) != NULL)
+ {
+ *p = '\0';
+ gresCount = strtol(p+1, NULL, 10);
+ }
+
+ gresName = ptr;
+
+ DBG(3,fCONFIG) DPrint("INFO: GRES requested = %s@%d\n",
+ gresName,
+ gresCount);
+
+ if ((rIndex = MUMAGetIndex(eGRes,gresName,mVerify)) == 0)
+ {
+ DBG(1,fPBS) DPrint("ALERT: Unknown GRES '%s'\n",
+ gresName);
+ }
+ else if (gresCount <= 0)
+ {
+ DBG(1,fPBS) DPrint("ALERT: Invalid GRES count %d\n",
+ gresCount);
+ }
+ else
+ {
+ RQ->DRes.GRes[rIndex].count = gresCount;
+ RQ->DRes.GRes[0].count +=gresCount;
+ }
+
+ ptr = MUStrTok(NULL,":",&TokPtr2);
+ }
+
+ break;
default:
/* not handled */
diff --git a/src/moab/MNode.c b/src/moab/MNode.c
index 557c718..c37ccb4 100644
--- a/src/moab/MNode.c
+++ b/src/moab/MNode.c
@@ -4947,6 +4947,12 @@ int MNodeAdjustAvailResources(
*ARes[rindex] = CRes[rindex];
}
} /* END for (rindex) */
+
+ /* Adjust GRES */
+ for (rindex = 0; rindex < MAX_MGRES; rindex++)
+ {
+ N->ARes.GRes[rindex].count = MAX(0,(N->CRes.GRes[rindex].count - N->DRes.GRes[rindex].count));
+ }
return(SUCCESS);
} /* END MNodeAdjustAvailResources() */
--
1.7.2.5
>From 63907f27ead94cfe4b229d7ad991cb465a2517e9 Mon Sep 17 00:00:00 2001
From: Jonathan Michalon <[email protected]>
Date: Tue, 24 Jan 2012 19:29:14 +0100
Subject: [PATCH 2/2] Hack to prevent Defer for GRES 'gpu'
Signed-off-by: Jonathan Michalon <[email protected]>
---
src/moab/MJob.c | 19 +++++++++++++++++++
1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/src/moab/MJob.c b/src/moab/MJob.c
index e2c0956..ec4b9a5 100644
--- a/src/moab/MJob.c
+++ b/src/moab/MJob.c
@@ -3179,6 +3179,7 @@ int MJobSetHold(
{
char Line[MAX_MLINE];
+ int rIndex;
const char *FName = "MJobSetHold";
@@ -3240,6 +3241,24 @@ int MJobSetHold(
return(SUCCESS);
}
+
+ /* workaround to avoid defer state for GRES "gpu": when they are all utilized
+ * a noResource is triggered, but we don't want a hold */
+ /* only if NoResources, other reasons may be valid */
+ if (HoldReason == mhrNoResources)
+ {
+ /* check whether GPUs are defined as GRES somewhere */
+ if ((rIndex = MUMAGetIndex(eGRes,"gpu",mVerify)) != 0)
+ {
+ /* check whether our job requires GPUs */
+ if (J->Req[0]->DRes.GRes[rIndex].count > 0)
+ {
+ DBG(2,fSCHED) DPrint("INFO: defer disabled (hack for GPUs)\n");
+
+ return(SUCCESS);
+ }
+ }
+ }
if (J->Hold & (1 << mhBatch))
{
--
1.7.2.5