5981 |
07 Jul 20 |
nicklas |
1 |
package net.sf.basedb.opengrid.engine; |
5981 |
07 Jul 20 |
nicklas |
2 |
|
6672 |
11 Apr 22 |
nicklas |
3 |
import java.text.SimpleDateFormat; |
5982 |
07 Jul 20 |
nicklas |
4 |
import java.util.Arrays; |
6672 |
11 Apr 22 |
nicklas |
5 |
import java.util.Date; |
5984 |
10 Jul 20 |
nicklas |
6 |
import java.util.HashMap; |
5982 |
07 Jul 20 |
nicklas |
7 |
import java.util.HashSet; |
5984 |
10 Jul 20 |
nicklas |
8 |
import java.util.List; |
6672 |
11 Apr 22 |
nicklas |
9 |
import java.util.Locale; |
5984 |
10 Jul 20 |
nicklas |
10 |
import java.util.Map; |
5982 |
07 Jul 20 |
nicklas |
11 |
import java.util.Set; |
5984 |
10 Jul 20 |
nicklas |
12 |
import java.util.regex.Pattern; |
5982 |
07 Jul 20 |
nicklas |
13 |
|
5984 |
10 Jul 20 |
nicklas |
14 |
import org.jdom2.Document; |
5984 |
10 Jul 20 |
nicklas |
15 |
import org.jdom2.Element; |
5984 |
10 Jul 20 |
nicklas |
16 |
import org.jdom2.filter.ElementFilter; |
5984 |
10 Jul 20 |
nicklas |
17 |
import org.jdom2.xpath.XPathFactory; |
5984 |
10 Jul 20 |
nicklas |
18 |
import org.slf4j.LoggerFactory; |
5984 |
10 Jul 20 |
nicklas |
19 |
|
5984 |
10 Jul 20 |
nicklas |
20 |
import net.sf.basedb.core.Job; |
5984 |
10 Jul 20 |
nicklas |
21 |
import net.sf.basedb.opengrid.CmdResult; |
5982 |
07 Jul 20 |
nicklas |
22 |
import net.sf.basedb.opengrid.JobDefinition; |
5984 |
10 Jul 20 |
nicklas |
23 |
import net.sf.basedb.opengrid.JobIdentifier; |
5984 |
10 Jul 20 |
nicklas |
24 |
import net.sf.basedb.opengrid.JobStatus; |
6629 |
07 Mar 22 |
nicklas |
25 |
import net.sf.basedb.opengrid.OpenGrid; |
5984 |
10 Jul 20 |
nicklas |
26 |
import net.sf.basedb.opengrid.OpenGridSession; |
6629 |
07 Mar 22 |
nicklas |
27 |
import net.sf.basedb.opengrid.ScriptBuilder; |
6672 |
11 Apr 22 |
nicklas |
28 |
import net.sf.basedb.opengrid.config.BatchConfig; |
5981 |
07 Jul 20 |
nicklas |
29 |
import net.sf.basedb.opengrid.config.ClusterConfig; |
5982 |
07 Jul 20 |
nicklas |
30 |
import net.sf.basedb.opengrid.config.JobConfig; |
6629 |
07 Mar 22 |
nicklas |
31 |
import net.sf.basedb.opengrid.filetransfer.InputStreamUploadSource; |
5982 |
07 Jul 20 |
nicklas |
32 |
import net.sf.basedb.opengrid.filetransfer.StringUploadSource; |
5982 |
07 Jul 20 |
nicklas |
33 |
import net.sf.basedb.opengrid.filetransfer.UploadSource; |
7075 |
27 Mar 23 |
nicklas |
34 |
import net.sf.basedb.opengrid.service.OpenGridService; |
5984 |
10 Jul 20 |
nicklas |
35 |
import net.sf.basedb.util.Values; |
5984 |
10 Jul 20 |
nicklas |
36 |
import net.sf.basedb.util.XmlUtil2; |
7075 |
27 Mar 23 |
nicklas |
37 |
import net.sf.basedb.util.extensions.logging.ExtensionsLog; |
7075 |
27 Mar 23 |
nicklas |
38 |
import net.sf.basedb.util.extensions.logging.ExtensionsLogger; |
6672 |
11 Apr 22 |
nicklas |
39 |
import net.sf.basedb.util.formatter.DateFormatter; |
5981 |
07 Jul 20 |
nicklas |
40 |
|
5981 |
07 Jul 20 |
nicklas |
41 |
/** |
5981 |
07 Jul 20 |
nicklas |
Cluster engine implementation for Open Grid clusters. |
5981 |
07 Jul 20 |
nicklas |
@author nicklas |
5981 |
07 Jul 20 |
nicklas |
@since 1.4 |
5981 |
07 Jul 20 |
nicklas |
45 |
*/ |
5981 |
07 Jul 20 |
nicklas |
46 |
public class OpenGridEngine |
5981 |
07 Jul 20 |
nicklas |
47 |
implements ClusterEngine |
5981 |
07 Jul 20 |
nicklas |
48 |
{ |
5981 |
07 Jul 20 |
nicklas |
49 |
|
7075 |
27 Mar 23 |
nicklas |
50 |
private static final ExtensionsLogger logger = |
7075 |
27 Mar 23 |
nicklas |
51 |
ExtensionsLog.getLogger(OpenGridService.ID, true).wrap(LoggerFactory.getLogger(OpenGridEngine.class)); |
5984 |
10 Jul 20 |
nicklas |
52 |
|
5982 |
07 Jul 20 |
nicklas |
53 |
private static final Set<String> ignoredQsubOptions = |
5982 |
07 Jul 20 |
nicklas |
54 |
new HashSet<>(Arrays.asList("p", "terse", "S", "N", "wd", "o", "e")); |
5982 |
07 Jul 20 |
nicklas |
55 |
|
6672 |
11 Apr 22 |
nicklas |
56 |
/** |
6672 |
11 Apr 22 |
nicklas |
Format to use for the -a parameter that controls the earliest |
6672 |
11 Apr 22 |
nicklas |
start time for a job: YYMMDDhhmm.SS |
6672 |
11 Apr 22 |
nicklas |
Example: 2204081145.51 |
6672 |
11 Apr 22 |
nicklas |
@since 1.5 |
6672 |
11 Apr 22 |
nicklas |
61 |
*/ |
6672 |
11 Apr 22 |
nicklas |
62 |
public static final DateFormatter STARTTIME_DATE = |
6672 |
11 Apr 22 |
nicklas |
63 |
new DateFormatter(new SimpleDateFormat("yyMMddHHmm.ss", Locale.ENGLISH)); |
6672 |
11 Apr 22 |
nicklas |
64 |
|
5981 |
07 Jul 20 |
nicklas |
65 |
/**
	Creates a new engine. The constructor itself performs no work.
*/
public OpenGridEngine()
{
}
5981 |
07 Jul 20 |
nicklas |
67 |
|
5981 |
07 Jul 20 |
nicklas |
68 |
@Override |
5981 |
07 Jul 20 |
nicklas |
69 |
public void setDefaultConfig(ClusterConfig config) |
5981 |
07 Jul 20 |
nicklas |
70 |
{ |
5981 |
07 Jul 20 |
nicklas |
71 |
config.setOpenGridInfoCommand("qstat -help | head -n1"); |
5981 |
07 Jul 20 |
nicklas |
72 |
config.setTmpFolder("${TMPDIR}", false); // TMPDIR is a variable set by the Open Grid Scheduler |
5981 |
07 Jul 20 |
nicklas |
73 |
} |
5981 |
07 Jul 20 |
nicklas |
74 |
|
5982 |
07 Jul 20 |
nicklas |
75 |
@Override |
6614 |
28 Feb 22 |
nicklas |
76 |
public JobSubmission createJobSubmission(OpenGridSession session, JobDefinition job, String workFolder, String tmpFolder) |
5982 |
07 Jul 20 |
nicklas |
77 |
{ |
6629 |
07 Mar 22 |
nicklas |
78 |
UploadSource qsub = new StringUploadSource("submit.sh", createQsubScript(job, workFolder, tmpFolder)); |
6629 |
07 Mar 22 |
nicklas |
79 |
UploadSource run = getScript("run.sh"); |
6629 |
07 Mar 22 |
nicklas |
80 |
UploadSource jobScript = new StringUploadSource("job.sh", createJobScript(job, workFolder, tmpFolder)); |
6629 |
07 Mar 22 |
nicklas |
81 |
return new JobSubmission("qsub " + workFolder + "/submit.sh", Arrays.asList(qsub, run, jobScript)); |
5982 |
07 Jul 20 |
nicklas |
82 |
} |
5982 |
07 Jul 20 |
nicklas |
83 |
|
5984 |
10 Jul 20 |
nicklas |
84 |
@Override |
5984 |
10 Jul 20 |
nicklas |
85 |
public CmdResult<JobStatus> getStatusInQueue(OpenGridSession session, JobIdentifier jobId, int timeAdjustment) |
5984 |
10 Jul 20 |
nicklas |
86 |
{ |
5984 |
10 Jul 20 |
nicklas |
87 |
if (qstatXml == null) qstatXml = session.execute(new QstatXmlCmd(), 30); |
5984 |
10 Jul 20 |
nicklas |
88 |
return new QstatCmd(qstatXml, jobId, timeAdjustment); |
5984 |
10 Jul 20 |
nicklas |
89 |
} |
5984 |
10 Jul 20 |
nicklas |
90 |
|
5984 |
10 Jul 20 |
nicklas |
91 |
@Override |
5984 |
10 Jul 20 |
nicklas |
92 |
public CmdResult<JobStatus> getStatusIfFinished(OpenGridSession session, JobIdentifier jobId, int timeAdjustment) |
5984 |
10 Jul 20 |
nicklas |
93 |
{ |
5984 |
10 Jul 20 |
nicklas |
94 |
return session.execute(new QacctCmd(jobId, timeAdjustment), 5); |
5984 |
10 Jul 20 |
nicklas |
95 |
} |
5984 |
10 Jul 20 |
nicklas |
96 |
|
5984 |
10 Jul 20 |
nicklas |
97 |
@Override |
5984 |
10 Jul 20 |
nicklas |
98 |
public CmdResult<String> cancelJob(OpenGridSession session, JobIdentifier jobId) |
5984 |
10 Jul 20 |
nicklas |
99 |
{ |
5984 |
10 Jul 20 |
nicklas |
100 |
return session.executeCmd("qdel " + jobId.getClusterJobId(), 5); |
5984 |
10 Jul 20 |
nicklas |
101 |
} |
5984 |
10 Jul 20 |
nicklas |
102 |
|
5982 |
07 Jul 20 |
nicklas |
103 |
/** |
5982 |
07 Jul 20 |
nicklas |
Generates a script that can be submitted with 'qsub' to the Open Grid Cluster. |
5982 |
07 Jul 20 |
nicklas |
105 |
*/ |
5982 |
07 Jul 20 |
nicklas |
106 |
public String createQsubScript(JobDefinition job, String workFolder, String tmpFolder) |
5982 |
07 Jul 20 |
nicklas |
107 |
{ |
5982 |
07 Jul 20 |
nicklas |
108 |
JobConfig config = job.getConfig(); |
6672 |
11 Apr 22 |
nicklas |
109 |
BatchConfig batchConfig = job.getBatchConfig(); |
5982 |
07 Jul 20 |
nicklas |
110 |
|
6629 |
07 Mar 22 |
nicklas |
111 |
ScriptBuilder script = new ScriptBuilder(); |
6629 |
07 Mar 22 |
nicklas |
112 |
script.comment("--- start of qsub options ---"); |
6629 |
07 Mar 22 |
nicklas |
113 |
script.cmd("#$ -S /bin/bash"); // Note! Need to use cmd() instead of comment() to avoid space between # and $ |
6629 |
07 Mar 22 |
nicklas |
114 |
script.cmd("#$ -terse"); // qsub will report <job-id> only |
6629 |
07 Mar 22 |
nicklas |
115 |
script.cmd("#$ -N " + job.getName()); // Name of the job |
6629 |
07 Mar 22 |
nicklas |
116 |
script.cmd("#$ -wd " + workFolder); // Working directory (must already exist) |
6629 |
07 Mar 22 |
nicklas |
117 |
script.cmd("#$ -o stdout"); // Stdout is saved to this file |
6629 |
07 Mar 22 |
nicklas |
118 |
script.cmd("#$ -e stderr"); // Stderr is saved to this file |
5982 |
07 Jul 20 |
nicklas |
119 |
if (config.getPriority() != null) |
5982 |
07 Jul 20 |
nicklas |
120 |
{ |
6629 |
07 Mar 22 |
nicklas |
121 |
script.cmd("#$ -p " + config.getPriority()); |
5982 |
07 Jul 20 |
nicklas |
122 |
} |
6672 |
11 Apr 22 |
nicklas |
123 |
if (batchConfig != null) |
6672 |
11 Apr 22 |
nicklas |
124 |
{ |
6672 |
11 Apr 22 |
nicklas |
125 |
long startTime = batchConfig.getNextStartTime(); |
6672 |
11 Apr 22 |
nicklas |
126 |
if (startTime > 0) |
6672 |
11 Apr 22 |
nicklas |
127 |
{ |
6672 |
11 Apr 22 |
nicklas |
128 |
script.cmd("#$ -a "+STARTTIME_DATE.format(new Date(startTime))); |
6672 |
11 Apr 22 |
nicklas |
129 |
} |
6672 |
11 Apr 22 |
nicklas |
130 |
} |
6672 |
11 Apr 22 |
nicklas |
131 |
|
5982 |
07 Jul 20 |
nicklas |
132 |
config.appendQsubOptionsToScript(script, ignoredQsubOptions); |
6629 |
07 Mar 22 |
nicklas |
133 |
script.comment("--- end of qsub options ---\n"); |
6629 |
07 Mar 22 |
nicklas |
134 |
script.comment("--- setting up work directories and other options from JobDefinition ---\n"); |
6629 |
07 Mar 22 |
nicklas |
135 |
script.export("WD", workFolder); |
6629 |
07 Mar 22 |
nicklas |
136 |
script.export("TMPDIR", tmpFolder); |
6629 |
07 Mar 22 |
nicklas |
137 |
if (job.getDebug()) script.export("JOB_DEBUG", "1"); |
6629 |
07 Mar 22 |
nicklas |
138 |
script.cmd("./run.sh"); |
5982 |
07 Jul 20 |
nicklas |
139 |
return script.toString(); |
5982 |
07 Jul 20 |
nicklas |
140 |
} |
5982 |
07 Jul 20 |
nicklas |
141 |
|
6629 |
07 Mar 22 |
nicklas |
142 |
/** |
6629 |
07 Mar 22 |
nicklas |
Generates a script that executes the job script. |
6629 |
07 Mar 22 |
nicklas |
144 |
*/ |
6629 |
07 Mar 22 |
nicklas |
145 |
public String createJobScript(JobDefinition job, String workFolder, String tmpFolder) |
6629 |
07 Mar 22 |
nicklas |
146 |
{ |
6629 |
07 Mar 22 |
nicklas |
147 |
ScriptBuilder script = new ScriptBuilder(); |
6629 |
07 Mar 22 |
nicklas |
148 |
script.cmd("#!/bin/bash"); |
6629 |
07 Mar 22 |
nicklas |
149 |
script.cmd(job.getCmd()); |
6629 |
07 Mar 22 |
nicklas |
150 |
return script.toString(); |
6629 |
07 Mar 22 |
nicklas |
151 |
} |
6629 |
07 Mar 22 |
nicklas |
152 |
|
6629 |
07 Mar 22 |
nicklas |
153 |
private UploadSource getScript(String name) |
6629 |
07 Mar 22 |
nicklas |
154 |
{ |
6629 |
07 Mar 22 |
nicklas |
155 |
return new InputStreamUploadSource(name, OpenGrid.class.getResourceAsStream("/net/sf/basedb/opengrid/engine/sge/"+name)); |
6629 |
07 Mar 22 |
nicklas |
156 |
} |
6629 |
07 Mar 22 |
nicklas |
157 |
|
6629 |
07 Mar 22 |
nicklas |
158 |
|
5984 |
10 Jul 20 |
nicklas |
// Holds result from qstat |
5984 |
10 Jul 20 |
nicklas |
160 |
private QstatXmlCmd qstatXml; |
5984 |
10 Jul 20 |
nicklas |
161 |
|
5984 |
10 Jul 20 |
nicklas |
162 |
|
5984 |
10 Jul 20 |
nicklas |
163 |
/** |
5984 |
10 Jul 20 |
nicklas |
Execute the 'qstat -xml' command, parse the XML and store the result |
5984 |
10 Jul 20 |
nicklas |
for future use with QstatCmd. |
5984 |
10 Jul 20 |
nicklas |
166 |
*/ |
5984 |
10 Jul 20 |
nicklas |
167 |
public static class QstatXmlCmd |
5984 |
10 Jul 20 |
nicklas |
168 |
extends CmdResult<Map<String, Element>> |
5984 |
10 Jul 20 |
nicklas |
169 |
{ |
5984 |
10 Jul 20 |
nicklas |
170 |
public QstatXmlCmd() |
5984 |
10 Jul 20 |
nicklas |
171 |
{ |
5984 |
10 Jul 20 |
nicklas |
172 |
super("qstat -xml"); |
5984 |
10 Jul 20 |
nicklas |
173 |
} |
5984 |
10 Jul 20 |
nicklas |
174 |
|
5984 |
10 Jul 20 |
nicklas |
175 |
@Override |
5984 |
10 Jul 20 |
nicklas |
176 |
protected void parseResult() |
5984 |
10 Jul 20 |
nicklas |
177 |
{ |
5984 |
10 Jul 20 |
nicklas |
178 |
if (getExitStatus() != 0) return; |
5984 |
10 Jul 20 |
nicklas |
179 |
|
5984 |
10 Jul 20 |
nicklas |
180 |
List<Element> jobList = null; |
5984 |
10 Jul 20 |
nicklas |
181 |
Map<String, Element> qstatCache = new HashMap<>(); |
5984 |
10 Jul 20 |
nicklas |
182 |
setResult(qstatCache); |
5984 |
10 Jul 20 |
nicklas |
183 |
try |
5984 |
10 Jul 20 |
nicklas |
184 |
{ |
5984 |
10 Jul 20 |
nicklas |
185 |
Document dom = XmlUtil2.getXml(getStdout(), null, false); |
5984 |
10 Jul 20 |
nicklas |
186 |
jobList = XPathFactory.instance().compile("//job_list", new ElementFilter()).evaluate(dom); |
5984 |
10 Jul 20 |
nicklas |
187 |
for (Element e : jobList) |
5984 |
10 Jul 20 |
nicklas |
188 |
{ |
5984 |
10 Jul 20 |
nicklas |
189 |
String clusterJobId = e.getChildTextTrim("JB_job_number"); |
5984 |
10 Jul 20 |
nicklas |
190 |
qstatCache.put(clusterJobId, e); |
5984 |
10 Jul 20 |
nicklas |
191 |
} |
5984 |
10 Jul 20 |
nicklas |
192 |
} |
5984 |
10 Jul 20 |
nicklas |
193 |
catch (Exception ex) |
5984 |
10 Jul 20 |
nicklas |
194 |
{ |
5984 |
10 Jul 20 |
nicklas |
195 |
setException(new RuntimeException("Could not parse 'qstat' output", ex)); |
5984 |
10 Jul 20 |
nicklas |
196 |
logger.error("Could not parse 'qstat' output", ex); |
5984 |
10 Jul 20 |
nicklas |
197 |
} |
5984 |
10 Jul 20 |
nicklas |
198 |
} |
5984 |
10 Jul 20 |
nicklas |
199 |
} |
5984 |
10 Jul 20 |
nicklas |
200 |
|
5984 |
10 Jul 20 |
nicklas |
201 |
/** |
5984 |
10 Jul 20 |
nicklas |
Get job status information from an existing QstatXmlCmd instance. |
5984 |
10 Jul 20 |
nicklas |
This will find the XML element for the job with the given id and |
5984 |
10 Jul 20 |
nicklas |
create the job status information from that. If the specified job |
5984 |
10 Jul 20 |
nicklas |
is not found it will return with exit code 1. |
5984 |
10 Jul 20 |
nicklas |
206 |
*/ |
5984 |
10 Jul 20 |
nicklas |
207 |
public static class QstatCmd |
5984 |
10 Jul 20 |
nicklas |
208 |
extends CmdResult<JobStatus> |
5984 |
10 Jul 20 |
nicklas |
209 |
{ |
5984 |
10 Jul 20 |
nicklas |
210 |
public QstatCmd(QstatXmlCmd qstatXml, JobIdentifier jobId, int timeAdjustment) |
5984 |
10 Jul 20 |
nicklas |
211 |
{ |
5984 |
10 Jul 20 |
nicklas |
212 |
super(qstatXml); |
5984 |
10 Jul 20 |
nicklas |
213 |
|
5984 |
10 Jul 20 |
nicklas |
214 |
if (qstatXml.getExitStatus() == 0) |
5984 |
10 Jul 20 |
nicklas |
215 |
{ |
5984 |
10 Jul 20 |
nicklas |
216 |
Element e = qstatXml.getResult().get(jobId.getClusterJobId()); |
5984 |
10 Jul 20 |
nicklas |
217 |
if (e != null) |
5984 |
10 Jul 20 |
nicklas |
218 |
{ |
5984 |
10 Jul 20 |
nicklas |
219 |
OpenGridJobStatus jobStatus = new OpenGridJobStatus(jobId); |
5984 |
10 Jul 20 |
nicklas |
220 |
jobStatus.readFromQstatXml(e, timeAdjustment); |
5984 |
10 Jul 20 |
nicklas |
221 |
setResult(jobStatus); |
5984 |
10 Jul 20 |
nicklas |
222 |
} |
5984 |
10 Jul 20 |
nicklas |
223 |
else |
5984 |
10 Jul 20 |
nicklas |
224 |
{ |
5984 |
10 Jul 20 |
nicklas |
// Faking qstat result with a 'not found' (exit status=1) |
5984 |
10 Jul 20 |
nicklas |
226 |
setExitStatus(1); |
5984 |
10 Jul 20 |
nicklas |
227 |
} |
5984 |
10 Jul 20 |
nicklas |
228 |
} |
5984 |
10 Jul 20 |
nicklas |
229 |
} |
5984 |
10 Jul 20 |
nicklas |
230 |
} |
5984 |
10 Jul 20 |
nicklas |
231 |
|
5984 |
10 Jul 20 |
nicklas |
232 |
/** |
5984 |
10 Jul 20 |
nicklas |
Implements the 'qacct' command for getting information about a completed job. |
5984 |
10 Jul 20 |
nicklas |
234 |
*/ |
5984 |
10 Jul 20 |
nicklas |
235 |
public static class QacctCmd |
5984 |
10 Jul 20 |
nicklas |
236 |
extends CmdResult<JobStatus> |
5984 |
10 Jul 20 |
nicklas |
237 |
{ |
5984 |
10 Jul 20 |
nicklas |
238 |
|
5984 |
10 Jul 20 |
nicklas |
239 |
private final JobIdentifier jobId; |
5984 |
10 Jul 20 |
nicklas |
240 |
private final int timeAdjustment; |
5984 |
10 Jul 20 |
nicklas |
241 |
public QacctCmd(JobIdentifier jobId, int timeAdjustment) |
5984 |
10 Jul 20 |
nicklas |
242 |
{ |
5984 |
10 Jul 20 |
nicklas |
243 |
super("qacct -j " + jobId.getClusterJobId()); |
5984 |
10 Jul 20 |
nicklas |
244 |
this.jobId = jobId; |
5984 |
10 Jul 20 |
nicklas |
245 |
this.timeAdjustment = timeAdjustment; |
5984 |
10 Jul 20 |
nicklas |
246 |
} |
5984 |
10 Jul 20 |
nicklas |
247 |
|
5984 |
10 Jul 20 |
nicklas |
248 |
@Override |
5984 |
10 Jul 20 |
nicklas |
249 |
protected void parseResult() |
5984 |
10 Jul 20 |
nicklas |
250 |
{ |
5984 |
10 Jul 20 |
nicklas |
251 |
if (getExitStatus() != 0) return; |
5984 |
10 Jul 20 |
nicklas |
252 |
|
5984 |
10 Jul 20 |
nicklas |
253 |
if (logger.isDebugEnabled()) |
5984 |
10 Jul 20 |
nicklas |
254 |
{ |
5984 |
10 Jul 20 |
nicklas |
255 |
logger.debug("Got 'qacct' information for job: " + jobId); |
5984 |
10 Jul 20 |
nicklas |
256 |
} |
5984 |
10 Jul 20 |
nicklas |
257 |
try |
5984 |
10 Jul 20 |
nicklas |
258 |
{ |
5984 |
10 Jul 20 |
nicklas |
259 |
OpenGridJobStatus status = new OpenGridJobStatus(jobId); |
5984 |
10 Jul 20 |
nicklas |
260 |
status.readFromQacct(getStdout(), timeAdjustment); |
5984 |
10 Jul 20 |
nicklas |
261 |
setResult(status); |
5984 |
10 Jul 20 |
nicklas |
262 |
} |
5984 |
10 Jul 20 |
nicklas |
263 |
catch (Exception ex) |
5984 |
10 Jul 20 |
nicklas |
264 |
{ |
5984 |
10 Jul 20 |
nicklas |
265 |
setException(new RuntimeException("Could not parse 'qacct' output for job: " + jobId, ex)); |
5984 |
10 Jul 20 |
nicklas |
266 |
logger.error("Could not parse 'qacct' output for job: " + jobId, ex); |
5984 |
10 Jul 20 |
nicklas |
267 |
} |
5984 |
10 Jul 20 |
nicklas |
268 |
} |
5984 |
10 Jul 20 |
nicklas |
269 |
} |
5984 |
10 Jul 20 |
nicklas |
270 |
|
5984 |
10 Jul 20 |
nicklas |
271 |
|
5984 |
10 Jul 20 |
nicklas |
272 |
/** |
5984 |
10 Jul 20 |
nicklas |
Job status information for Open Grid jobs. We need the subclass to |
5984 |
10 Jul 20 |
nicklas |
be able to parse and update the status via protected setter methods. |
5984 |
10 Jul 20 |
nicklas |
275 |
*/ |
5984 |
10 Jul 20 |
nicklas |
276 |
public static class OpenGridJobStatus |
5984 |
10 Jul 20 |
nicklas |
277 |
extends JobStatus |
5984 |
10 Jul 20 |
nicklas |
278 |
{ |
5984 |
10 Jul 20 |
nicklas |
279 |
public OpenGridJobStatus(JobIdentifier jobId) |
5984 |
10 Jul 20 |
nicklas |
280 |
{ |
5984 |
10 Jul 20 |
nicklas |
281 |
super(jobId); |
5984 |
10 Jul 20 |
nicklas |
282 |
} |
5984 |
10 Jul 20 |
nicklas |
283 |
|
5984 |
10 Jul 20 |
nicklas |
284 |
/** |
5984 |
10 Jul 20 |
nicklas |
Get information from XML element generated by 'qstat -xml'. |
5984 |
10 Jul 20 |
nicklas |
The qstat command is only able to give information about jobs |
5984 |
10 Jul 20 |
nicklas |
that are waiting in the queue or execution (or has failed to |
5984 |
10 Jul 20 |
nicklas |
start). |
5984 |
10 Jul 20 |
nicklas |
@param e The XML element containing information about a single job <job_list> |
5984 |
10 Jul 20 |
nicklas |
@param timeAdjustment Number of seconds to adjust the time values in the XML data |
5984 |
10 Jul 20 |
nicklas |
(used for getting the local time). A positive value will adjust the times |
5984 |
10 Jul 20 |
nicklas |
into the future. |
5984 |
10 Jul 20 |
nicklas |
293 |
*/ |
5984 |
10 Jul 20 |
nicklas |
294 |
void readFromQstatXml(Element e, int timeAdjustment) |
5984 |
10 Jul 20 |
nicklas |
295 |
{ |
5984 |
10 Jul 20 |
nicklas |
// Job name |
5984 |
10 Jul 20 |
nicklas |
297 |
setName(Values.getStringOrNull(e.getChildTextTrim("JB_name"))); |
5984 |
10 Jul 20 |
nicklas |
298 |
|
5984 |
10 Jul 20 |
nicklas |
// Submission time |
5984 |
10 Jul 20 |
nicklas |
300 |
String subTime = Values.getStringOrNull(e.getChildTextTrim("JB_submission_time")); |
5984 |
10 Jul 20 |
nicklas |
301 |
if (subTime != null) |
5984 |
10 Jul 20 |
nicklas |
302 |
{ |
5984 |
10 Jul 20 |
nicklas |
303 |
setSubmissionTime(QSTAT_DATE.parseString(subTime).getTime() + timeAdjustment * 1000); |
5984 |
10 Jul 20 |
nicklas |
304 |
} |
5984 |
10 Jul 20 |
nicklas |
305 |
|
5984 |
10 Jul 20 |
nicklas |
// Start time |
5984 |
10 Jul 20 |
nicklas |
307 |
String sTime = Values.getStringOrNull(e.getChildTextTrim("JAT_start_time")); |
5984 |
10 Jul 20 |
nicklas |
308 |
if (sTime != null) |
5984 |
10 Jul 20 |
nicklas |
309 |
{ |
5984 |
10 Jul 20 |
nicklas |
310 |
setStartTime(QSTAT_DATE.parseString(sTime).getTime() + timeAdjustment * 1000); |
5984 |
10 Jul 20 |
nicklas |
311 |
} |
5984 |
10 Jul 20 |
nicklas |
312 |
|
5984 |
10 Jul 20 |
nicklas |
313 |
setNodeName(Values.getStringOrNull(e.getChildTextTrim("queue_name"))); |
5984 |
10 Jul 20 |
nicklas |
314 |
|
5984 |
10 Jul 20 |
nicklas |
// State of the job |
5984 |
10 Jul 20 |
nicklas |
316 |
String state = Values.getStringOrNull(e.getChildTextTrim("state")); |
5984 |
10 Jul 20 |
nicklas |
317 |
if (state != null) |
5984 |
10 Jul 20 |
nicklas |
318 |
{ |
5984 |
10 Jul 20 |
nicklas |
319 |
if ("qw".equals(state) || "t".equals(state)) |
5984 |
10 Jul 20 |
nicklas |
320 |
{ |
5984 |
10 Jul 20 |
nicklas |
321 |
setStatus(Job.Status.WAITING); |
5984 |
10 Jul 20 |
nicklas |
322 |
} |
5984 |
10 Jul 20 |
nicklas |
323 |
else if ("r".equals(state)) |
5984 |
10 Jul 20 |
nicklas |
324 |
{ |
5984 |
10 Jul 20 |
nicklas |
325 |
setStatus(Job.Status.EXECUTING); |
5984 |
10 Jul 20 |
nicklas |
326 |
} |
5984 |
10 Jul 20 |
nicklas |
327 |
else if (state.contains("E")) |
5984 |
10 Jul 20 |
nicklas |
328 |
{ |
5984 |
10 Jul 20 |
nicklas |
329 |
setStatus(Job.Status.ERROR); |
5984 |
10 Jul 20 |
nicklas |
330 |
} |
5984 |
10 Jul 20 |
nicklas |
331 |
} |
5984 |
10 Jul 20 |
nicklas |
332 |
} |
5984 |
10 Jul 20 |
nicklas |
333 |
|
5984 |
10 Jul 20 |
nicklas |
334 |
/** |
5984 |
10 Jul 20 |
nicklas |
Get information from output from 'qacct -j {job-id}'. |
5984 |
10 Jul 20 |
nicklas |
336 |
*/ |
5984 |
10 Jul 20 |
nicklas |
337 |
void readFromQacct(String text, int timeAdjustment) |
5984 |
10 Jul 20 |
nicklas |
338 |
{ |
5984 |
10 Jul 20 |
nicklas |
339 |
String[] lines = text.split("\n"); |
5984 |
10 Jul 20 |
nicklas |
340 |
Pattern p = Pattern.compile("\\s+"); |
5984 |
10 Jul 20 |
nicklas |
341 |
String queueName = null; |
5984 |
10 Jul 20 |
nicklas |
342 |
String hostName = null; |
5984 |
10 Jul 20 |
nicklas |
343 |
|
5984 |
10 Jul 20 |
nicklas |
344 |
for (String line : lines) |
5984 |
10 Jul 20 |
nicklas |
345 |
{ |
5984 |
10 Jul 20 |
nicklas |
346 |
String[] kv = p.split(line, 2); |
5984 |
10 Jul 20 |
nicklas |
347 |
if (kv.length == 2) |
5984 |
10 Jul 20 |
nicklas |
348 |
{ |
5984 |
10 Jul 20 |
nicklas |
349 |
String key = kv[0]; |
5984 |
10 Jul 20 |
nicklas |
350 |
String value = kv[1].trim(); |
5984 |
10 Jul 20 |
nicklas |
351 |
|
5984 |
10 Jul 20 |
nicklas |
352 |
if ("jobname".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
353 |
{ |
5984 |
10 Jul 20 |
nicklas |
354 |
setName(value); |
5984 |
10 Jul 20 |
nicklas |
355 |
} |
5984 |
10 Jul 20 |
nicklas |
356 |
else if ("qsub_time".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
357 |
{ |
5984 |
10 Jul 20 |
nicklas |
358 |
setSubmissionTime(QACCT_DATE.parseString(value).getTime() + timeAdjustment * 1000); |
5984 |
10 Jul 20 |
nicklas |
359 |
} |
5984 |
10 Jul 20 |
nicklas |
360 |
else if ("start_time".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
361 |
{ |
5984 |
10 Jul 20 |
nicklas |
362 |
setStartTime(QACCT_DATE.parseString(value).getTime() + timeAdjustment * 1000); |
5984 |
10 Jul 20 |
nicklas |
363 |
} |
5984 |
10 Jul 20 |
nicklas |
364 |
else if ("end_time".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
365 |
{ |
5984 |
10 Jul 20 |
nicklas |
366 |
setEndTime(QACCT_DATE.parseString(value).getTime() + timeAdjustment * 1000); |
5984 |
10 Jul 20 |
nicklas |
367 |
} |
5984 |
10 Jul 20 |
nicklas |
// 'nodeName' is built from qname+hostname but we don't know which is first... |
5984 |
10 Jul 20 |
nicklas |
369 |
else if ("qname".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
370 |
{ |
5984 |
10 Jul 20 |
nicklas |
371 |
queueName = value; |
5984 |
10 Jul 20 |
nicklas |
372 |
setNodeName(queueName + "@" + hostName); |
5984 |
10 Jul 20 |
nicklas |
373 |
} |
5984 |
10 Jul 20 |
nicklas |
374 |
else if ("hostname".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
375 |
{ |
5984 |
10 Jul 20 |
nicklas |
376 |
hostName = value; |
5984 |
10 Jul 20 |
nicklas |
377 |
setNodeName(queueName + "@" + hostName); |
5984 |
10 Jul 20 |
nicklas |
378 |
} |
5984 |
10 Jul 20 |
nicklas |
379 |
else if ("exit_status".equals(key)) |
5984 |
10 Jul 20 |
nicklas |
380 |
{ |
5984 |
10 Jul 20 |
nicklas |
381 |
setExitCode(Values.getInt(value)); |
5984 |
10 Jul 20 |
nicklas |
382 |
setStatus(getExitCode() == 0 ? Job.Status.DONE : Job.Status.ERROR); |
5984 |
10 Jul 20 |
nicklas |
383 |
} |
5984 |
10 Jul 20 |
nicklas |
384 |
} |
5984 |
10 Jul 20 |
nicklas |
385 |
} |
5984 |
10 Jul 20 |
nicklas |
386 |
} |
5984 |
10 Jul 20 |
nicklas |
387 |
|
5984 |
10 Jul 20 |
nicklas |
388 |
} |
5981 |
07 Jul 20 |
nicklas |
389 |
} |