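Contournement du bug 269 (workaround for bug 269: `qmod` can fail intermittently with a commlib connection error, so the command is retried until it succeeds)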
This commit is contained in:
parent
ef09dd6fec
commit
862f45ec7a
|
@ -24,7 +24,7 @@ class SunGridEngine:
|
|||
|
||||
# read the requirements of pending jobs (parallel environment, queue, architecture) from SGE
|
||||
if False: # no need for job details at the moment and since it's very slow, it's been disabled
|
||||
for jobId, job in jobsState.getPendingJobs().iteritems():
|
||||
for unused_jobId, job in jobsState.getPendingJobs().iteritems():
|
||||
(returnCode, stdout, stderr) = executeProgram( ['qstat', '-j', job.getId().asStr()] )
|
||||
assert returnCode != 0, 'prout'
|
||||
QstatParser().parseJobDetails( stdout, job )
|
||||
|
@ -35,11 +35,21 @@ class SunGridEngine:
|
|||
argument = 'd'
|
||||
if bEnable:
|
||||
argument = 'e'
|
||||
errorCode, stdout, stderr = executeProgram(['qmod', '-'+argument, strQueueInstanceName])
|
||||
bBUG_00000269_IS_STILL_ALIVE = True # for some reason, qmod -d (and maybe any sge command) could fail with error: commlib error: can't connect to service (Address already in use)
|
||||
delayBetweenAttemps = 5 # in seconds
|
||||
while True:
|
||||
errorCode, unused_stdout, unused_stderr = executeProgram(['qmod', '-'+argument, strQueueInstanceName])
|
||||
if bBUG_00000269_IS_STILL_ALIVE:
|
||||
# if the command failed, try again
|
||||
if errorCode == 0:
|
||||
break
|
||||
time.sleep(delayBetweenAttemps)
|
||||
else:
|
||||
break
|
||||
return (errorCode == 0)
|
||||
|
||||
def queueIsEmpty( self, strMachineName ):
|
||||
(returnCode, qstatOutput, stderr) = executeProgram( ['qstat', '-f', '-u', '*'] )
|
||||
(returnCode, qstatOutput, unused_stderr) = executeProgram( ['qstat', '-f', '-u', '*'] )
|
||||
assert( returnCode == 0 )
|
||||
jobsState = QstatParser().parseQstatOutput( qstatOutput )
|
||||
jobs = jobsState.getJobsOnMachine( strMachineName )
|
||||
|
|
Loading…
Reference in New Issue