58 lines
2.2 KiB
Python
58 lines
2.2 KiB
Python
import Util
|
|
from QstatParser import *
|
|
|
|
class SunGridEngine:
    """Drives a Sun Grid Engine installation through its command line tools.

    Every method shells out to an sge command (qstat or qmod) with
    executeProgram and, for qstat, hands the output to QstatParser.
    """

    # delay between two attempts of a failed sge command, in seconds
    DELAY_BETWEEN_ATTEMPTS = 5

    # while this flag is set, the qstat query of getCurrentJobsState is surrounded with debug traces
    BUG_00000009_IS_STILL_ALIVE = True

    # for some reason, qmod -d (and maybe any sge command) could fail with error: commlib error: can't connect to service (Address already in use)
    # while this flag is set, a failed qmod is attempted again until it succeeds
    BUG_00000269_IS_STILL_ALIVE = True

    # no need for job details at the moment and since it's very slow, it's been disabled
    READ_PENDING_JOBS_DETAILS = False

    def _executeQstat( self ):
        """Runs 'qstat -f -u *' once.

        :return: the tuple (command, returnCode, stdout, stderr), where command
            is the argument list that was executed (handy for error messages)
        """
        command = ['qstat', '-f', '-u', '*']
        (returnCode, qstatOutput, stderr) = executeProgram( command )
        return (command, returnCode, qstatOutput, stderr)

    def _readPendingJobsDetails( self, jobsState ):
        """Reads the requirements for pending jobs (which parallel environment, which queue, which architecture) from sge.

        Runs one 'qstat -j <job id>' per pending job and lets QstatParser
        complete each job with the result.
        NOTE(review): this path is disabled by default (see
        READ_PENDING_JOBS_DETAILS) and the original check on the return code
        was inverted; confirm it against a real cluster before enabling it.

        :raises RuntimeError: if qstat fails for one of the pending jobs
        """
        # only the jobs are needed here, not their ids
        for job in jobsState.getPendingJobs().values():
            command = ['qstat', '-j', job.getId().asStr()]
            (returnCode, stdout, stderr) = executeProgram( command )
            if returnCode != 0:
                raise RuntimeError('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s")' % (' '.join(command), returnCode, stdout, stderr))
            QstatParser().parseJobDetails( stdout, job )

    def getCurrentJobsState( self ):
        """Queries sge for the current state of the jobs.

        qstat is attempted again every DELAY_BETWEEN_ATTEMPTS seconds, without
        any limit on the number of attempts, until it returns 0; each failure
        is reported with logWarning.

        :return: the object built by QstatParser.parseQstatOutput from the
            qstat output, stamped with the current time through setTime
        """
        if self.BUG_00000009_IS_STILL_ALIVE:
            logDebug('Querying the current state of jobs')

        while True:
            (command, returnCode, qstatOutput, stderr) = self._executeQstat()
            if returnCode == 0:
                break
            logWarning('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s"). Retrying in %d seconds' % (' '.join(command), returnCode, qstatOutput, stderr, self.DELAY_BETWEEN_ATTEMPTS))
            time.sleep( self.DELAY_BETWEEN_ATTEMPTS )

        if self.BUG_00000009_IS_STILL_ALIVE:
            logDebug('Just got current state of jobs')

        jobsState = QstatParser().parseQstatOutput( qstatOutput )
        jobsState.setTime( time.time() )

        if self.READ_PENDING_JOBS_DETAILS:
            self._readPendingJobsDetails( jobsState )

        return jobsState

    def setQueueInstanceActivation( self, strQueueInstanceName, bEnable ):
        """Enables or disables a queue instance with qmod.

        :param strQueueInstanceName: the queue instance, passed as is to qmod
        :param bEnable: if true, 'qmod -e' is used, otherwise 'qmod -d'
        :return: True if qmod returned 0. While BUG_00000269_IS_STILL_ALIVE is
            set, a failed qmod is attempted again every DELAY_BETWEEN_ATTEMPTS
            seconds until it succeeds, so False can only be returned once
            that flag is cleared.
        """
        if bEnable:
            argument = 'e'
        else:
            argument = 'd'
        command = ['qmod', '-' + argument, strQueueInstanceName]

        while True:
            (errorCode, stdout, stderr) = executeProgram( command )
            if errorCode == 0 or not self.BUG_00000269_IS_STILL_ALIVE:
                break
            # if the command failed, try again, but leave a trace so that an endless series of failures can be noticed
            logWarning('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s"). Retrying in %d seconds' % (' '.join(command), errorCode, stdout, stderr, self.DELAY_BETWEEN_ATTEMPTS))
            time.sleep( self.DELAY_BETWEEN_ATTEMPTS )

        return (errorCode == 0)

    def queueIsEmpty( self, strMachineName ):
        """Tells if no job is reported on the given machine.

        qstat is run only once here: a failure is not attempted again.

        :param strMachineName: the machine, passed as is to getJobsOnMachine
        :return: True if getJobsOnMachine finds no job for this machine
        :raises AssertionError: if qstat fails
        """
        (command, returnCode, qstatOutput, stderr) = self._executeQstat()
        # raised explicitly instead of using an assert statement, so that the check survives python -O
        if returnCode != 0:
            raise AssertionError('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s")' % (' '.join(command), returnCode, qstatOutput, stderr))

        jobsState = QstatParser().parseQstatOutput( qstatOutput )
        jobs = jobsState.getJobsOnMachine( strMachineName )
        return (len(jobs) == 0)
|
|
|
|
|