cocluto/ClusterController/SunGridEngine.py

48 lines
1.8 KiB
Python

import time

import Util
from QstatParser import *
class SunGridEngine:
    """Wrapper around the Sun Grid Engine command line tools ``qstat`` and ``qmod``.

    The methods rely on names that this class does not define itself and that
    must be supplied by the module's imports: ``executeProgram`` (called with an
    argument list, unpacked here as ``(returnCode, stdout, stderr)``),
    ``logDebug``, ``logWarning``, ``QstatParser`` and the ``time`` module.
    """

    # command line used to list the queue instances and the jobs
    # NOTE(review): '-u *' presumably selects the jobs of all users -- confirm against qstat(1)
    _QSTAT_ALL_JOBS_COMMAND = ['qstat', '-f', '-u', '*']

    def getCurrentJobsState( self ):
        """Query sge for the current state of the jobs and return it parsed.

        The qstat command is re-run every 5 seconds, without any limit on the
        number of attempts, until it exits with return code 0; each failure is
        reported with logWarning.

        :return: the object returned by QstatParser().parseQstatOutput, on
            which setTime( time.time() ) has been called.
        """
        # enables the extra debug traces; the flag name ties them to bug 00000009
        bBUG_00000009_IS_STILL_ALIVE = True
        if bBUG_00000009_IS_STILL_ALIVE:
            logDebug('Querying the current state of jobs')

        # a copy, so that the shared class-level list can't be altered by the callee
        command = list(self._QSTAT_ALL_JOBS_COMMAND)
        delayBetweenAttempts = 5 # in seconds
        while True:
            (returnCode, qstatOutput, stderr) = executeProgram( command )
            if returnCode == 0:
                break
            logWarning('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s"). Retrying in %d seconds' % (' '.join(command), returnCode, qstatOutput, stderr, delayBetweenAttempts))
            time.sleep(delayBetweenAttempts)

        if bBUG_00000009_IS_STILL_ALIVE:
            logDebug('Just got current state of jobs')
        jobsState = QstatParser().parseQstatOutput( qstatOutput )
        jobsState.setTime( time.time() )

        # read the requirements for pending jobs (which parallel environment, which queue, which architecture) from sge
        # no need for job details at the moment and since it's very slow, it's been disabled
        bFetchPendingJobDetails = False
        if bFetchPendingJobDetails:
            # assumes getPendingJobs() returns a dict of jobs (the original code used iteritems on it) -- TODO confirm
            for job in jobsState.getPendingJobs().values():
                jobDetailsCommand = ['qstat', '-j', job.getId().asStr()]
                (returnCode, stdout, stderr) = executeProgram( jobDetailsCommand )
                if returnCode != 0:
                    # the details can only be parsed if the command succeeded
                    raise AssertionError('command "%s" failed (returnCode = %d, stderr="%s")' % (' '.join(jobDetailsCommand), returnCode, stderr))
                QstatParser().parseJobDetails( stdout, job )
        return jobsState

    def setQueueInstanceActivation( self, strQueueInstanceName, bEnable ):
        """Enable or disable a queue instance by running qmod.

        :param strQueueInstanceName: the queue instance name, passed as is to qmod.
        :param bEnable: if true, ``qmod -e`` is run; otherwise ``qmod -d``.
        :return: True if qmod exited with return code 0, False otherwise.
        """
        if bEnable:
            argument = 'e'
        else:
            argument = 'd'
        # only the return code is of interest here; the outputs of qmod are ignored
        errorCode, _stdout, _stderr = executeProgram(['qmod', '-'+argument, strQueueInstanceName])
        return (errorCode == 0)

    def queueIsEmpty( self, strMachineName ):
        """Tell whether no job is currently on the given machine.

        Unlike getCurrentJobsState, qstat is run only once here.

        :param strMachineName: the machine name, passed to getJobsOnMachine on
            the parsed qstat output.
        :return: True if getJobsOnMachine returns an empty collection.
        :raises AssertionError: if qstat exits with a non zero return code.
        """
        command = list(self._QSTAT_ALL_JOBS_COMMAND)
        (returnCode, qstatOutput, stderr) = executeProgram( command )
        if returnCode != 0:
            # raised explicitly (instead of using an assert statement) so that
            # the check is still performed when python runs with -O
            raise AssertionError('command "%s" failed (returnCode = %d, stderr="%s")' % (' '.join(command), returnCode, stderr))
        jobsState = QstatParser().parseQstatOutput( qstatOutput )
        jobs = jobsState.getJobsOnMachine( strMachineName )
        return (len(jobs) == 0)