2011-10-07 17:43:45 +02:00
import Util
from QstatParser import *
class SunGridEngine:
    """Drives Sun Grid Engine through its command-line tools (qstat, qmod).

    Every method shells out through executeProgram(), which is expected to
    return a (returnCode, stdout, stderr) tuple, and parses qstat output
    with QstatParser.
    """

    # Delay, in seconds, between two attempts of an sge command that failed.
    RETRY_DELAY_IN_SECONDS = 5

    @staticmethod
    def _qstatAllJobsCommand():
        """Return the argv of the qstat call that lists the jobs of all users.

        The tokens must not carry any surrounding whitespace: they are handed
        over as-is as the program name and its arguments.
        """
        return ['qstat', '-f', '-u', '*']

    @staticmethod
    def _qmodCommand(strQueueInstanceName, bEnable):
        """Return the argv of the qmod call that enables (-e) or disables (-d)
        the queue instance strQueueInstanceName.
        """
        if bEnable:
            argument = 'e'
        else:
            argument = 'd'
        return ['qmod', '-' + argument, strQueueInstanceName]

    def _executeUntilSuccess(self, command):
        """Run command again and again until it exits with code 0.

        Each failed attempt is reported with logWarning() and followed by a
        pause of RETRY_DELAY_IN_SECONDS seconds. There is no upper bound on
        the number of attempts.

        :param command: the argv list passed to executeProgram()
        :return: the standard output of the successful attempt
        """
        while True:
            (returnCode, stdout, stderr) = executeProgram(command)
            if returnCode == 0:
                return stdout
            logWarning(' command " %s " failed (returnCode = %d , stdout= " %s " , stderr= " %s " ). Retrying in %d seconds ' % (' '.join(command), returnCode, stdout, stderr, self.RETRY_DELAY_IN_SECONDS))
            time.sleep(self.RETRY_DELAY_IN_SECONDS)

    def getCurrentJobsState(self):
        """Query sge for the state of all jobs and return it in parsed form.

        qstat is retried until it succeeds; its output is parsed with
        QstatParser().parseQstatOutput() and the result is stamped with the
        current time (time.time()) before being returned.

        :return: the object built by QstatParser().parseQstatOutput()
        """
        # extra debug traces attached to bug 9 (presumably still being tracked down)
        bBUG_00000009_IS_STILL_ALIVE = True
        if bBUG_00000009_IS_STILL_ALIVE:
            logDebug(' Querying the current state of jobs ')
        qstatOutput = self._executeUntilSuccess(self._qstatAllJobsCommand())
        if bBUG_00000009_IS_STILL_ALIVE:
            logDebug(' Just got current state of jobs ')

        jobsState = QstatParser().parseQstatOutput(qstatOutput)
        jobsState.setTime(time.time())

        # read the requirements for pending jobs (which parallel environment, which queue, which architecture) from sge
        # no need for job details at the moment and since it's very slow, it's been disabled
        bReadPendingJobsDetails = False
        if bReadPendingJobsDetails:
            for unused_jobId, job in jobsState.getPendingJobs().iteritems():
                command = ['qstat', '-j', job.getId().asStr()]
                (returnCode, stdout, stderr) = executeProgram(command)
                # the details can only be parsed if qstat succeeded
                if returnCode != 0:
                    raise AssertionError('command "%s" failed (returnCode = %d, stderr = "%s")' % (' '.join(command), returnCode, stderr))
                QstatParser().parseJobDetails(stdout, job)
        return jobsState

    def setQueueInstanceActivation(self, strQueueInstanceName, bEnable):
        """Enable or disable a queue instance with qmod.

        :param strQueueInstanceName: name of the queue instance, passed to qmod as-is
        :param bEnable: True to enable the queue instance (qmod -e), False to disable it (qmod -d)
        :return: True if qmod exited with code 0. While the workaround for
            bug 269 is active, the command is retried until it succeeds, so
            the method only ever returns True.
        """
        command = self._qmodCommand(strQueueInstanceName, bEnable)
        bBUG_00000269_IS_STILL_ALIVE = True  # for some reason, qmod -d (and maybe any sge command) could fail with error: commlib error: can't connect to service (Address already in use)
        if bBUG_00000269_IS_STILL_ALIVE:
            # if the command failed, try again (each failure is logged)
            self._executeUntilSuccess(command)
            return True
        (errorCode, unused_stdout, unused_stderr) = executeProgram(command)
        return (errorCode == 0)

    def queueIsEmpty(self, strMachineName):
        """Tell whether no job is currently on the machine strMachineName.

        :param strMachineName: machine name, as understood by getJobsOnMachine()
        :return: True if getJobsOnMachine(strMachineName) reports no job
        :raises AssertionError: if qstat exits with a non-zero code
        """
        command = self._qstatAllJobsCommand()
        (returnCode, qstatOutput, stderr) = executeProgram(command)
        # raised explicitly (instead of using an assert statement) so that the
        # check is still performed when python runs with -O
        if returnCode != 0:
            raise AssertionError('command "%s" failed (returnCode = %d, stderr = "%s")' % (' '.join(command), returnCode, stderr))
        jobsState = QstatParser().parseQstatOutput(qstatOutput)
        jobs = jobsState.getJobsOnMachine(strMachineName)
        return (len(jobs) == 0)