la page cluster status affiche désormais les états disabled et error des queues

This commit is contained in:
Guillaume Raffy 2012-05-29 16:31:01 +00:00
parent fa2dc0cd67
commit 006d8752c9
2 changed files with 35 additions and 3 deletions

View File

@ -28,6 +28,21 @@ class QstatParser:
else: else:
assert False, 'unhandled job state flag :"' + c + '"' assert False, 'unhandled job state flag :"' + c + '"'
return jobState return jobState
def parseQueueMachineState( self, strQueueMachineStatus ):
    """
    Converts a string of queue machine state letters into a combination of
    QueueMachineStateFlags bits.

    strQueueMachineStatus: a string in which each character is one state
        letter; the recognised letters are 'd' (DISABLED), 'a' (ALARM),
        'u' (UNKNOWN) and 'E' (ERROR). An empty string is accepted.

    returns: an integer with one bit set per state letter found (0 if
        strQueueMachineStatus is empty)

    raises: AssertionError if a character is not one of the recognised letters
    """
    flagForLetter = {
        'd': QueueMachineStateFlags.DISABLED,
        'a': QueueMachineStateFlags.ALARM,
        'u': QueueMachineStateFlags.UNKNOWN,
        'E': QueueMachineStateFlags.ERROR,
    }
    queueMachineState = 0
    for c in strQueueMachineStatus:
        if c not in flagForLetter:
            # raised explicitly (instead of using an assert statement) so that
            # the check is not removed when python runs with -O; the exception
            # type is kept as AssertionError for existing callers
            raise AssertionError( 'unhandled queue machine state flag :"' + c + '"' )
        # the flags are combined with a bitwise or rather than an addition so
        # that a letter appearing twice cannot spill into another flag's bit
        queueMachineState |= flagForLetter[c]
    return queueMachineState
def parseQstatOutput( self, qstatOutput ): def parseQstatOutput( self, qstatOutput ):
jobsState = JobsState() jobsState = JobsState()
f = StringIO.StringIO(qstatOutput) f = StringIO.StringIO(qstatOutput)
@ -45,7 +60,7 @@ class QstatParser:
jobRegularExp = re.compile( '^[ ]*(?P<jobId>[^ ]+)[ ]+[0-9.]+[ ]+(?P<jobScriptName>[^ ]+)[ ]+(?P<jobOwner>[^ ]+)[ ]+(?P<jobStatus>[^ ]+)[ ]+(?P<jobStartOrSubmitTime>[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])[ ]+(?P<numSlots>[0-9]+)[ ]+(?P<jobArrayDetails>[^\n]*)[\s]*$' ) jobRegularExp = re.compile( '^[ ]*(?P<jobId>[^ ]+)[ ]+[0-9.]+[ ]+(?P<jobScriptName>[^ ]+)[ ]+(?P<jobOwner>[^ ]+)[ ]+(?P<jobStatus>[^ ]+)[ ]+(?P<jobStartOrSubmitTime>[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])[ ]+(?P<numSlots>[0-9]+)[ ]+(?P<jobArrayDetails>[^\n]*)[\s]*$' )
# example of machine line : # example of machine line :
# allintel.q@simpatix34.univ-ren BIP 0/6/8 6.00 darwin-x86 # allintel.q@simpatix34.univ-ren BIP 0/6/8 6.00 darwin-x86
machineRegularExp = re.compile( '^(?P<queueName>[^@]+)@(?P<machineName>[^.]+)[^ ]+[ ]+(?P<queueTypeString>[^ ]+)[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)[ ]+(?P<cpuLoad>[^ ]+)[?]*' ) machineRegularExp = re.compile( '^(?P<queueName>[^@]+)@(?P<machineName>[^.]+)[^ ]+[ ]+(?P<queueTypeString>[^ ]+)[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)[ ]+(?P<cpuLoad>[^ ]+)[\s]+(?P<archName>[^ ]+)[\s]+(?P<queueMachineStatus>[^\s]*)' )
pendingJobsHeaderRegularExp = re.compile( '^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*' ) pendingJobsHeaderRegularExp = re.compile( '^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*' )
while( len(line) > 0 ): while( len(line) > 0 ):
# print line # print line
@ -137,7 +152,8 @@ class QstatParser:
if strCpuLoad != '-NA-': if strCpuLoad != '-NA-':
queueMachine.setCpuLoad( float(strCpuLoad) ) queueMachine.setCpuLoad( float(strCpuLoad) )
strQueueMachineState = matchObj.group('queueMachineStatus')
queueMachine.setState( self.parseQueueMachineState( strQueueMachineState ) )
#log('QstatParser::parseQstatOutput : queueName = "'+matchObj.group('queueName')+'"') #log('QstatParser::parseQstatOutput : queueName = "'+matchObj.group('queueName')+'"')
#log('QstatParser::parseQstatOutput : machineName = "'+matchObj.group('machineName')+'"') #log('QstatParser::parseQstatOutput : machineName = "'+matchObj.group('machineName')+'"')
currentQueueMachine = queueMachine currentQueueMachine = queueMachine

View File

@ -1,4 +1,10 @@
class QueueMachineStateFlags:
    """
    Bit values describing the state of a queue machine. Each flag occupies its
    own bit, so several of them can be held in a single integer.
    """
    DISABLED = 1 << 0  # the queue machine is disabled
    ALARM = 1 << 1  # the queue machine is in alarm state (see man qstat)
    UNKNOWN = 1 << 2  # the queue machine is in unknown state because sge_execd cannot be contacted (see man qstat)
    ERROR = 1 << 3  # the queue is in error state
class QueueMachine: class QueueMachine:
""" """
a QueueMachine instance represents a given SGE queue on a given machine (eg allintel.q@simpatix10) a QueueMachine instance represents a given SGE queue on a given machine (eg allintel.q@simpatix10)
@ -36,4 +42,14 @@ class QueueMachine:
def getCpuLoad( self ): def getCpuLoad( self ):
assert( self.m_fCpuLoad != None ) assert( self.m_fCpuLoad != None )
return self.m_fCpuLoad return self.m_fCpuLoad
def setState( self, state ):
self.m_stateFlags = state
def isDisabled( self ):
    """
    Tells whether the DISABLED bit is set in the stored state flags.

    returns: the stored flags masked with QueueMachineStateFlags.DISABLED,
        i.e. a non-zero integer if the bit is set and 0 otherwise
    """
    stateFlags = self.m_stateFlags
    return stateFlags & QueueMachineStateFlags.DISABLED
def isInErrorState( self ):
    """
    Tells whether the ERROR bit is set in the stored state flags.

    returns: the stored flags masked with QueueMachineStateFlags.ERROR,
        i.e. a non-zero integer if the bit is set and 0 otherwise
    """
    stateFlags = self.m_stateFlags
    return stateFlags & QueueMachineStateFlags.ERROR
"""
def getStateAsString( self ):
assert( self.m_strState != None )
return self.m_strState
"""