Ajout de l'affichage des queues (on peut désormais voir sur quelle queue les jobs tournent). Cela a nécessité des modifications dans les bibliothèques cluster controller, mais certaines des modifications que je commite sont plus anciennes (je ne sais plus quand elles ont été faites, ni dans quel but).

This commit is contained in:
Guillaume Raffy 2012-05-29 14:05:18 +00:00
parent 39616dc73c
commit fa2dc0cd67
3 changed files with 37 additions and 5 deletions

View File

@ -4,6 +4,9 @@ class JobStateFlags:
WAITING=2 # the job is waiting
QUEUED=4 # not sure what that exactly means but it reflects the q state of jobs as seen in the pending jobs list from qstat -f -u \*
TRANSFERING=8
DELETED=16
HOLD=32
ERROR=64
class ParallelEnvironment:
MPI=1
@ -88,9 +91,13 @@ class Job:
if self.m_scriptName:
assert( self.m_scriptName == jobScriptName )
self.m_scriptName = jobScriptName
def addSlots( self, machineName, numSlots ):
    """
    records the number of slots associated with the given machine

    @param machineName key under which the slot count is stored in self.m_slots
    @param numSlots the slot count stored for that key

    an assertion fails if a slot count is already stored for machineName
    """
    # 'is None' is the identity test ; '== None' can be fooled by objects overriding __eq__
    assert self.m_slots.get( machineName ) is None, 'slots already recorded for "' + str( machineName ) + '"'
    self.m_slots[ machineName ] = numSlots
def addSlots( self, queueMachineName, numSlots ):
    """
    records the number of slots associated with the given queue machine

    @param queueMachineName key under which the slot count is stored in self.m_slots
    @param numSlots the slot count to record for that key

    a key is expected to be added only once : an assertion fails if a slot
    count is already stored for queueMachineName. If assertions are disabled,
    numSlots is added to the count already stored instead of replacing it.
    """
    # a single lookup is enough ; 'is None' is the identity test ('== None' can be fooled by __eq__ overrides)
    alreadyRecordedSlots = self.m_slots.get( queueMachineName )
    assert alreadyRecordedSlots is None
    if alreadyRecordedSlots is None:
        self.m_slots[ queueMachineName ] = numSlots
    else:
        # should never happen (only reachable when assertions are disabled)
        self.m_slots[ queueMachineName ] = alreadyRecordedSlots + numSlots
def getSlots( self ):
    """
    returns the slots recorded for this job : the m_slots container itself, not a copy
    """
    recordedSlots = self.m_slots
    return recordedSlots
def setNumRequiredSlots( self, numSlots ):

View File

@ -19,6 +19,12 @@ class QstatParser:
jobState += JobStateFlags.QUEUED
elif c == 't':
jobState += JobStateFlags.TRANSFERING
elif c == 'd':
jobState += JobStateFlags.DELETED
elif c == 'h':
jobState += JobStateFlags.HOLD
elif c == 'E':
jobState += JobStateFlags.ERROR
else:
assert False, 'unhandled job state flag :"' + c + '"'
return jobState
@ -39,7 +45,7 @@ class QstatParser:
jobRegularExp = re.compile( '^[ ]*(?P<jobId>[^ ]+)[ ]+[0-9.]+[ ]+(?P<jobScriptName>[^ ]+)[ ]+(?P<jobOwner>[^ ]+)[ ]+(?P<jobStatus>[^ ]+)[ ]+(?P<jobStartOrSubmitTime>[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])[ ]+(?P<numSlots>[0-9]+)[ ]+(?P<jobArrayDetails>[^\n]*)[\s]*$' )
# example of machine line :
# allintel.q@simpatix34.univ-ren BIP 0/6/8 6.00 darwin-x86
machineRegularExp = re.compile( '^(?P<queueName>[^@]+)@(?P<machineName>[^.]+)[^ ]+[ ]+(?P<queueTypeString>[^ ]+)[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)[?]*' )
machineRegularExp = re.compile( '^(?P<queueName>[^@]+)@(?P<machineName>[^.]+)[^ ]+[ ]+(?P<queueTypeString>[^ ]+)[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)[ ]+(?P<cpuLoad>[^ ]+)[?]*' )
pendingJobsHeaderRegularExp = re.compile( '^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*' )
while( len(line) > 0 ):
# print line
@ -112,7 +118,7 @@ class QstatParser:
else:
assert( not bInPendingJobsSection ) # if we are in the pending jobs section, the job should be new
if not bInPendingJobsSection:
job.addSlots( currentQueueMachine.getMachineName(), int(matchObj.group('numSlots')) )
job.addSlots( currentQueueMachine.getName(), int(matchObj.group('numSlots')) )
else:
# the current line does not describe a job
if not bInPendingJobsSection:
@ -126,6 +132,11 @@ class QstatParser:
#log('matchObj.group(queueTypeString) :' + matchObj.group('queueTypeString'))
#log('matchObj.group(numTotalSlots) :' + matchObj.group('numTotalSlots'))
queueMachine.setNumSlots( int( matchObj.group('numTotalSlots') ) )
queueMachine.setNumUsedSlots( int( matchObj.group('numUsedSlots') ) )
strCpuLoad = matchObj.group('cpuLoad')
if strCpuLoad != '-NA-':
queueMachine.setCpuLoad( float(strCpuLoad) )
#log('QstatParser::parseQstatOutput : queueName = "'+matchObj.group('queueName')+'"')
#log('QstatParser::parseQstatOutput : machineName = "'+matchObj.group('machineName')+'"')

View File

@ -7,6 +7,8 @@ class QueueMachine:
self.m_queueName = queueName
self.m_machineName = machineName
self.m_numSlots = None
self.m_numUsedSlots = None
self.m_fCpuLoad = None
def getName( self ):
"""
returns the name of the machine queue (such as allintel.q@simpatix10)
@ -19,7 +21,19 @@ class QueueMachine:
return self.m_machineName
def setNumSlots( self, numSlots ):
    """
    stores the number of slots of this queue machine

    @param numSlots the value that getNumSlots will return
    """
    self.m_numSlots = numSlots
def setNumUsedSlots( self, numSlots ):
    """
    stores the number of used slots of this queue machine

    @param numSlots the number of used slots (despite its name) ; this is the value that getNumUsedSlots will return
    """
    self.m_numUsedSlots = numSlots
def getNumSlots( self ):
    """
    returns the number of slots stored by setNumSlots

    an assertion fails if no value has been stored yet (m_numSlots is still None)
    """
    # 'is not None' is the identity test ; a stored value of 0 is valid and must be accepted
    assert self.m_numSlots is not None, 'the number of slots has not been set'
    return self.m_numSlots
def getNumUsedSlots( self ):
    """
    returns the number of used slots stored by setNumUsedSlots

    an assertion fails if no value has been stored yet (m_numUsedSlots is still None)
    """
    # 'is not None' is the identity test ; a stored value of 0 is valid and must be accepted
    assert self.m_numUsedSlots is not None, 'the number of used slots has not been set'
    return self.m_numUsedSlots
def setCpuLoad( self, fCpuLoad ):
    """
    stores the cpu load of this queue machine

    @param fCpuLoad the value that getCpuLoad will return
    """
    self.m_fCpuLoad = fCpuLoad
def cpuLoadIsAvailable( self ):
    """
    tells whether a cpu load has been stored for this queue machine

    @return True if m_fCpuLoad holds a value (a load of 0.0 counts as available), False if it is still None
    """
    # 'is not None' is the identity test and always yields a plain bool
    return self.m_fCpuLoad is not None
def getCpuLoad( self ):
    """
    returns the cpu load stored by setCpuLoad

    an assertion fails if no cpu load has been stored (m_fCpuLoad is still None) ;
    cpuLoadIsAvailable can be used to check this beforehand
    """
    # 'is not None' is the identity test ; a stored load of 0.0 is valid and must be accepted
    assert self.m_fCpuLoad is not None, 'the cpu load is not available'
    return self.m_fCpuLoad