Ajout de l'affichage des queues (on peut désormais voir sur quelle queue les jobs tournent). Cela a nécessité des modifications dans les bibliothèques cluster controller, mais certaines des modifications que je commite sont plus anciennes (je ne sais plus quand elles ont été faites, ni dans quel but).
This commit is contained in:
parent
39616dc73c
commit
fa2dc0cd67
|
@ -4,6 +4,9 @@ class JobStateFlags:
|
||||||
WAITING=2 # the job is waiting
|
WAITING=2 # the job is waiting
|
||||||
QUEUED=4 # not sure what that exactly means but it reflects the q state of jobs as seen in the pending jobs list from qstat -f -u \*
|
QUEUED=4 # not sure what that exactly means but it reflects the q state of jobs as seen in the pending jobs list from qstat -f -u \*
|
||||||
TRANSFERING=8
|
TRANSFERING=8
|
||||||
|
DELETED=16
|
||||||
|
HOLD=32
|
||||||
|
ERROR=64
|
||||||
|
|
||||||
class ParallelEnvironment:
|
class ParallelEnvironment:
|
||||||
MPI=1
|
MPI=1
|
||||||
|
@ -88,9 +91,13 @@ class Job:
|
||||||
if self.m_scriptName:
|
if self.m_scriptName:
|
||||||
assert( self.m_scriptName == jobScriptName )
|
assert( self.m_scriptName == jobScriptName )
|
||||||
self.m_scriptName = jobScriptName
|
self.m_scriptName = jobScriptName
|
||||||
def addSlots( self, machineName, numSlots ):
|
def addSlots( self, queueMachineName, numSlots ):
|
||||||
assert( self.m_slots.get( machineName ) == None )
|
assert( self.m_slots.get( queueMachineName ) == None )
|
||||||
self.m_slots[ machineName ] = numSlots
|
if self.m_slots.get( queueMachineName ) == None:
|
||||||
|
self.m_slots[ queueMachineName ] = numSlots
|
||||||
|
else:
|
||||||
|
# should never happen
|
||||||
|
self.m_slots[ queueMachineName ] += numSlots
|
||||||
def getSlots( self ):
|
def getSlots( self ):
|
||||||
return self.m_slots
|
return self.m_slots
|
||||||
def setNumRequiredSlots( self, numSlots ):
|
def setNumRequiredSlots( self, numSlots ):
|
||||||
|
|
|
@ -19,6 +19,12 @@ class QstatParser:
|
||||||
jobState += JobStateFlags.QUEUED
|
jobState += JobStateFlags.QUEUED
|
||||||
elif c == 't':
|
elif c == 't':
|
||||||
jobState += JobStateFlags.TRANSFERING
|
jobState += JobStateFlags.TRANSFERING
|
||||||
|
elif c == 'd':
|
||||||
|
jobState += JobStateFlags.DELETED
|
||||||
|
elif c == 'h':
|
||||||
|
jobState += JobStateFlags.HOLD
|
||||||
|
elif c == 'E':
|
||||||
|
jobState += JobStateFlags.ERROR
|
||||||
else:
|
else:
|
||||||
assert False, 'unhandled job state flag :"' + c + '"'
|
assert False, 'unhandled job state flag :"' + c + '"'
|
||||||
return jobState
|
return jobState
|
||||||
|
@ -39,7 +45,7 @@ class QstatParser:
|
||||||
jobRegularExp = re.compile( '^[ ]*(?P<jobId>[^ ]+)[ ]+[0-9.]+[ ]+(?P<jobScriptName>[^ ]+)[ ]+(?P<jobOwner>[^ ]+)[ ]+(?P<jobStatus>[^ ]+)[ ]+(?P<jobStartOrSubmitTime>[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])[ ]+(?P<numSlots>[0-9]+)[ ]+(?P<jobArrayDetails>[^\n]*)[\s]*$' )
|
jobRegularExp = re.compile( '^[ ]*(?P<jobId>[^ ]+)[ ]+[0-9.]+[ ]+(?P<jobScriptName>[^ ]+)[ ]+(?P<jobOwner>[^ ]+)[ ]+(?P<jobStatus>[^ ]+)[ ]+(?P<jobStartOrSubmitTime>[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])[ ]+(?P<numSlots>[0-9]+)[ ]+(?P<jobArrayDetails>[^\n]*)[\s]*$' )
|
||||||
# example of machine line :
|
# example of machine line :
|
||||||
# allintel.q@simpatix34.univ-ren BIP 0/6/8 6.00 darwin-x86
|
# allintel.q@simpatix34.univ-ren BIP 0/6/8 6.00 darwin-x86
|
||||||
machineRegularExp = re.compile( '^(?P<queueName>[^@]+)@(?P<machineName>[^.]+)[^ ]+[ ]+(?P<queueTypeString>[^ ]+)[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)[?]*' )
|
machineRegularExp = re.compile( '^(?P<queueName>[^@]+)@(?P<machineName>[^.]+)[^ ]+[ ]+(?P<queueTypeString>[^ ]+)[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)[ ]+(?P<cpuLoad>[^ ]+)[?]*' )
|
||||||
pendingJobsHeaderRegularExp = re.compile( '^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*' )
|
pendingJobsHeaderRegularExp = re.compile( '^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*' )
|
||||||
while( len(line) > 0 ):
|
while( len(line) > 0 ):
|
||||||
# print line
|
# print line
|
||||||
|
@ -112,7 +118,7 @@ class QstatParser:
|
||||||
else:
|
else:
|
||||||
assert( not bInPendingJobsSection ) # if we are in the pending jobs section, the job should be new
|
assert( not bInPendingJobsSection ) # if we are in the pending jobs section, the job should be new
|
||||||
if not bInPendingJobsSection:
|
if not bInPendingJobsSection:
|
||||||
job.addSlots( currentQueueMachine.getMachineName(), int(matchObj.group('numSlots')) )
|
job.addSlots( currentQueueMachine.getName(), int(matchObj.group('numSlots')) )
|
||||||
else:
|
else:
|
||||||
# the current line does not describe a job
|
# the current line does not describe a job
|
||||||
if not bInPendingJobsSection:
|
if not bInPendingJobsSection:
|
||||||
|
@ -126,6 +132,11 @@ class QstatParser:
|
||||||
#log('matchObj.group(queueTypeString) :' + matchObj.group('queueTypeString'))
|
#log('matchObj.group(queueTypeString) :' + matchObj.group('queueTypeString'))
|
||||||
#log('matchObj.group(numTotalSlots) :' + matchObj.group('numTotalSlots'))
|
#log('matchObj.group(numTotalSlots) :' + matchObj.group('numTotalSlots'))
|
||||||
queueMachine.setNumSlots( int( matchObj.group('numTotalSlots') ) )
|
queueMachine.setNumSlots( int( matchObj.group('numTotalSlots') ) )
|
||||||
|
queueMachine.setNumUsedSlots( int( matchObj.group('numUsedSlots') ) )
|
||||||
|
strCpuLoad = matchObj.group('cpuLoad')
|
||||||
|
if strCpuLoad != '-NA-':
|
||||||
|
queueMachine.setCpuLoad( float(strCpuLoad) )
|
||||||
|
|
||||||
|
|
||||||
#log('QstatParser::parseQstatOutput : queueName = "'+matchObj.group('queueName')+'"')
|
#log('QstatParser::parseQstatOutput : queueName = "'+matchObj.group('queueName')+'"')
|
||||||
#log('QstatParser::parseQstatOutput : machineName = "'+matchObj.group('machineName')+'"')
|
#log('QstatParser::parseQstatOutput : machineName = "'+matchObj.group('machineName')+'"')
|
||||||
|
|
|
@ -7,6 +7,8 @@ class QueueMachine:
|
||||||
self.m_queueName = queueName
|
self.m_queueName = queueName
|
||||||
self.m_machineName = machineName
|
self.m_machineName = machineName
|
||||||
self.m_numSlots = None
|
self.m_numSlots = None
|
||||||
|
self.m_numUsedSlots = None
|
||||||
|
self.m_fCpuLoad = None
|
||||||
def getName( self ):
|
def getName( self ):
|
||||||
"""
|
"""
|
||||||
returns the name of the machine queue (such as allintel.q@simpatix10)
|
returns the name of the machine queue (such as allintel.q@simpatix10)
|
||||||
|
@ -19,7 +21,19 @@ class QueueMachine:
|
||||||
return self.m_machineName
|
return self.m_machineName
|
||||||
def setNumSlots( self, numSlots ):
|
def setNumSlots( self, numSlots ):
|
||||||
self.m_numSlots = numSlots
|
self.m_numSlots = numSlots
|
||||||
|
def setNumUsedSlots( self, numSlots ):
|
||||||
|
self.m_numUsedSlots = numSlots
|
||||||
def getNumSlots( self ):
|
def getNumSlots( self ):
|
||||||
assert( self.m_numSlots != None )
|
assert( self.m_numSlots != None )
|
||||||
return self.m_numSlots
|
return self.m_numSlots
|
||||||
|
def getNumUsedSlots( self ):
|
||||||
|
assert( self.m_numUsedSlots != None )
|
||||||
|
return self.m_numUsedSlots
|
||||||
|
def setCpuLoad( self, fCpuLoad ):
|
||||||
|
self.m_fCpuLoad = fCpuLoad
|
||||||
|
def cpuLoadIsAvailable( self ):
|
||||||
|
return self.m_fCpuLoad != None
|
||||||
|
def getCpuLoad( self ):
|
||||||
|
assert( self.m_fCpuLoad != None )
|
||||||
|
return self.m_fCpuLoad
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue