diff --git a/ClusterController/Job.py b/ClusterController/Job.py index 5efa861..6dd5cb6 100644 --- a/ClusterController/Job.py +++ b/ClusterController/Job.py @@ -4,6 +4,9 @@ class JobStateFlags: WAITING=2 # the job is waiting QUEUED=4 # not sure what that exactly means but it reflects the q state of jobs as seen in the pending jobs list from qstat -f -u \* TRANSFERING=8 + DELETED=16 + HOLD=32 + ERROR=64 class ParallelEnvironment: MPI=1 @@ -88,9 +91,13 @@ class Job: if self.m_scriptName: assert( self.m_scriptName == jobScriptName ) self.m_scriptName = jobScriptName - def addSlots( self, machineName, numSlots ): - assert( self.m_slots.get( machineName ) == None ) - self.m_slots[ machineName ] = numSlots + def addSlots( self, queueMachineName, numSlots ): + assert( self.m_slots.get( queueMachineName ) == None ) + if self.m_slots.get( queueMachineName ) == None: + self.m_slots[ queueMachineName ] = numSlots + else: + # should never happen + self.m_slots[ queueMachineName ] += numSlots def getSlots( self ): return self.m_slots def setNumRequiredSlots( self, numSlots ): diff --git a/ClusterController/QstatParser.py b/ClusterController/QstatParser.py index 4f82b95..7b29212 100644 --- a/ClusterController/QstatParser.py +++ b/ClusterController/QstatParser.py @@ -19,6 +19,12 @@ class QstatParser: jobState += JobStateFlags.QUEUED elif c == 't': jobState += JobStateFlags.TRANSFERING + elif c == 'd': + jobState += JobStateFlags.DELETED + elif c == 'h': + jobState += JobStateFlags.HOLD + elif c == 'E': + jobState += JobStateFlags.ERROR else: assert False, 'unhandled job state flag :"' + c + '"' return jobState @@ -39,7 +45,7 @@ class QstatParser: jobRegularExp = re.compile( '^[ ]*(?P[^ ]+)[ ]+[0-9.]+[ ]+(?P[^ ]+)[ ]+(?P[^ ]+)[ ]+(?P[^ ]+)[ ]+(?P[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])[ ]+(?P[0-9]+)[ ]+(?P[^\n]*)[\s]*$' ) # example of machine line : # allintel.q@simpatix34.univ-ren BIP 0/6/8 6.00 darwin-x86 - machineRegularExp = re.compile( '^(?P[^@]+)@(?P[^.]+)[^ ]+[ ]+(?P[^ ]+)[ ]+(?P[^/]+)/(?P[^/]+)/(?P[^ ]+)[?]*' ) + machineRegularExp = re.compile( '^(?P[^@]+)@(?P[^.]+)[^ ]+[ ]+(?P[^ ]+)[ ]+(?P[^/]+)/(?P[^/]+)/(?P[^ ]+)[ ]+(?P[^ ]+)[?]*' ) pendingJobsHeaderRegularExp = re.compile( '^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*' ) while( len(line) > 0 ): # print line @@ -112,7 +118,7 @@ class QstatParser: else: assert( not bInPendingJobsSection ) # if we are in the pending jobs section, the job should be new if not bInPendingJobsSection: - job.addSlots( currentQueueMachine.getMachineName(), int(matchObj.group('numSlots')) ) + job.addSlots( currentQueueMachine.getName(), int(matchObj.group('numSlots')) ) else: # the current line does not describe a job if not bInPendingJobsSection: @@ -126,6 +132,11 @@ class QstatParser: #log('matchObj.group(queueTypeString) :' + matchObj.group('queueTypeString')) #log('matchObj.group(numTotalSlots) :' + matchObj.group('numTotalSlots')) queueMachine.setNumSlots( int( matchObj.group('numTotalSlots') ) ) + queueMachine.setNumUsedSlots( int( matchObj.group('numUsedSlots') ) ) + strCpuLoad = matchObj.group('cpuLoad') + if strCpuLoad != '-NA-': + queueMachine.setCpuLoad( float(strCpuLoad) ) + #log('QstatParser::parseQstatOutput : queueName = "'+matchObj.group('queueName')+'"') #log('QstatParser::parseQstatOutput : machineName = "'+matchObj.group('machineName')+'"') diff --git a/ClusterController/QueueMachine.py b/ClusterController/QueueMachine.py index f62c819..72ac86e 100644 --- a/ClusterController/QueueMachine.py +++ b/ClusterController/QueueMachine.py @@ -7,6 +7,8 @@ class QueueMachine: self.m_queueName = queueName self.m_machineName = machineName self.m_numSlots = None + self.m_numUsedSlots = None + self.m_fCpuLoad = None def getName( self ): """ returns the name of the machine queue (such as allintel.q@simpatix10) @@ -19,7 +21,19 @@ class QueueMachine: return self.m_machineName def setNumSlots( self, numSlots ): self.m_numSlots = numSlots + def setNumUsedSlots( self, numSlots ): + self.m_numUsedSlots = numSlots def getNumSlots( self ): assert( self.m_numSlots != None ) return self.m_numSlots + def getNumUsedSlots( self ): + assert( self.m_numUsedSlots != None ) + return self.m_numUsedSlots + def setCpuLoad( self, fCpuLoad ): + self.m_fCpuLoad = fCpuLoad + def cpuLoadIsAvailable( self ): + return self.m_fCpuLoad != None + def getCpuLoad( self ): + assert( self.m_fCpuLoad != None ) + return self.m_fCpuLoad