# cocluto/ClusterController/JobsState.py
#
# 86 lines
# 2.8 KiB
# Python
# Raw Normal View History

from Log import *
class JobsState:
    r"""
    represents a snapshot of the state of SGE jobs as seen by the SGE command "qstat -f -u \*"

    The snapshot holds three things:
      - the jobs, indexed by job id
      - the elements of each job array, indexed by job array id then by job id
      - the queue machines (such as allintel.q@simpatix10), indexed by name

    Job objects are expected to provide getId(), getSlots() and isPending();
    job ids are expected to be hashable and to provide isJobArrayElement()
    and m_iJobId; queue machines are expected to provide getName(),
    getNumSlots() and m_machineName. These are the only members used here.
    """

    def __init__(self):
        self.m_jobs = {}  # dictionary of jobs, indexed by job id
        self.m_jobArrayJobs = {}  # a dictionary of jobs for each job array, indexed by job array id
        self.m_queueMachines = {}  # dictionary of queue machines such as allintel.q@simpatix10, indexed by name
        self.m_stateTime = None  # the time at which the state was snapshot

    def deleteAllJobs(self):
        """
        forgets all jobs and job arrays (queue machines and snapshot time are left untouched)
        """
        self.m_jobs = {}
        self.m_jobArrayJobs = {}

    def addJob(self, job):
        """
        registers a job; if its id denotes a job array element, the job is also
        recorded in the dictionary of its job array
        """
        jobId = job.getId()
        self.m_jobs[jobId] = job
        if jobId.isJobArrayElement():
            # setdefault creates the per-array dictionary the first time an
            # element of this job array is seen
            tasks = self.m_jobArrayJobs.setdefault(jobId.m_iJobId, {})
            tasks[jobId] = job

    def getJob(self, jobId):
        """
        returns the job with the given id, or None if there is no such job
        """
        return self.m_jobs.get(jobId)

    def getJobArrayJobs(self, iJobArrayId):
        """
        returns the dictionary (job id -> job) of the known elements of the given
        job array, or None if no element of this job array has been added
        """
        return self.m_jobArrayJobs.get(iJobArrayId)

    def setTime(self, stateTime):
        """
        sets the time at which the state was snapshot
        """
        self.m_stateTime = stateTime

    def getTime(self):
        """
        returns the time at which the state was snapshot (None if it has not been set)
        """
        return self.m_stateTime

    def getJobsOnMachine(self, machineName):
        """
        returns a dictionary (job id -> job) of the jobs that hold slots on the given machine

        The keys of job.getSlots() are queue machine names of the form
        <queue>@<machine>; a key without '@' raises IndexError.
        """
        jobsOnMachine = {}
        # items() instead of iteritems() : works with both python 2 and python 3
        for jobId, job in self.m_jobs.items():
            for queueMachineName in job.getSlots():
                jobMachineName = queueMachineName.split('@')[1]
                if jobMachineName == machineName:
                    jobsOnMachine[jobId] = job
                    break  # the job is recorded; no need to look at its other slots
        return jobsOnMachine

    def getNumFreeSlotsOnQueueMachine(self, queueMachine):
        """
        returns the number of slots of the given queue machine that are not used by any job

        An AssertionError is raised if the jobs use more slots than the queue machine has.
        """
        queueMachineName = queueMachine.getName()
        numUsedSlots = 0
        for job in self.m_jobs.values():
            numUsedSlotsByThisJob = job.getSlots().get(queueMachineName)
            if numUsedSlotsByThisJob is not None:
                numUsedSlots += numUsedSlotsByThisJob
        numFreeSlots = queueMachine.getNumSlots() - numUsedSlots
        assert numFreeSlots >= 0, 'more slots are used than available on this queue machine'
        return numFreeSlots

    def addQueueMachine(self, queueMachine):
        """
        registers a queue machine, indexed by its name (replaces any queue machine with the same name)
        """
        self.m_queueMachines[queueMachine.getName()] = queueMachine

    def getQueueMachine(self, machineName):
        """
        finds the queue machine associated with a machine

        Returns None if no queue machine is on this machine. An AssertionError
        is raised if more than one queue machine is on this machine.
        """
        queueMachine = None
        for qm in self.m_queueMachines.values():
            if qm.m_machineName == machineName:
                assert queueMachine is None  # to be sure that no more than one queue machine is on a given machine
                queueMachine = qm
        return queueMachine

    def getQueueMachines(self):
        """
        returns the dictionary of queue machines, indexed by name (the internal dictionary itself, not a copy)
        """
        return self.m_queueMachines

    def getPendingJobs(self):
        """
        returns a dictionary (job id -> job) of the jobs for which isPending() is true
        """
        pendingJobs = {}
        for job in self.m_jobs.values():
            if job.isPending():
                pendingJobs[job.getId()] = job
        return pendingJobs