# 2011-10-07 17:43:45 +02:00
import StringIO
import re
from JobsState import *
from QueueMachine import *
from Util import *
from Log import *
from Job import *
class QstatParser:
    """
    parses the textual output of sge's qstat command

    - parseQstatOutput handles the output of 'qstat -f -u \\* -pri' (queue machines, running jobs and pending jobs)
    - parseJobDetails handles the output of 'qstat -j <jobid>' (details of one job)
    """

    # examples of job line :
    #   43521 0.55108 Confidiso3 aghoufi      r     08/19/2009 18:40:09     1
    # a typical job line in the pending jobs section looks like this :
    #   43645 0.00000 LC_LV_MC   aghoufi      qw    08/21/2009 08:14:58     1
    # a typical running job array line looks like this
    #   43619 0.56000 SimpleJobA raffy        r     08/20/2009 18:13:03     1 3
    # a typical job array line in the pending jobs section looks like this
    #   43646 0.00000 SimpleJobA raffy        qw    08/21/2009 09:56:40     1 1-4:1
    #
    # because of the -pri option, 4 extra columns follow the job priority :
    #   nurg     The job's total urgency value in normalized fashion.
    #   npprior  The job's -p priority in normalized fashion.
    #   ntckts   The job's ticket amount in normalized fashion.
    #   ppri     The job's -p priority as specified by the user.
    _JOB_LINE_REGEXP = re.compile(
        r'^[ ]*(?P<jobId>[^ ]+)'
        r'[ ]+(?P<JobPriority>[0-9.]+)'
        r'[ ]+(?P<nurg>[0-9.]+)'
        r'[ ]+(?P<npprior>[0-9.]+)'
        r'[ ]+(?P<ntckts>[0-9.]+)'
        r'[ ]+(?P<ppri>-?[0-9]+)'
        r'[ ]+(?P<jobScriptName>[^ ]+)'
        r'[ ]+(?P<jobOwner>[^ ]+)'
        r'[ ]+(?P<jobStatus>[^ ]+)'
        r'[ ]+(?P<jobStartOrSubmitTime>[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9])'
        r'[ ]+(?P<numSlots>[0-9]+)'
        r'[ ]+(?P<jobArrayDetails>[^\n]*)[\s]*$')

    # example of machine line :
    #   allintel.q@simpatix34.univ-ren BIP   0/6/8          6.00     darwin-x86
    _MACHINE_LINE_REGEXP = re.compile(
        r'^(?P<queueName>[^@]+)@(?P<machineName>[^ ]+)'
        r'[ ]+(?P<queueTypeString>[^ ]+)'
        r'[ ]+(?P<numReservedSlots>[^/]+)/(?P<numUsedSlots>[^/]+)/(?P<numTotalSlots>[^ ]+)'
        r'[ ]+(?P<cpuLoad>[^ ]+)'
        r'[\s]+(?P<archName>[^ ]+)'
        r'[\s]+(?P<queueMachineStatus>[^\s]*)')

    # the line that separates the running jobs section from the pending jobs section
    _PENDING_JOBS_HEADER_REGEXP = re.compile(
        r'^ - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS - PENDING JOBS[?]*')

    # the only non-job lines tolerated inside the pending jobs section
    _PENDING_SEPARATOR_REGEXP = re.compile(r'^[#]+$')

    # task ids of a pending job array : either a single index ("3") or a range ("1-4:1")
    _SINGLE_TASK_REGEXP = re.compile(r'^(?P<elementIndex>[0-9]+)$')
    _TASK_RANGE_REGEXP = re.compile(
        r'^(?P<minElementIndex>[0-9]+)-(?P<maxElementIndex>[0-9]+):(?P<stepBetweenIndices>[0-9]+)$')

    # a "<field name>:   <field value>" line of 'qstat -j <jobid>'
    # the value group used to be '[?]*', which only matches literal question marks : no real field was ever parsed
    _JOB_DETAIL_FIELD_REGEXP = re.compile(r'^(?P<fieldName>[^:]+):[ ]+(?P<fieldValue>[^\n]*)$')

    def parseJobState(self, strJobStatus):
        """
        converts the state column of a qstat job line into a sum of JobStateFlags

        :param str strJobStatus: the job state as printed by qstat, one character per flag (eg 'qw', 'r', 'Eqw')
        :return: the sum of the JobStateFlags values matching each character (0 for an empty string)
        :raises AssertionError: if a character is not a known job state flag
        """
        jobState = 0
        for c in strJobStatus:
            if c == 'r':
                jobState += JobStateFlags.RUNNING
            elif c == 'w':
                jobState += JobStateFlags.WAITING
            elif c == 'q':
                jobState += JobStateFlags.QUEUED
            elif c == 't':
                jobState += JobStateFlags.TRANSFERING
            elif c == 'd':
                jobState += JobStateFlags.DELETED
            elif c == 'h':
                jobState += JobStateFlags.HOLD
            elif c == 's':
                jobState += JobStateFlags.SUSPENDED
            elif c == 'E':
                jobState += JobStateFlags.ERROR
            else:
                # raised explicitly (instead of 'assert False') so that the check survives python -O
                raise AssertionError('unhandled job state flag : "' + c + '"')
        return jobState

    def parseQueueMachineState(self, strQueueMachineStatus):
        """
        converts the states column of a qstat machine line into a sum of QueueMachineStateFlags

        :param str strQueueMachineStatus: the queue machine state as printed by qstat, one character per flag (eg 'au', 'd')
        :return: the sum of the QueueMachineStateFlags values matching each character (0 for an empty string)
        :raises AssertionError: if a character is not a known queue machine state flag
        """
        queueMachineState = 0
        for c in strQueueMachineStatus:
            if c == 'd':
                queueMachineState += QueueMachineStateFlags.DISABLED
            elif c == 'a':
                queueMachineState += QueueMachineStateFlags.ALARM
            elif c == 'u':
                queueMachineState += QueueMachineStateFlags.UNKNOWN
            elif c == 'E':
                queueMachineState += QueueMachineStateFlags.ERROR
            elif c == 'o':
                queueMachineState += QueueMachineStateFlags.OBSOLETE
            elif c == 's':
                queueMachineState += QueueMachineStateFlags.SUSPENDED
            else:
                # raised explicitly (instead of 'assert False') so that the check survives python -O
                raise AssertionError('unhandled queue machine state flag : "' + c + '"')
        return queueMachineState

    @staticmethod
    def _parse_pending_tasks(task_ranges_sequence, line=''):
        """
        parses a job's task ids encoded in the form of a string containing a sequence of ranges

        :param str task_ranges_sequence: a job's task ids encoded in the form of a string containing a sequence of
            non overlapping ranges separated with a comma. Each range is expected to be either a single index
            or in the form "<min_index>-<max_index>:<step>"
        :param str line: the qstat line being parsed (only used to make the error message more helpful)
        :return list(int): the list of task ids
        :raises AssertionError: if one of the ranges doesn't have the expected format

        for example, this function would return [1, 2, 3, 4, 6, 7, 8] for the input string "1-4:1,6-8:1"
        """
        task_ids = []
        for strRange in task_ranges_sequence.split(','):
            singleIndexMatch = QstatParser._SINGLE_TASK_REGEXP.match(strRange)
            if singleIndexMatch:
                task_ids.append(int(singleIndexMatch.group('elementIndex')))
                continue
            # we expect strRange to be of the form "1-4:1", where :
            #  the 1st number is the min element index (sge imposes it to be greater than 0)
            #  the 2nd number is the max element index
            #  the 3rd number is the step between consecutive element indices
            rangeMatch = QstatParser._TASK_RANGE_REGEXP.match(strRange)
            if rangeMatch is None:
                message = 'unexpected format for job array details : "%s" (line="%s"' % (strRange, line)
                logError(message)
                raise AssertionError(message)
            iMinElementIndex = int(rangeMatch.group('minElementIndex'))
            iMaxElementIndex = int(rangeMatch.group('maxElementIndex'))
            iStepBetweenIndices = int(rangeMatch.group('stepBetweenIndices'))
            task_ids.extend(range(iMinElementIndex, iMaxElementIndex + 1, iStepBetweenIndices))
        return task_ids

    def _parseQueueMachineLine(self, matchObj):
        """
        builds the QueueMachine described by a machine line

        :param matchObj: the match of _MACHINE_LINE_REGEXP on the machine line
        :return: a QueueMachine with its slots, cpu load (when available) and state set
        """
        queueMachine = QueueMachine(matchObj.group('queueName'), matchObj.group('machineName'))
        queueMachine.setNumSlots(int(matchObj.group('numTotalSlots')))
        queueMachine.setNumUsedSlots(int(matchObj.group('numUsedSlots')))
        strCpuLoad = matchObj.group('cpuLoad')
        if strCpuLoad != '-NA-':
            # qstat prints '-NA-' when the load is not available : in that case the cpu load is left unset
            queueMachine.setCpuLoad(float(strCpuLoad))
        queueMachine.setState(self.parseQueueMachineState(matchObj.group('queueMachineStatus')))
        return queueMachine

    def _registerJobLine(self, jobsState, matchObj, currentQueueMachine, bInPendingJobsSection, line):
        """
        updates jobsState with the job (or job array elements) described by a job line

        :param jobsState: the JobsState being built
        :param matchObj: the match of _JOB_LINE_REGEXP on the job line
        :param currentQueueMachine: the queue machine the job line belongs to (None in the pending jobs section)
        :param bool bInPendingJobsSection: True if the line comes from the pending jobs section
        :param str line: the job line itself (only used in error messages)
        :raises AssertionError: if the line is inconsistent with what has been parsed so far
        """
        import time  # local import : nothing in this file's import list explicitly provides the time module

        if not bInPendingJobsSection and not currentQueueMachine:
            raise AssertionError('running job line found before any queue machine line : "%s"' % line)

        iJobId = int(matchObj.group('jobId'))
        jobState = self.parseJobState(matchObj.group('jobStatus'))
        strJobArrayDetails = matchObj.group('jobArrayDetails')
        bIsJobArray = (len(strJobArrayDetails) != 0)
        iNumSlots = int(matchObj.group('numSlots'))

        # each element of a job array is treated as a separate job for the sake of simplicity.
        # For these elements, the job id in sge sense is the same, but they are different in this program's sense
        task_ids = [0]  # just one element, unless it's a job array (in which case this value is not used)
        if bIsJobArray:
            if bInPendingJobsSection:
                task_ids = self._parse_pending_tasks(strJobArrayDetails, line)
            else:
                # we are in the running jobs section, and here we expect the strJobArrayDetails to just contain the index of the job array element
                iJobArrayElementIndex = int(strJobArrayDetails)
                if iJobArrayElementIndex == 0:
                    # sge does not allow element indices to be 0
                    raise AssertionError('unexpected job array element index 0 (line="%s")' % line)
                task_ids = [iJobArrayElementIndex]

        for task_id in task_ids:
            if bIsJobArray:
                jobId = JobId(iJobId, task_id)
            else:
                jobId = JobId(iJobId)
            job = jobsState.getJob(jobId)
            if job is None:
                # this job hasn't been encountered yet in the output of qstat ...
                # we could either be in the pending jobs section or in the running jobs section
                job = Job(jobId)
                jobsState.addJob(job)
                job.setState(jobState)
                jobStartOrSubmitTime = time.strptime(matchObj.group('jobStartOrSubmitTime'), '%m/%d/%Y %H:%M:%S')
                if bInPendingJobsSection:
                    job.setSubmitTime(jobStartOrSubmitTime)
                else:
                    job.setStartTime(jobStartOrSubmitTime)
                job.setOwner(matchObj.group('jobOwner'))
                job.setScriptName(matchObj.group('jobScriptName'))
                if bInPendingJobsSection:
                    job.setNumRequiredSlots(iNumSlots)
            elif bInPendingJobsSection:
                # if we are in the pending jobs section, the job should be new
                raise AssertionError('job encountered more than once in the pending jobs section (line="%s")' % line)
            if not bInPendingJobsSection:
                # a running job appears once per queue machine it runs on : accumulate the slots it uses on each
                job.addSlots(currentQueueMachine.getName(), iNumSlots)

    def parseQstatOutput(self, qstatOutput):
        r"""
        parses result of command 'qstat -f -u \* -pri'

        :param str qstatOutput: the text printed by the qstat command
        :return: a JobsState containing the queue machines, the running jobs and the pending jobs found in qstatOutput
        :raises AssertionError: if qstatOutput contains something unexpected (running job before any machine line,
            unknown state flag, unexpected line in the pending jobs section, ...)
        """
        # ugly hack to work around the fact that qstat truncates the fqdn of cluster nodes
        # graffy@physix-master:~$ qstat -f -u \*
        # queuename                      qtype resv/used/tot. load_avg arch          states
        # ---------------------------------------------------------------------------------
        # main.q@physix88.ipr.univ-renne BIP   0/0/36         14.03    lx-amd64
        # TODO: fix this properly by parsing the output of 'qstat -f -u \* -xml' instead of 'qstat -f -u \*'
        qstatOutput = re.sub(r'\.univ[^ ]*', '.univ-rennes1.fr', qstatOutput)

        jobsState = JobsState()
        currentQueueMachine = None
        bInPendingJobsSection = False
        # splitlines(True) keeps the end of line characters, like the file-like readline() used previously
        for line in qstatOutput.splitlines(True):
            jobMatch = self._JOB_LINE_REGEXP.match(line)
            if jobMatch:
                # we are dealing with a job line
                self._registerJobLine(jobsState, jobMatch, currentQueueMachine, bInPendingJobsSection, line)
                continue

            # the current line does not describe a job
            if bInPendingJobsSection:
                # in the pending jobs section, the only other lines we expect are the '####' separators
                if not self._PENDING_SEPARATOR_REGEXP.match(line):
                    print('line = "' + line + '"')
                    raise AssertionError('unexpected line in the pending jobs section : "%s"' % line)
                continue

            # check if this line describes the status of a machine
            machineMatch = self._MACHINE_LINE_REGEXP.match(line)
            if machineMatch:
                currentQueueMachine = self._parseQueueMachineLine(machineMatch)
                jobsState.addQueueMachine(currentQueueMachine)
            elif self._PENDING_JOBS_HEADER_REGEXP.match(line):
                bInPendingJobsSection = True
                currentQueueMachine = None
            # any other line (column headers, separators) is ignored
        return jobsState

    def parseJobDetails(self, qstatOutput, job):
        """
        adds to job the details parsed from the output of the "qstat -j <jobid>" command

        :param str qstatOutput: the text printed by the "qstat -j <jobid>" command
        :param job: the job to update; its m_jobRequirements attribute receives the parsed requirements
        :raises AssertionError: if the job number doesn't match job's id, or if the parallel environment is not handled
        """
        for line in qstatOutput.splitlines(True):
            matchObj = self._JOB_DETAIL_FIELD_REGEXP.match(line)
            if not matchObj:
                continue
            fieldName = matchObj.group('fieldName')
            strFieldValue = matchObj.group('fieldValue').strip()
            if fieldName == 'job_number':
                if job.getId().asStr() != strFieldValue:
                    raise AssertionError('job number mismatch : expected "%s", got "%s"' % (job.getId().asStr(), strFieldValue))
            elif fieldName == 'hard_queue_list':
                job.m_jobRequirements.m_queues = strFieldValue.split(',')
            elif fieldName == 'parallel environment':
                # the value could be 'ompi range: 32'
                if re.match(r'ompi range: (?P<numSlots>[0-9]+)', strFieldValue):
                    job.m_jobRequirements.m_parallelEnvironment = ParallelEnvironment.MPI
                else:
                    raise AssertionError('unhandled parallel environment : "%s"' % strFieldValue)
            # the other fields are ignored