fixed bug: made parseQstatOutput handle domains other than ipr.univ-rennes1.fr properly

before this fix, `parseQstatOutput` created a list of machines with a hardcoded ipr.univ-rennes1.fr domain, resulting in potentially wrong fqdn

work related to https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3693
This commit is contained in:
Guillaume Raffy 2023-11-17 18:28:20 +01:00
parent 5bf1cbe664
commit 891182587d
3 changed files with 21 additions and 21 deletions

View File

@ -54,9 +54,11 @@ class QstatParser:
assert False, 'unhandled queue machine state flag :"' + c + '"'
return queueMachineState
def parseQstatOutput(self, qstatOutput):
def parseQstatOutput(self, qstatOutput, cluster_domain: str = 'ipr.univ-rennes1.fr'):
"""
parses result of command 'qstat -f -u \* -pri'
parses result of command 'qstat -f -u \\* -pri'
cluster_domain: network domain of the cluster (eg 'ipr.univ-rennes.fr'). This information is missing from qstat's output and is used to form the fully qualified domain name of the cluster machines.
"""
logging.debug('qstatOutput type : %s' % type(qstatOutput))
@ -70,25 +72,25 @@ class QstatParser:
for example, this function would return [1, 2, 3, 4, 6, 7, 8] for the input string "1-4:1,6-8:1"
"""
task_ids = []
astrRanges = re.split(',', task_ranges_sequence)
for strRange in astrRanges:
singleIndexMatch = re.match('^(?P<elementIndex>[0-9]+)$', strRange)
if singleIndexMatch:
iElementIndex = int(singleIndexMatch.group('elementIndex'))
task_ids.extend(range(iElementIndex, iElementIndex + 1))
ranges = re.split(',', task_ranges_sequence)
for task_range in ranges:
single_index_match = re.match('^(?P<elementIndex>[0-9]+)$', task_range)
if single_index_match:
element_index = int(single_index_match.group('elementIndex'))
task_ids.extend(range(element_index, element_index + 1))
else:
# we expect strRange to be of the form "1-4:1", where :
# the 1st number is the min element index (sge imposes it to be greater than 0)
# the 2nd number is the max element index
# the 3rd number is the step between consecutive element indices
rangeMatch = re.match('^(?P<minElementIndex>[0-9]+)-(?P<maxElementIndex>[0-9]+):(?P<stepBetweenIndices>[0-9]+)$', strRange)
if rangeMatch is None:
logError('unexpected format for job array details : "%s" (line="%s"' % (strRange, line))
range_match = re.match('^(?P<minElementIndex>[0-9]+)-(?P<maxElementIndex>[0-9]+):(?P<stepBetweenIndices>[0-9]+)$', task_range)
if range_match is None:
logError('unexpected format for job array details : "%s" (line="%s"' % (task_range, line))
assert False
iMinElementIndex = int(rangeMatch.group('minElementIndex'))
iMaxElementIndex = int(rangeMatch.group('maxElementIndex'))
iStepBetweenIndices = int(rangeMatch.group('stepBetweenIndices'))
task_ids.extend(range(iMinElementIndex, iMaxElementIndex + 1, iStepBetweenIndices))
min_element_index = int(range_match.group('minElementIndex'))
min_element_index = int(range_match.group('maxElementIndex'))
step_between_indices = int(range_match.group('stepBetweenIndices'))
task_ids.extend(range(min_element_index, min_element_index + 1, step_between_indices))
return task_ids
# ugly hack to work around the fact that qstat truncates the fqdn of cluster nodes
@ -97,7 +99,7 @@ class QstatParser:
# ---------------------------------------------------------------------------------
# main.q@physix88.ipr.univ-renne BIP 0/0/36 14.03 lx-amd64
# TODO: fix this properly by parsing the output of 'qstat -f -u \* -xml' instead of 'qstat -f -u \*'
qstatOutput = re.sub(r'\.univ[^ ]*', '.univ-rennes1.fr', qstatOutput)
qstatOutput = re.sub(r'\.ipr\.univ[^ ]*', f'.{cluster_domain}', qstatOutput)
jobsState = JobsState()
f = io.StringIO(qstatOutput)
@ -209,8 +211,7 @@ class QstatParser:
bInPendingJobsSection = True
currentQueueMachine = None
else:
# print line
None
pass
else:
# we are in a pending jobs section
matchObj = re.match('^[#]+$', line)
@ -218,7 +219,6 @@ class QstatParser:
# unexpected line
print('line = "' + line + '"')
assert False
None
line = f.readline()
f.close()
return jobsState

View File

@ -69,7 +69,7 @@ def executeCommand(command):
return result
def executeCommandOn(target_machine_fqdn, command, user=None):
def executeCommandOn(target_machine_fqdn: str, command: str, user: str = None):
"""
execute command on a local or remote machine (using ssh then)
:param str user: if not None, the user that should be used to execute the command (instead of the current user)

View File

@ -20,7 +20,7 @@ class CoclutoTestCase(unittest.TestCase):
qstat_output = file.read()
# qstatParser = ClusterController.QstatParser()
qstatParser = QstatParser()
job_state = qstatParser.parseQstatOutput(qstat_output)
job_state = qstatParser.parseQstatOutput(qstat_output, cluster_domain='ipr.univ-rennes1.fr')
self.assertIsInstance(job_state, JobsState)