fixed bug: made parseQstatOutput handle domains other than ipr.univ-rennes1.fr properly

before this fix, `parseQstatOutput` created a list of machines with a hardcoded ipr.univ-rennes1.fr domain, resulting in potentially wrong fqdns

work related to https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3693
This commit is contained in:
Guillaume Raffy 2023-11-17 18:28:20 +01:00
parent 5bf1cbe664
commit 891182587d
3 changed files with 21 additions and 21 deletions

View File

@ -54,9 +54,11 @@ class QstatParser:
assert False, 'unhandled queue machine state flag :"' + c + '"' assert False, 'unhandled queue machine state flag :"' + c + '"'
return queueMachineState return queueMachineState
def parseQstatOutput(self, qstatOutput): def parseQstatOutput(self, qstatOutput, cluster_domain: str = 'ipr.univ-rennes1.fr'):
""" """
parses result of command 'qstat -f -u \* -pri' parses result of command 'qstat -f -u \\* -pri'
cluster_domain: network domain of the cluster (eg 'ipr.univ-rennes.fr'). This information is missing from qstat's output and is used to form the fully qualified domain name of the cluster machines.
""" """
logging.debug('qstatOutput type : %s' % type(qstatOutput)) logging.debug('qstatOutput type : %s' % type(qstatOutput))
@ -70,25 +72,25 @@ class QstatParser:
for example, this function would return [1, 2, 3, 4, 6, 7, 8] for the input string "1-4:1,6-8:1" for example, this function would return [1, 2, 3, 4, 6, 7, 8] for the input string "1-4:1,6-8:1"
""" """
task_ids = [] task_ids = []
astrRanges = re.split(',', task_ranges_sequence) ranges = re.split(',', task_ranges_sequence)
for strRange in astrRanges: for task_range in ranges:
singleIndexMatch = re.match('^(?P<elementIndex>[0-9]+)$', strRange) single_index_match = re.match('^(?P<elementIndex>[0-9]+)$', task_range)
if singleIndexMatch: if single_index_match:
iElementIndex = int(singleIndexMatch.group('elementIndex')) element_index = int(single_index_match.group('elementIndex'))
task_ids.extend(range(iElementIndex, iElementIndex + 1)) task_ids.extend(range(element_index, element_index + 1))
else: else:
# we expect strRange to be of the form "1-4:1", where : # we expect strRange to be of the form "1-4:1", where :
# the 1st number is the min element index (sge imposes it to be greater than 0) # the 1st number is the min element index (sge imposes it to be greater than 0)
# the 2nd number is the max element index # the 2nd number is the max element index
# the 3rd number is the step between consecutive element indices # the 3rd number is the step between consecutive element indices
rangeMatch = re.match('^(?P<minElementIndex>[0-9]+)-(?P<maxElementIndex>[0-9]+):(?P<stepBetweenIndices>[0-9]+)$', strRange) range_match = re.match('^(?P<minElementIndex>[0-9]+)-(?P<maxElementIndex>[0-9]+):(?P<stepBetweenIndices>[0-9]+)$', task_range)
if rangeMatch is None: if range_match is None:
logError('unexpected format for job array details : "%s" (line="%s"' % (strRange, line)) logError('unexpected format for job array details : "%s" (line="%s"' % (task_range, line))
assert False assert False
iMinElementIndex = int(rangeMatch.group('minElementIndex')) min_element_index = int(range_match.group('minElementIndex'))
iMaxElementIndex = int(rangeMatch.group('maxElementIndex')) min_element_index = int(range_match.group('maxElementIndex'))
iStepBetweenIndices = int(rangeMatch.group('stepBetweenIndices')) step_between_indices = int(range_match.group('stepBetweenIndices'))
task_ids.extend(range(iMinElementIndex, iMaxElementIndex + 1, iStepBetweenIndices)) task_ids.extend(range(min_element_index, min_element_index + 1, step_between_indices))
return task_ids return task_ids
# ugly hack to work around the fact that qstat truncates the fqdn of cluster nodes # ugly hack to work around the fact that qstat truncates the fqdn of cluster nodes
@ -97,7 +99,7 @@ class QstatParser:
# --------------------------------------------------------------------------------- # ---------------------------------------------------------------------------------
# main.q@physix88.ipr.univ-renne BIP 0/0/36 14.03 lx-amd64 # main.q@physix88.ipr.univ-renne BIP 0/0/36 14.03 lx-amd64
# TODO: fix this properly by parsing the output of 'qstat -f -u \* -xml' instead of 'qstat -f -u \*' # TODO: fix this properly by parsing the output of 'qstat -f -u \* -xml' instead of 'qstat -f -u \*'
qstatOutput = re.sub(r'\.univ[^ ]*', '.univ-rennes1.fr', qstatOutput) qstatOutput = re.sub(r'\.ipr\.univ[^ ]*', f'.{cluster_domain}', qstatOutput)
jobsState = JobsState() jobsState = JobsState()
f = io.StringIO(qstatOutput) f = io.StringIO(qstatOutput)
@ -209,8 +211,7 @@ class QstatParser:
bInPendingJobsSection = True bInPendingJobsSection = True
currentQueueMachine = None currentQueueMachine = None
else: else:
# print line pass
None
else: else:
# we are in a pending jobs section # we are in a pending jobs section
matchObj = re.match('^[#]+$', line) matchObj = re.match('^[#]+$', line)
@ -218,7 +219,6 @@ class QstatParser:
# unexpected line # unexpected line
print('line = "' + line + '"') print('line = "' + line + '"')
assert False assert False
None
line = f.readline() line = f.readline()
f.close() f.close()
return jobsState return jobsState

View File

@ -69,7 +69,7 @@ def executeCommand(command):
return result return result
def executeCommandOn(target_machine_fqdn, command, user=None): def executeCommandOn(target_machine_fqdn: str, command: str, user: str = None):
""" """
execute command on a local or remote machine (using ssh then) execute command on a local or remote machine (using ssh then)
:param str user: if not None, the user that should be used to execute the command (instead of the current user) :param str user: if not None, the user that should be used to execute the command (instead of the current user)

View File

@ -20,7 +20,7 @@ class CoclutoTestCase(unittest.TestCase):
qstat_output = file.read() qstat_output = file.read()
# qstatParser = ClusterController.QstatParser() # qstatParser = ClusterController.QstatParser()
qstatParser = QstatParser() qstatParser = QstatParser()
job_state = qstatParser.parseQstatOutput(qstat_output) job_state = qstatParser.parseQstatOutput(qstat_output, cluster_domain='ipr.univ-rennes1.fr')
self.assertIsInstance(job_state, JobsState) self.assertIsInstance(job_state, JobsState)