From 862f45ec7ad2c1f0799c029442e9cbdf21cd0bb8 Mon Sep 17 00:00:00 2001 From: Guillaume Raffy Date: Mon, 17 Sep 2012 08:37:04 +0000 Subject: [PATCH] contournement du bug 269 --- ClusterController/SunGridEngine.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/ClusterController/SunGridEngine.py b/ClusterController/SunGridEngine.py index 1411177..2467a73 100644 --- a/ClusterController/SunGridEngine.py +++ b/ClusterController/SunGridEngine.py @@ -24,7 +24,7 @@ class SunGridEngine: # read the requirements for pending jobs (which parallel environment, which queue, which architecture) from sge if False: # no need for job details at the moment and since it's very slow, it's been disabled - for jobId, job in jobsState.getPendingJobs().iteritems(): + for unused_jobId, job in jobsState.getPendingJobs().iteritems(): (returnCode, stdout, stderr) = executeProgram( ['qstat', '-j', job.getId().asStr()] ) assert returnCode != 0, 'prout' QstatParser().parseJobDetails( stdout, job ) @@ -35,11 +35,21 @@ class SunGridEngine: argument = 'd' if bEnable: argument = 'e' - errorCode, stdout, stderr = executeProgram(['qmod', '-'+argument, strQueueInstanceName]) + bBUG_00000269_IS_STILL_ALIVE = True # for some reason, qmod -d (and maybe any sge command) could fail with error: commlib error: can't connect to service (Address already in use) + delayBetweenAttemps = 5 # in seconds + while True: + errorCode, unused_stdout, unused_stderr = executeProgram(['qmod', '-'+argument, strQueueInstanceName]) + if bBUG_00000269_IS_STILL_ALIVE: + # if the command failed, try again + if errorCode == 0: + break + time.sleep(delayBetweenAttemps) + else: + break return (errorCode == 0) def queueIsEmpty( self, strMachineName ): - (returnCode, qstatOutput, stderr) = executeProgram( ['qstat', '-f', '-u', '*'] ) + (returnCode, qstatOutput, unused_stderr) = executeProgram( ['qstat', '-f', '-u', '*'] ) assert( returnCode == 0 ) jobsState = QstatParser().parseQstatOutput( qstatOutput ) jobs = jobsState.getJobsOnMachine( strMachineName )