import time

import Util
from QstatParser import *


class SunGridEngine:
    """Access to a Sun Grid Engine installation through its command-line tools.

    Every method runs `qstat` or `qmod` through `executeProgram` and, where
    relevant, hands the output over to `QstatParser`.
    """

    # seconds to wait before running again an sge command that failed
    _RETRY_DELAY = 5

    # Reading the requirements of pending jobs (which parallel environment,
    # which queue, which architecture) costs one qstat call per pending job.
    # There is no need for these details at the moment and since it's very
    # slow, it's been disabled.
    _READ_PENDING_JOB_DETAILS = False

    def getCurrentJobsState(self):
        """Return the current state of the jobs, as reported by `qstat -f -u *`.

        The command is run again every `_RETRY_DELAY` seconds, with no limit
        on the number of attempts, until it exits with return code 0; each
        failed attempt is reported with `logWarning`.

        Returns:
            The object built by `QstatParser.parseQstatOutput` from the qstat
            output, after `setTime` has been called on it with `time.time()`.
        """
        # presumably extra traces kept while bug 00000009 is being tracked
        # down -- TODO confirm and remove once the bug is closed
        bBUG_00000009_IS_STILL_ALIVE = True

        if bBUG_00000009_IS_STILL_ALIVE:
            logDebug('Querying the current state of jobs')

        while True:
            command = ['qstat', '-f', '-u', '*']
            (returnCode, qstatOutput, stderr) = executeProgram(command)
            if returnCode == 0:
                break
            logWarning('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s"). Retrying in %d seconds' % (' '.join(command), returnCode, qstatOutput, stderr, self._RETRY_DELAY))
            time.sleep(self._RETRY_DELAY)

        if bBUG_00000009_IS_STILL_ALIVE:
            logDebug('Just got current state of jobs')

        jobsState = QstatParser().parseQstatOutput(qstatOutput)
        jobsState.setTime(time.time())

        if self._READ_PENDING_JOB_DETAILS:
            self._readPendingJobsDetails(jobsState)
        return jobsState

    def _readPendingJobsDetails(self, jobsState):
        """Complete each pending job with the output of `qstat -j <job id>`.

        Only used when `_READ_PENDING_JOB_DETAILS` is switched on.

        Raises:
            AssertionError: if qstat exits with a non-zero return code.
        """
        for job in jobsState.getPendingJobs().values():
            command = ['qstat', '-j', job.getId().asStr()]
            (returnCode, stdout, stderr) = executeProgram(command)
            # the details can only be parsed if the command succeeded
            if returnCode != 0:
                raise AssertionError('command "%s" failed (returnCode = %d, stderr="%s")' % (' '.join(command), returnCode, stderr))
            QstatParser().parseJobDetails(stdout, job)

    def setQueueInstanceActivation(self, strQueueInstanceName, bEnable):
        """Enable (`qmod -e`) or disable (`qmod -d`) a queue instance.

        Args:
            strQueueInstanceName: name of the queue instance, passed as is to
                qmod.
            bEnable: if true the queue instance is enabled, otherwise it is
                disabled.

        Returns:
            True if the last qmod run exited with return code 0. As long as
            the workaround for bug 00000269 is active, the method only
            returns after a successful run, so the result is always True.
        """
        argument = 'e' if bEnable else 'd'

        # for some reason, qmod -d (and maybe any sge command) could fail with
        # error: commlib error: can't connect to service (Address already in
        # use). While this flag is set, a failed command is tried again every
        # _RETRY_DELAY seconds until it succeeds.
        bBUG_00000269_IS_STILL_ALIVE = True

        while True:
            command = ['qmod', '-' + argument, strQueueInstanceName]
            errorCode, stdout, stderr = executeProgram(command)
            if errorCode == 0 or not bBUG_00000269_IS_STILL_ALIVE:
                break
            # leave a trace of each failure, so that an endless retry loop
            # can be diagnosed from the logs
            logWarning('command "%s" failed (returnCode = %d, stdout="%s", stderr="%s"). Retrying in %d seconds' % (' '.join(command), errorCode, stdout, stderr, self._RETRY_DELAY))
            time.sleep(self._RETRY_DELAY)
        return errorCode == 0

    def queueIsEmpty(self, strMachineName):
        """Tell whether qstat reports no job on the given machine.

        Runs `qstat -f -u *` once (no retry) and parses its output with
        `QstatParser`.

        Args:
            strMachineName: name of the machine, passed as is to
                `getJobsOnMachine`.

        Returns:
            True if `getJobsOnMachine(strMachineName)` yields no job.

        Raises:
            AssertionError: if qstat exits with a non-zero return code.
        """
        command = ['qstat', '-f', '-u', '*']
        (returnCode, qstatOutput, stderr) = executeProgram(command)
        # Raised explicitly rather than with an assert statement: asserts are
        # removed by `python -O`, and the output of a failed qstat must never
        # be mistaken for an empty machine.
        if returnCode != 0:
            raise AssertionError('command "%s" failed (returnCode = %d, stderr="%s")' % (' '.join(command), returnCode, stderr))
        jobsState = QstatParser().parseQstatOutput(qstatOutput)
        jobs = jobsState.getJobsOnMachine(strMachineName)
        return len(jobs) == 0