diff --git a/ClusterController/ClusterController.py b/ClusterController/ClusterController.py index c743de5..749eec3 100644 --- a/ClusterController/ClusterController.py +++ b/ClusterController/ClusterController.py @@ -181,7 +181,7 @@ class ClusterController: self.wakeUpMachinesForPendingJobs() def storeSessionInDatabase( self ): - conn = MySQLdb.connect('simpatix10', 'root', '', 'clustercontroller') + conn = MySQLdb.connect('simpatix10', 'clusterctrl', '', 'clustercontroller') assert(conn) # retrieve the session id, as it's an auto_increment field @@ -226,10 +226,11 @@ class ClusterController: """ """ self.m_iSessionId = self.storeSessionInDatabase() + log("storeSessionInDatabase completed") DELAY_BETWEEN_MEASURES = 10 # in seconds self.m_clusterStatus.startReadingThreads() while not self.m_clusterStatus.isReady(): - #log('waiting for system to be ready') + log('waiting for system to be ready') time.sleep(1) None logInfo('ClusterController::run : cluster initial readings have completed') diff --git a/ClusterController/ClusterStatus.py b/ClusterController/ClusterStatus.py index 7e9d877..490ba54 100644 --- a/ClusterController/ClusterStatus.py +++ b/ClusterController/ClusterStatus.py @@ -17,10 +17,9 @@ class ClusterStatus: self.m_lock = threading.Lock() # to prevent concurrent access to this instance self.m_jobsStateUpdater = JobsStateUpdater( self ) self.m_jobsState = None - #self.m_controlledMachineNames = [ 'simpatix26', 'simpatix27', 'simpatix38', 'simpatix10' ] - #self.m_controlledMachineNames = [ 'simpatix15' ] - self.m_controlledMachineNames = [] # [ 'simpatix10' ] - if True: + self.m_controlledMachineNames = [ 'simpatix30' ] + #self.m_controlledMachineNames = [] # [ 'simpatix30' ] + if False: for iMachine in range(11, 40): if (iMachine == 31) or (iMachine == 32): continue # these machines don't seem to be able to go to sleep properly (bug 00000010) diff --git a/ClusterController/Log.py b/ClusterController/Log.py index b0aa283..388f497 100644 --- a/ClusterController/Log.py +++ b/ClusterController/Log.py @@ -4,7 +4,6 @@ import threading gLogFilePath = '/var/log/ClusterController.log' def log( message ): - return threadName = threading.currentThread().getName() logMessage = time.asctime(time.localtime())+' : '+ threadName + ' : ' + message print logMessage diff --git a/ClusterController/QstatParser.py b/ClusterController/QstatParser.py index 164594c..4f82b95 100644 --- a/ClusterController/QstatParser.py +++ b/ClusterController/QstatParser.py @@ -20,7 +20,7 @@ class QstatParser: elif c == 't': jobState += JobStateFlags.TRANSFERING else: - assert( False, 'unhandled job state flag :"' + c + '"' ) + assert False, 'unhandled job state flag :"' + c + '"' return jobState def parseQstatOutput( self, qstatOutput ): jobsState = JobsState() diff --git a/ClusterController/SunGridEngine.py b/ClusterController/SunGridEngine.py index 9b6465c..1411177 100644 --- a/ClusterController/SunGridEngine.py +++ b/ClusterController/SunGridEngine.py @@ -26,7 +26,7 @@ class SunGridEngine: if False: # no need for job details at the moment and since it's very slow, it's been disabled for jobId, job in jobsState.getPendingJobs().iteritems(): (returnCode, stdout, stderr) = executeProgram( ['qstat', '-j', job.getId().asStr()] ) - assert( returnCode != 0, 'prout' ) + assert returnCode != 0, 'prout' QstatParser().parseJobDetails( stdout, job ) return jobsState diff --git a/ClusterController/Util.py b/ClusterController/Util.py index 951f1c3..9203bdf 100644 --- a/ClusterController/Util.py +++ b/ClusterController/Util.py @@ -207,6 +207,7 @@ def onException(exception): strMessage += f.getvalue() f.close() logError(strMessage) + print(strMessage) Lib.Util.sendTextMail( 'ClusterController ', 'guillaume.raffy@univ-rennes1.fr', 'ClusterController has stopped because of an exception', strMessage) executeCommand('kill -9 %d' % os.getpid()) # stop other threads immediately exit()