cocluto/cocluto/ClusterController/JobsState.py

96 lines
3.8 KiB
Python

from datetime import datetime
from typing import Dict, Optional

# from .Log import log_info
from .Job import Task, TaskUid, QueueMachineId
from .QueueMachine import QueueMachine
class JobsState:
    """
    represents a snapshot of the state of SGE jobs as seen by the SGE command "qstat -f -u \\*"

    The snapshot holds the tasks, the tasks grouped by job array, the queue
    machines, and the time at which the snapshot was taken.
    """
    tasks: Dict[TaskUid, Task]  # every task of the snapshot, indexed by its task uid
    job_array_tasks: Dict[int, Dict[TaskUid, Task]]  # the tasks of each job array, indexed by job array id, then by task uid
    queue_machines: Dict[QueueMachineId, QueueMachine]  # queue machines such as allintel.q@simpatix10, indexed by their name
    state_time: Optional[datetime]  # the time at which the state was snapshot (None until set_time() is called)

    def __init__(self):
        """creates an empty snapshot: no task, no queue machine, no timestamp"""
        self.tasks = {}
        self.job_array_tasks = {}
        self.queue_machines = {}
        self.state_time = None

    def delete_all_tasks(self):
        """forgets all tasks (including the job array index); queue machines and time are kept"""
        self.tasks = {}
        self.job_array_tasks = {}

    def add_task(self, task: Task):
        """
        registers a task in the snapshot, replacing any task that has the same uid

        if the task is an element of a job array, it is also indexed under its job id
        """
        task_uid = task.get_id()
        self.tasks[task_uid] = task
        if task_uid.is_job_array_element():
            # setdefault creates the per-job-array dictionary the first time this job id is seen
            self.job_array_tasks.setdefault(task_uid.job_id, {})[task_uid] = task

    def get_task(self, task_uid: TaskUid) -> Optional[Task]:
        """returns the task that has the given uid, or None if the snapshot has no such task"""
        return self.tasks.get(task_uid)

    def get_job_array_tasks(self, job_array_id: int) -> Optional[Dict[TaskUid, Task]]:
        """returns the tasks registered for the given job array id, or None if there are none"""
        return self.job_array_tasks.get(job_array_id)

    def set_time(self, state_time: datetime):
        """sets the time at which the state was snapshot"""
        self.state_time = state_time

    def get_time(self) -> Optional[datetime]:
        """returns the time at which the state was snapshot (None if it has not been set)"""
        return self.state_time

    def get_jobs_on_machine(self, machine_name: str) -> Dict[TaskUid, Task]:
        """
        returns the tasks that use at least one slot on the given machine

        the machine of a slot is the part that follows '@' in the queue machine
        name (eg 'simpatix10' for 'allintel.q@simpatix10')
        """
        return {
            task_uid: task
            for task_uid, task in self.tasks.items()
            if any(
                queue_machine_name.split('@')[1] == machine_name
                for queue_machine_name in task.get_slots()
            )
        }

    def get_num_free_slots_on_queue_machine(self, queue_machine: QueueMachine) -> int:
        """
        returns the number of slots of the given queue machine that are not used by any task

        this is the queue machine's number of slots minus the slots that the
        tasks of this snapshot use on it. Raises AssertionError if the tasks
        use more slots than the queue machine has.
        """
        queue_machine_name = queue_machine.get_name()
        num_used_slots = sum(
            task.get_slots().get(queue_machine_name, 0)  # a task that has no slot on this queue machine counts for 0
            for task in self.tasks.values()
        )
        num_free_slots = queue_machine.get_num_slots() - num_used_slots
        assert num_free_slots >= 0, 'queue machine %s has more used slots (%d) than available slots (%d)' % (
            queue_machine_name, num_used_slots, queue_machine.get_num_slots())
        return num_free_slots

    def add_queue_machine(self, queue_machine: QueueMachine):
        """registers a queue machine, indexed by its name (replaces any queue machine that has the same name)"""
        self.queue_machines[queue_machine.get_name()] = queue_machine

    def get_queue_machine(self, machine_name: str) -> Optional[QueueMachine]:
        """
        finds the queue machine associated with a machine

        returns None if no registered queue machine is on this machine. Raises
        AssertionError if more than one queue machine is on this machine.
        """
        found = None
        for queue_machine in self.queue_machines.values():
            if queue_machine.machine_name == machine_name:
                # to be sure that no more than one queue machine is on a given machine
                assert found is None, 'more than one queue machine found on machine %s' % machine_name
                found = queue_machine
        return found

    def get_queue_machines(self) -> Dict[QueueMachineId, QueueMachine]:
        """returns the registered queue machines, indexed by their name (the internal dictionary, not a copy)"""
        return self.queue_machines

    def get_pending_jobs(self) -> Dict[TaskUid, Task]:
        """returns the tasks for which is_pending() is true, indexed by their task uid"""
        return {task.get_id(): task for task in self.tasks.values() if task.is_pending()}