179 lines
5.6 KiB
Python
179 lines
5.6 KiB
Python
import enum
|
|
from typing import Optional, Dict, List
|
|
from datetime import datetime
|
|
|
|
|
|
class JobStateFlags:
    """
    Bit flags describing the state of a job.

    Each flag occupies its own bit, so several of them can be or-ed together
    into a single integer state value.
    """
    RUNNING = 1 << 0  # the job is running
    WAITING = 1 << 1  # the job is waiting
    # exact meaning unclear: mirrors the 'q' state of jobs as shown in the
    # pending jobs list of `qstat -f -u \*`
    QUEUED = 1 << 2
    TRANSFERING = 1 << 3
    DELETED = 1 << 4
    HOLD = 1 << 5
    ERROR = 1 << 6
    SUSPENDED = 1 << 7
|
|
|
|
|
|
@enum.unique
class ParallelEnvironment(enum.Enum):
    """
    the parallel environments known to this module
    """
    MPI = 1
|
|
|
|
|
|
# Type aliases: they add no runtime behaviour, they only make signatures self-describing.

# integer-valued aliases
JobId = int  # sge's job id: a simple unique number
TaskId = int  # the id of a task within its array job
JobState = int  # a combination of JobStateFlags
MemoryUnit = int  # an amount of memory, expressed as a number of bytes

# string-valued aliases
QueueId = str  # eg 'main.q'
QueueMachineId = str  # identifies a queue machine, eg 'main.q@physix99.ipr.univ-rennes.fr'
ResourceRequest = str  # eg 'mem_available=5G'
|
|
|
|
|
|
class JobRequirements:
    """
    The requirements attached to a job.

    Every field starts out as None and stays None until it is explicitly set.
    """
    num_slots: Optional[int]
    architecture: Optional[str]  # machine architecture
    parallel_environment: Optional[ParallelEnvironment]
    queues: Optional[List[QueueId]]  # the queues this job is allowed to run on
    resources: Optional[List[ResourceRequest]]

    def __init__(self):
        # all requirements are unset on a freshly created instance
        for field_name in ('num_slots', 'architecture', 'parallel_environment', 'queues', 'resources'):
            setattr(self, field_name, None)
|
|
|
|
|
|
class TaskUid:
    """
    the identifier of a task, in the form <job_id>.<element_id>

    We treat each element of a job array as a separate task.
    A single integer is not enough to identify a task because all elements in a job array
    share the same sge job identifier. To uniquely define a job array element, we also use the task id.

    TaskUid instances are hashable and comparable, so they can be used as dict keys.
    """
    job_id: JobId
    task_id: Optional[TaskId]  # the identifier of a task within its job; None if this identifier does not refer to a job array element
    MAX_NUM_JOBS_IN_ARRAY = 1000000

    def __init__(self, job_id: JobId, task_id: Optional[TaskId] = None):
        """
        job_id : the sge job identifier
        task_id : the index of the element within the job array, or None for a non-array job
        """
        if task_id is not None:
            # the hash packs task_id below job_id * MAX_NUM_JOBS_IN_ARRAY
            assert task_id <= self.MAX_NUM_JOBS_IN_ARRAY
        self.job_id = job_id
        self.task_id = task_id

    def __hash__(self) -> int:
        """
        required to use a TaskUid as a dict hash key
        """
        _hash = self.job_id * self.MAX_NUM_JOBS_IN_ARRAY
        if self.task_id is not None:
            _hash += self.task_id
        return _hash

    def __eq__(self, other: object):
        """
        required to use a TaskUid as a dict hash key

        Two TaskUid are equal when both their job id and their task id match.
        Comparing with anything that is not a TaskUid yields NotImplemented (so that
        python falls back to its default handling) instead of raising AttributeError.
        """
        if not isinstance(other, TaskUid):
            return NotImplemented
        return self.job_id == other.job_id and self.task_id == other.task_id

    def __repr__(self) -> str:
        """
        debugging representation, eg TaskUid(job_id=12345, task_id=789)
        """
        return '%s(job_id=%r, task_id=%r)' % (type(self).__name__, self.job_id, self.task_id)

    def is_job_array_element(self) -> bool:
        """
        returns True if this identifier refers to an element of a job array
        """
        return self.task_id is not None

    def get_job_id(self) -> JobId:
        """
        returns the sge job identifier (shared by all elements of a job array)
        """
        return self.job_id

    def as_str(self) -> str:
        """
        returns '<job_id>' for a non-array job and '<job_id>.<task_id>' for a job array element
        """
        result = '%s' % self.job_id
        if self.is_job_array_element():
            result += '.%d' % self.task_id
        return result
|
|
|
|
|
|
class Task:
    """
    A task, identified by its TaskUid, together with what is known about it:
    owner, script, submit/start times, the slots it uses, its state and its requirements.
    """
    task_uid: TaskUid  # the unique identifier of this task, eg '12345.789'
    start_time: Optional[datetime]
    submit_time: Optional[datetime]
    owner: Optional[str]
    script_name: Optional[str]
    slots: Dict[QueueMachineId, int]  # number of slots used on each queue machine
    state_flags: JobState  # combination of JobStateFlags
    job_requirements: JobRequirements
    requested_ram_per_core: MemoryUnit

    def __init__(self, task_uid: TaskUid):
        self.task_uid = task_uid
        self.start_time = None
        self.submit_time = None
        self.owner = None
        self.script_name = None
        self.slots = {}
        self.state_flags = 0
        self.job_requirements = JobRequirements()
        self.requested_ram_per_core = 0

    def get_id(self) -> TaskUid:
        """
        returns the unique identifier of this task
        """
        return self.task_uid

    def set_state(self, state: JobState):
        """
        state : a combination of JobStateFlags
        """
        self.state_flags = state

    def set_owner(self, job_owner: str):
        """
        sets the owner of this task; once set, the owner is not expected to change
        """
        if self.owner:
            assert self.owner == job_owner
        self.owner = job_owner

    def get_owner(self) -> Optional[str]:
        """
        returns the owner of this task, or None if it has not been set
        """
        return self.owner

    def set_start_time(self, job_start_time: datetime):
        """
        sets the start time of this task; once set, the start time is not expected to change
        """
        if self.start_time:
            assert self.start_time == job_start_time
        self.start_time = job_start_time

    def set_submit_time(self, job_submit_time: datetime):
        """
        sets the submit time of this task; once set, the submit time is not expected to change
        """
        if self.submit_time:
            assert self.submit_time == job_submit_time
        self.submit_time = job_submit_time

    def get_submit_time(self, job_submit_time: Optional[datetime] = None) -> Optional[datetime]:
        """
        returns the submit time of this task, or None if it has not been set

        job_submit_time : kept for backward compatibility only. This method used to be a
            setter despite its name; when a value is passed, it is still stored
            (prefer set_submit_time for that).
        """
        if job_submit_time is not None:
            self.set_submit_time(job_submit_time)
        return self.submit_time

    def get_start_time(self) -> Optional[datetime]:
        """
        returns the start time of this task, or None if it has not been set
        """
        return self.start_time

    def set_script_name(self, job_script_name: str):
        """
        sets the script name of this task; once set, the script name is not expected to change
        """
        if self.script_name:
            assert self.script_name == job_script_name
        self.script_name = job_script_name

    def add_slots(self, queue_machine_id: QueueMachineId, num_slots: int):
        """
        registers num_slots slots used by this task on the given queue machine

        A queue machine is expected to be registered only once per task.
        """
        assert queue_machine_id not in self.slots
        # should the machine already be registered (which can only go unnoticed when
        # asserts are disabled), the slots are accumulated rather than overwritten
        self.slots[queue_machine_id] = self.slots.get(queue_machine_id, 0) + num_slots

    def get_slots(self) -> Dict[QueueMachineId, int]:
        """
        returns the number of slots used by this task on each queue machine
        """
        return self.slots

    def set_num_required_slots(self, num_slots: int):
        """
        sets the number of slots required by this task
        """
        self.job_requirements.num_slots = num_slots

    def is_pending(self) -> bool:
        """
        returns True if this task is waiting in the queue for whatever reason
        """
        return bool(self.state_flags & JobStateFlags.QUEUED)

    def get_requested_ram_per_core(self) -> MemoryUnit:
        """
        requested RAM per core in bytes
        """
        return self.requested_ram_per_core

    def set_requested_ram_per_core(self, requested_ram: MemoryUnit):
        """
        requested_ram : requested RAM per core in bytes
        """
        self.requested_ram_per_core = requested_ram
|