cocluto v1.0.13 - improvements to quman
- the userid and the host fqdn of are now recorded - found better names (more appropriate and easier to understand) - disable reason -> disable request - requester -> disable tag work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3093]
This commit is contained in:
parent
12be70500b
commit
bfe1eeb084
135
cocluto/quman.py
135
cocluto/quman.py
|
|
@ -10,8 +10,10 @@ from cocluto.ClusterController.QstatParser import QstatParser
|
||||||
from cocluto.ClusterController.JobsState import JobsState
|
from cocluto.ClusterController.JobsState import JobsState
|
||||||
|
|
||||||
LogId = int # identifies a log entry in the database
|
LogId = int # identifies a log entry in the database
|
||||||
RequesterId = str # identifies the queue enable/disable requester eg auto.croconaus, manual.graffy, etc.
|
DisableTag = str # unique string that identifies the disable request eg auto.croconaus, manual.graffy, etc.
|
||||||
QueueMachineId = str # identifies the queue machine eg main.q@alambix42.ipr.univ-rennes.fr
|
QueueMachineId = str # identifies the queue machine eg main.q@alambix42.ipr.univ-rennes.fr
|
||||||
|
HostFqdn = str # fully qualified domain name of a host eg alambix42.ipr.univ-rennes.fr
|
||||||
|
UserId = str # unix user id of a user eg graffy
|
||||||
|
|
||||||
|
|
||||||
class QueuesStatus():
|
class QueuesStatus():
|
||||||
|
|
@ -90,32 +92,37 @@ class Sge(IGridEngine):
|
||||||
# Parse the output to extract queue statuses
|
# Parse the output to extract queue statuses
|
||||||
# This is a simplified example - you would need to parse the actual qstat output
|
# This is a simplified example - you would need to parse the actual qstat output
|
||||||
queues_status = QueuesStatus()
|
queues_status = QueuesStatus()
|
||||||
jobs_state: JobsState = QstatParser.parseQstatOutput(process.stdout.decode())
|
qstat_parser = QstatParser()
|
||||||
queue_machines = jobs_state.getQueueMachines()
|
jobs_state: JobsState = qstat_parser.parse_qstat_output(process.stdout.decode())
|
||||||
for queue_machine in queue_machines.itervalues():
|
queue_machines = jobs_state.get_queue_machines()
|
||||||
|
for queue_machine in queue_machines.values():
|
||||||
queues_status.add_queue(queue_machine.get_name(), queue_machine.is_enabled())
|
queues_status.add_queue(queue_machine.get_name(), queue_machine.is_enabled())
|
||||||
|
|
||||||
return queues_status
|
return queues_status
|
||||||
|
|
||||||
|
|
||||||
def init_db(db_backend: ISqlDatabaseBackend):
|
def init_db(db_backend: ISqlDatabaseBackend):
|
||||||
|
# a database storing the log of actions (queue activation or deactivation)
|
||||||
if not db_backend.table_exists('log'):
|
if not db_backend.table_exists('log'):
|
||||||
fields = [
|
fields = [
|
||||||
SqlTableField('id', SqlTableField.Type.FIELD_TYPE_INT, 'unique identifier of the modification', is_autoinc_index=True),
|
SqlTableField('id', SqlTableField.Type.FIELD_TYPE_INT, 'unique identifier of the modification', is_autoinc_index=True),
|
||||||
SqlTableField('timestamp', SqlTableField.Type.FIELD_TYPE_TIME, 'the time (and date) at which this modification has been made'),
|
SqlTableField('timestamp', SqlTableField.Type.FIELD_TYPE_TIME, 'the time (and date) at which this modification has been made'),
|
||||||
|
SqlTableField('user_id', SqlTableField.Type.FIELD_TYPE_STRING, 'the id of user performing the action (unix user id)'),
|
||||||
|
SqlTableField('host_fqdn', SqlTableField.Type.FIELD_TYPE_STRING, 'the fully qualified domain name of the host on which the action is performed'),
|
||||||
SqlTableField('queue_name', SqlTableField.Type.FIELD_TYPE_STRING, 'the name of the queue that was modified'),
|
SqlTableField('queue_name', SqlTableField.Type.FIELD_TYPE_STRING, 'the name of the queue that was modified'),
|
||||||
SqlTableField('action', SqlTableField.Type.FIELD_TYPE_STRING, 'the action performed: "disable" or "enable"'),
|
SqlTableField('action', SqlTableField.Type.FIELD_TYPE_STRING, 'the action performed: "disable" or "enable"'),
|
||||||
SqlTableField('requester_id', SqlTableField.Type.FIELD_TYPE_STRING, 'the ID of the requester'),
|
SqlTableField('disable_tag', SqlTableField.Type.FIELD_TYPE_STRING, 'the tag of the disable request that led to this modification (e.g., auto.croconaus, manual.graffy, etc.)'),
|
||||||
SqlTableField('reason', SqlTableField.Type.FIELD_TYPE_STRING, 'the reason for the modification'),
|
SqlTableField('reason', SqlTableField.Type.FIELD_TYPE_STRING, 'the reason for the modification'),
|
||||||
]
|
]
|
||||||
db_backend.create_table('log', fields)
|
db_backend.create_table('log', fields)
|
||||||
|
|
||||||
if not db_backend.table_exists('state'):
|
# a database storing the current disable requests
|
||||||
|
if not db_backend.table_exists('disables'):
|
||||||
fields = [
|
fields = [
|
||||||
SqlTableField('disable_reason_id', SqlTableField.Type.FIELD_TYPE_INT, 'log.id of the disable action that led to this state'),
|
SqlTableField('disable_request_id', SqlTableField.Type.FIELD_TYPE_INT, 'log.id of the disable action that led to this state'),
|
||||||
SqlTableField('queue_name', SqlTableField.Type.FIELD_TYPE_STRING, 'the name of the queue that was modified'),
|
SqlTableField('queue_name', SqlTableField.Type.FIELD_TYPE_STRING, 'the name of the queue that was modified'),
|
||||||
]
|
]
|
||||||
db_backend.create_table('state', fields)
|
db_backend.create_table('disables', fields)
|
||||||
|
|
||||||
|
|
||||||
def create_db_backend() -> ISqlDatabaseBackend:
|
def create_db_backend() -> ISqlDatabaseBackend:
|
||||||
|
|
@ -131,19 +138,35 @@ def create_db_backend() -> ISqlDatabaseBackend:
|
||||||
return backend
|
return backend
|
||||||
|
|
||||||
|
|
||||||
class DisableReason:
|
class DisableRequest:
|
||||||
db_id: int
|
db_id: int # index of this disable request in the database
|
||||||
|
user_id: UserId # the id of the user who requested the disable action (unix user id)
|
||||||
|
host_fqdn: HostFqdn # the fully qualified domain name of the host on which the disable action was requested
|
||||||
|
queue_name: QueueMachineId # the name of the queue that was
|
||||||
reason: str
|
reason: str
|
||||||
requester_id: RequesterId
|
disable_tag: DisableTag
|
||||||
timestamp: datetime
|
timestamp: datetime
|
||||||
|
|
||||||
def __init__(self, log_id: int, queue_name: QueueMachineId, reason: str, requester_id: RequesterId, timestamp: datetime):
|
def __init__(self, log_id: int, user_id: UserId, host_fqdn: HostFqdn, queue_name: QueueMachineId, reason: str, disable_tag: DisableTag, timestamp: datetime):
|
||||||
self.log_id = log_id
|
self.log_id = log_id
|
||||||
|
self.user_id = user_id
|
||||||
|
self.host_fqdn = host_fqdn
|
||||||
self.queue_name = queue_name
|
self.queue_name = queue_name
|
||||||
self.reason = reason
|
self.reason = reason
|
||||||
self.requester_id = requester_id
|
self.disable_tag = disable_tag
|
||||||
self.timestamp = timestamp
|
self.timestamp = timestamp
|
||||||
|
|
||||||
|
def as_dict(self) -> Dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"log_id": self.log_id,
|
||||||
|
"user_id": self.user_id,
|
||||||
|
"host_fqdn": self.host_fqdn,
|
||||||
|
"queue_name": self.queue_name,
|
||||||
|
"reason": self.reason,
|
||||||
|
"disable_tag": self.disable_tag,
|
||||||
|
"timestamp": self.timestamp.isoformat(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class QueueManager:
|
class QueueManager:
|
||||||
db_backend: ISqlDatabaseBackend
|
db_backend: ISqlDatabaseBackend
|
||||||
|
|
@ -153,72 +176,83 @@ class QueueManager:
|
||||||
self.db_backend = db_backend
|
self.db_backend = db_backend
|
||||||
self.grid_engine = grid_engine
|
self.grid_engine = grid_engine
|
||||||
|
|
||||||
def log_modification(self, queue_name: str, action: str, requester_id: RequesterId, reason: str) -> LogId:
|
def log_modification(self, queue_name: str, action: str, disable_tag: DisableTag, reason: str) -> LogId:
|
||||||
assert action in ["disable", "enable"], "Action must be either 'disable' or 'enable'"
|
assert action in ["disable", "enable"], "Action must be either 'disable' or 'enable'"
|
||||||
|
userid = subprocess.check_output(['whoami']).decode().strip()
|
||||||
|
host_fqdn = subprocess.check_output(['hostname', '-f']).decode().strip()
|
||||||
timestamp = datetime.now().isoformat()
|
timestamp = datetime.now().isoformat()
|
||||||
sql_query = f"INSERT INTO log (timestamp, queue_name, action, requester_id, reason) VALUES ('{timestamp}', '{queue_name}', '{action}', '{requester_id}', '{reason}');"
|
sql_query = f"INSERT INTO log (timestamp, user_id, host_fqdn, queue_name, action, disable_tag, reason) VALUES ('{timestamp}', '{userid}', '{host_fqdn}', '{queue_name}', '{action}', '{disable_tag}', '{reason}');"
|
||||||
self.db_backend.query(sql_query)
|
self.db_backend.query(sql_query)
|
||||||
# get the log id of the disable action that was just inserted
|
# get the log id of the disable action that was just inserted
|
||||||
log_id = self.db_backend.query("SELECT last_insert_rowid();")[0][0]
|
log_id = self.db_backend.query("SELECT last_insert_rowid();")[0][0]
|
||||||
return log_id
|
return log_id
|
||||||
|
|
||||||
def get_disable_reasons(self, queue_name: QueueMachineId) -> Dict[int, DisableReason]:
|
def get_disable_requests(self, queue_name: QueueMachineId) -> Dict[int, DisableRequest]:
|
||||||
sql_query = f"SELECT log.id, log.queue_name, log.reason, log.requester_id, log.timestamp FROM log JOIN state ON log.id = state.disable_reason_id WHERE state.queue_name = '{queue_name}' AND log.action = 'disable';"
|
sql_query = f"SELECT log.id, log.user_id, log.host_fqdn, log.queue_name, log.reason, log.disable_tag, log.timestamp FROM log JOIN disables ON log.id = disables.disable_request_id WHERE disables.queue_name = '{queue_name}' AND log.action = 'disable';"
|
||||||
results = self.db_backend.query(sql_query)
|
results = self.db_backend.query(sql_query)
|
||||||
|
disable_requests_as_dict = {}
|
||||||
for row in results:
|
for row in results:
|
||||||
assert row[1] == queue_name, "All results should be for the same queue"
|
log_id = row[0]
|
||||||
return {row[0]: DisableReason(log_id=row[0], queue_name=row[1], reason=row[2], requester_id=row[3], timestamp=datetime.fromisoformat(row[4])) for row in results}
|
user_id = row[1]
|
||||||
|
host_fqdn = row[2]
|
||||||
|
queue_machine = row[3]
|
||||||
|
assert queue_machine == queue_name, "All results should be for the same queue"
|
||||||
|
reason = row[4]
|
||||||
|
disable_tag = row[5]
|
||||||
|
timestamp = datetime.fromisoformat(row[6])
|
||||||
|
disable_requests_as_dict[row[0]] = DisableRequest(log_id=log_id, user_id=user_id, host_fqdn=host_fqdn, queue_name=queue_name, reason=reason, disable_tag=disable_tag, timestamp=timestamp)
|
||||||
|
return disable_requests_as_dict
|
||||||
|
|
||||||
def request_queue_deactivation(self, queue_name: QueueMachineId, requester_id: RequesterId, reason: str, perform_disable: bool = True):
|
def request_queue_deactivation(self, queue_name: QueueMachineId, disable_tag: DisableTag, reason: str, perform_disable: bool = True):
|
||||||
|
|
||||||
disable_reasons = self.get_disable_reasons(queue_name)
|
disable_requests = self.get_disable_requests(queue_name)
|
||||||
for dr in disable_reasons.values():
|
for dr in disable_requests.values():
|
||||||
assert dr.requester_id != requester_id, f"Requester {requester_id} has already requested deactivation of queue {queue_name} for reason '{dr.reason}' at {dr.timestamp.isoformat()}. Cannot request deactivation again without reactivating first."
|
assert dr.disable_tag != disable_tag, f"Disable tag {disable_tag} has already requested deactivation of queue {queue_name} for reason '{dr.reason}' at {dr.timestamp.isoformat()}. Cannot request deactivation again without reactivating first."
|
||||||
|
|
||||||
if perform_disable:
|
if perform_disable:
|
||||||
if len(disable_reasons) == 0:
|
if len(disable_requests) == 0:
|
||||||
# queue is currently active, we can disable it
|
# queue is currently active, we can disable it
|
||||||
self.grid_engine.disable_queue(queue_name)
|
self.grid_engine.disable_queue(queue_name)
|
||||||
|
|
||||||
disable_log_id = self.log_modification(queue_name, "disable", requester_id, reason)
|
disable_log_id = self.log_modification(queue_name, "disable", disable_tag, reason)
|
||||||
self.db_backend.query(f"INSERT INTO state (disable_reason_id, queue_name) VALUES ({disable_log_id}, '{queue_name}');")
|
self.db_backend.query(f"INSERT INTO disables (disable_request_id, queue_name) VALUES ({disable_log_id}, '{queue_name}');")
|
||||||
|
|
||||||
def request_queue_activation(self, queue_name: QueueMachineId, requester_id: RequesterId, reason: str, perform_enable: bool = True):
|
def request_queue_activation(self, queue_name: QueueMachineId, disable_tag: DisableTag, reason: str, perform_enable: bool = True):
|
||||||
disable_reasons = self.get_disable_reasons(queue_name)
|
disable_requests = self.get_disable_requests(queue_name)
|
||||||
dr_to_remove = None # the disable reason to remove
|
dr_to_remove = None # the disable reason to remove
|
||||||
for dr in disable_reasons.values():
|
for dr in disable_requests.values():
|
||||||
if dr.requester_id == requester_id:
|
if dr.disable_tag == disable_tag:
|
||||||
dr_to_remove = dr
|
dr_to_remove = dr
|
||||||
break
|
break
|
||||||
|
|
||||||
assert dr_to_remove is not None, f"Requester {requester_id} has not requested deactivation of queue {queue_name}. Cannot request activation without a prior deactivation."
|
assert dr_to_remove is not None, f"Disable tag {disable_tag} has not requested deactivation of queue {queue_name}. Cannot request activation without a prior deactivation."
|
||||||
|
|
||||||
if perform_enable:
|
if perform_enable:
|
||||||
if len(disable_reasons) == 1:
|
if len(disable_requests) == 1:
|
||||||
# queue is currently disabled and there is only one disable reason, we can enable it
|
# queue is currently disabled and there is only one disable reason, we can enable it
|
||||||
self.grid_engine.enable_queue(queue_name)
|
self.grid_engine.enable_queue(queue_name)
|
||||||
enable_log_id = self.log_modification(queue_name, "enable", requester_id, reason) # noqa: F841
|
enable_log_id = self.log_modification(queue_name, "enable", disable_tag, reason) # noqa: F841 pylint:disable=unused-variable
|
||||||
self.db_backend.query(f"DELETE FROM state WHERE disable_reason_id = {dr_to_remove.log_id} AND queue_name = '{queue_name}';")
|
self.db_backend.query(f"DELETE FROM disables WHERE disable_request_id = {dr_to_remove.log_id} AND queue_name = '{queue_name}';")
|
||||||
|
|
||||||
def synchronize_with_grid_engine(self):
|
def synchronize_with_grid_engine(self):
|
||||||
"""synchronizes the state of the queues in the database with the actual state of the queues in the grid engine by querying qstat."""
|
"""synchronizes the state of the queues in the database with the actual state of the queues in the grid engine by querying qstat."""
|
||||||
qs = self.grid_engine.get_status()
|
qs = self.grid_engine.get_status()
|
||||||
for queue_name, is_enabled in qs.is_enabled.items():
|
for queue_name, is_enabled in qs.is_enabled.items():
|
||||||
disable_reasons = self.get_disable_reasons(queue_name)
|
disable_requests = self.get_disable_requests(queue_name)
|
||||||
if not is_enabled and len(disable_reasons) == 0:
|
if not is_enabled and len(disable_requests) == 0:
|
||||||
# queue is disabled in the grid engine but there is no disable reason in the database, we add a disable reason with requester_id "unknown" and reason "synchronized with grid engine"
|
# queue is disabled in the grid engine but there is no disable reason in the database, we add a disable reason with disable_tag "unknown" and reason "synchronized with grid engine"
|
||||||
self.request_queue_deactivation(queue_name, "quman-sync", "synchronized with grid engine", perform_disable=False)
|
self.request_queue_deactivation(queue_name, "quman-sync", "synchronized with grid engine", perform_disable=False)
|
||||||
assert len(self.get_disable_reasons(queue_name)) > 0, f"After synchronization, there should be at least one disable reason for queue {queue_name} but there are none."
|
assert len(self.get_disable_requests(queue_name)) > 0, f"After synchronization, there should be at least one disable reason for queue {queue_name} but there are none."
|
||||||
elif is_enabled and len(disable_reasons) > 0:
|
elif is_enabled and len(disable_requests) > 0:
|
||||||
# queue is enabled in the grid engine but there are disable reasons in the database, we remove all disable reasons for this queue and requester_id "unknown" with reason "synchronized with grid engine"
|
# queue is enabled in the grid engine but there are disable reasons in the database, we remove all disable reasons for this queue and disable_tag "unknown" with reason "synchronized with grid engine"
|
||||||
for dr in disable_reasons.values():
|
for dr in disable_requests.values():
|
||||||
self.request_queue_activation(queue_name, dr.requester_id, "synchronized with grid engine", perform_enable=False)
|
self.request_queue_activation(queue_name, dr.disable_tag, "synchronized with grid engine", perform_enable=False)
|
||||||
assert len(self.get_disable_reasons(queue_name)) == 0, f"After synchronization, there should be no disable reasons for queue {queue_name} but there are still {len(self.get_disable_reasons(queue_name))} disable reasons."
|
assert len(self.get_disable_requests(queue_name)) == 0, f"After synchronization, there should be no disable reasons for queue {queue_name} but there are still {len(self.get_disable_requests(queue_name))} disable reasons."
|
||||||
|
|
||||||
def get_state_as_json(self) -> Dict[str, Any]:
|
def get_state_as_json(self) -> Dict[str, Any]:
|
||||||
"""returns the state of the queues as a json string."""
|
"""returns the state of the queues as a json string."""
|
||||||
# get the list of queue names from the state table in the database
|
# get the list of queue names from the disables table in the database
|
||||||
sql_query = "SELECT DISTINCT queue_name FROM state;"
|
sql_query = "SELECT DISTINCT queue_name FROM disables;"
|
||||||
results = self.db_backend.query(sql_query)
|
results = self.db_backend.query(sql_query)
|
||||||
for row in results:
|
for row in results:
|
||||||
assert len(row) == 1, "Each row should have only one column (queue_name)"
|
assert len(row) == 1, "Each row should have only one column (queue_name)"
|
||||||
|
|
@ -226,9 +260,9 @@ class QueueManager:
|
||||||
|
|
||||||
state = {}
|
state = {}
|
||||||
for queue_name in queue_names:
|
for queue_name in queue_names:
|
||||||
disable_reasons = self.get_disable_reasons(queue_name)
|
disable_requests = self.get_disable_requests(queue_name)
|
||||||
state[queue_name] = {
|
state[queue_name] = {
|
||||||
"disable_reasons": [{"reason": dr.reason, "requester_id": dr.requester_id, "timestamp": dr.timestamp.isoformat()} for dr in disable_reasons.values()]
|
"disable_requests": [dr.as_dict() for dr in disable_requests.values()]
|
||||||
}
|
}
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
|
@ -239,7 +273,7 @@ def main():
|
||||||
parser.add_argument("action", choices=["d", "e"], help="Action: d (deactivate) or e (activate)")
|
parser.add_argument("action", choices=["d", "e"], help="Action: d (deactivate) or e (activate)")
|
||||||
parser.add_argument("queue", help="Queue to modify (e.g., main.q@node42@univ-rennes.fr)")
|
parser.add_argument("queue", help="Queue to modify (e.g., main.q@node42@univ-rennes.fr)")
|
||||||
parser.add_argument("--reason", required=True, help="Reason for the deactivation/activation")
|
parser.add_argument("--reason", required=True, help="Reason for the deactivation/activation")
|
||||||
parser.add_argument("--requester", required=True, help="ID of the requester")
|
parser.add_argument("--disable-tag", required=True, help="tag for the disable request (e.g., auto.croconaus, manual.graffy, etc.)")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
db_backend = create_db_backend()
|
db_backend = create_db_backend()
|
||||||
|
|
@ -248,10 +282,9 @@ def main():
|
||||||
queue = args.queue
|
queue = args.queue
|
||||||
|
|
||||||
if args.action == "d":
|
if args.action == "d":
|
||||||
qmod.request_queue_deactivation(queue, args.requester, args.reason)
|
qmod.request_queue_deactivation(queue, args.disable_tag, args.reason)
|
||||||
elif args.action == "e":
|
elif args.action == "e":
|
||||||
qmod.request_queue_activation(queue, args.requester, args.reason)
|
qmod.request_queue_activation(queue, args.disable_tag, args.reason)
|
||||||
qmod.db_backend.dump(Path('./quman_test/log.sql'))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
__version__ = '1.0.12'
|
__version__ = '1.0.13'
|
||||||
|
|
||||||
|
|
||||||
class Version(object):
|
class Version(object):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue