cocluto v1.0.15 - added user-friendly error message when qmod is not available

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3093]
This commit is contained in:
Guillaume Raffy 2026-04-03 14:20:37 +02:00
parent 4cc541d9c3
commit 25afd32504
2 changed files with 33 additions and 17 deletions

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
from abc import ABC, abstractmethod
import sys
from typing import List, Dict, Any, Union
import logging
import subprocess
@ -81,6 +82,12 @@ class Sge(IGridEngine):
def __init__(self, dry_run: bool = False):
    """stores the dry_run flag and, unless dry_run is set, checks that qmod can be run.

    Args:
        dry_run: kept on the instance as ``self.dry_run``; when true, the
            qmod availability check is skipped entirely.

    Raises:
        RuntimeError: if the qmod executable cannot be found, or if the
            ``qmod -h`` probe exits with a non-zero status.
    """
    self.dry_run = dry_run
    if self.dry_run:
        return
    # check that qmod command is available
    try:
        subprocess.run(["qmod", "-h"], check=True, capture_output=True)
    except FileNotFoundError as e:
        # chain the original error so the traceback still shows the root cause
        raise RuntimeError("qmod command not found. Please make sure that the grid engine client is installed and qmod command is available in the PATH.") from e
    except subprocess.CalledProcessError as e:
        # same conversion as run_qmod, so callers only have to handle RuntimeError
        raise RuntimeError(f"qmod command failed: {e}") from e
def run_qmod(self, args):
"""runs qmod with the given arguments."""
@ -88,7 +95,10 @@ class Sge(IGridEngine):
if self.dry_run:
print(f"Dry run: {' '.join(cmd)}")
else:
subprocess.run(cmd, check=True)
try:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e:
raise RuntimeError(f"qmod command failed: {e}")
def disable_queue_machine(self, queue_machine: QueueMachineId):
    """runs qmod with the -d option on the given queue machine."""
    qmod_args = ["-d", queue_machine]
    self.run_qmod(qmod_args)
@ -188,8 +198,10 @@ class QueueManager:
db_backend: ISqlDatabaseBackend
grid_engine: IGridEngine
def __init__(self, db_backend: ISqlDatabaseBackend, grid_engine: IGridEngine = Sge()):
def __init__(self, db_backend: ISqlDatabaseBackend, grid_engine: IGridEngine = None):
    """stores the database backend and the grid engine to use.

    When no grid engine is given, an Sge instance is created at call time
    (not at function definition time).
    """
    self.db_backend = db_backend
    self.grid_engine = Sge() if grid_engine is None else grid_engine
def log_modification(self, queue_machines: List[QueueMachineId], action: str, disable_tag: DisableTag, reason: str) -> LogId:
@ -318,23 +330,27 @@ class QueueManager:
def main():
    """command line entry point: parses the arguments and requests the (de)activation of a queue.

    Any RuntimeError raised while talking to the database backend or the
    grid engine is reported on stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="qmod wrapper to manage queue states with a counter and logging.", epilog="Example usage: quman d main.q --disable-tag admin.graffy.bug4242 --reason 'preparing cluster to shutdown for power shortage, see bug 4242'")
    parser.add_argument("action", choices=["d", "e"], help="Action: d (deactivate) or e (activate)")
    parser.add_argument("queue", help="Queue to modify (e.g., main.q@node42@univ-rennes.fr, main.q, etc.)")
    parser.add_argument("--reason", required=True, help="Reason for the deactivation/activation")
    parser.add_argument("--disable-tag", required=True, help="tag for the disable request (e.g., auto.croconaus, manual.graffy, etc.)")
    # argparse reports its own errors through SystemExit, so parsing stays outside the try block
    args = parser.parse_args()

    try:
        db_backend = create_db_backend()
        quman = QueueManager(db_backend, Sge(dry_run=False))  # set dry_run to False to actually run qmod commands

        quman.synchronize_with_grid_engine()
        queue_machines = quman.get_queue_machines(args.queue)

        if args.action == "d":
            quman.request_queue_machines_deactivation(queue_machines, args.disable_tag, args.reason)
        elif args.action == "e":
            quman.request_queue_machines_activation(queue_machines, args.disable_tag, args.reason)
    except RuntimeError as e:
        sys.stderr.write(f"An error occurred: {e}\n")
        # sys.exit rather than the builtin exit(), which is only installed by the site module
        sys.exit(1)
if __name__ == "__main__":

View File

@ -1,4 +1,4 @@
__version__ = '1.0.14'
__version__ = '1.0.15'
class Version(object):