commit e4a7f22f4a6cfd3341bd3b67ea3b44996a34be0f Author: Guillaume Raffy Date: Fri Mar 29 08:05:14 2024 +0100 managed to get a functional toml project diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..671ff22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +quman.venv +quman.egg-info +quman.git.code-workspace +build/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..aa92c33 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# queue manager + +compute cluster queue manager: a wrapper for [Son of Grid Engine](https://wiki.archlinux.org/title/Son_of_Grid_Engine)'s `qmod` application + +The goals of this tool are: +- to allow the job scheduler manager to provide a reason when disabling a queue +- to provide a reference counting mechanism that allows the same queue to be disabled for multiple reasons. The queue becomes enabled only when all disabling reasons have been removed. For example, a queue can be disabled for both of the following reasons simultaneously: + 1. there is an ongoing automatic update + 2. the sys admin decides to disable the queue to replace a faulty RAM module + then when the automatic update system completes, it asks for the queue to be enabled, but because the queue is still disabled for reason 2, the queue is not actually enabled +- to provide an agnostic abstraction layer able to interface any job manager (sge, slurm, etc.) + +As a result, this tool can be used to provide information regarding the reason why a queue is disabled, but it also helps the sys admin to remember why a queue was disabled. 
+ + +## example + +```sh +bob@bobland~> quman --get-disable-reasons main.q@alambix42.ipr.univ-rennes.fr +bob@bobland~> quman --disable-queue main.q@alambix42.ipr.univ-rennes.fr --message 'requires maintenance for ram replacement' +maco@alambix42~> quman --disable-queue main.q@alambix42.ipr.univ-rennes.fr --message 'requires a security update' +bob@bobland~> quman --get-disable-reasons main.q@alambix42.ipr.univ-rennes.fr +2024-03-13 17:54:18 bob@bobland requires maintenance for ram replacement +2024-03-14 08:42:23 maco@alambix42 requires a security update +``` + + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..76e774f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "quman" +dynamic = ["version"] # the list of fields whose values are discovered by the backend (eg __version__) +description = "compute cluster queue manager" +readme = "README.md" +keywords = ["son of grid engine", "slurm", "sge"] +license = {text = "MIT License"} +dependencies = [ +# "cocluto >= 1.2" + "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto" +] +requires-python = ">= 3.8" +authors = [ + {name = "Guillaume Raffy", email = "guillaume.raffy@univ-rennes.fr"} +] + +[project.scripts] +quman = "quman.main:main" + +[project.urls] +Repository = "https://git.ipr.univ-rennes.fr/cellinfo/quman" + +[tool.setuptools.dynamic] +version = {attr = "quman.main.__version__"} diff --git a/samples/sample.sql b/samples/sample.sql new file mode 100644 index 0000000..981eb1c --- /dev/null +++ b/samples/sample.sql @@ -0,0 +1,26 @@ +SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO"; + +-- CREATE TABLE IF NOT EXISTS `queue_disables` ( +-- `disable_id` integer NOT NULL, +-- `queue_id` varchar(255) NOT NULL, +-- PRIMARY KEY (`disable_id`) +-- ) ENGINE=MyISAM DEFAULT CHARSET=latin1; + +CREATE TABLE IF NOT EXISTS `queue_disables` ( + `disable_id` integer NOT NULL, + `queue_id` 
varchar(255) NOT NULL, + `disable_start_time` text NOT NULL, -- date time encoded as ISO8601 strings ("YYYY-MM-DD HH:MM:SS.SSS") + `disable_end_time` text NOT NULL, -- date time encoded as ISO8601 strings ("YYYY-MM-DD HH:MM:SS.SSS") + `operator_id` varchar(128) NOT NULL, -- sesame login of the sys admin that operated the dimm swap + `operator_host_fqdn` varchar(128) NOT NULL, -- machine on which the queue disable operation was performed + `operator_role` varchar(128) NOT NULL, -- eg maco + `ticket_url` varchar(255) NOT NULL, -- the ticket to which this operation is related eg https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3752 + `comment` varchar(255) NOT NULL, -- + PRIMARY KEY (`disable_id`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; + +INSERT INTO `queue_disables` (`disable_id`, `queue_id`, `disable_start_time`, `disable_end_time`, `operator_id`, `operator_host_fqdn`, `operator_role`, `ticket_url`, `comment`) VALUES +(1, 'main.q@alambix42.ipr.univ-rennes.fr', '2023-03-14 19:30:00.000-05:00', '2023-10-06 19:30:00.000-05:00', 'graffy', 'graffy-ws2.spm.univ-rennes1.fr', 'sys-admin', 'https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3213', 'disabled to change dimmm a5'), +(2, 'main.q@alambix42.ipr.univ-rennes.fr', '2024-02-14 19:30:00.000-05:00', '', 'root', 'alambix42.ipr.univ-rennes1.fr', 'maco', 'https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3213', 'security update'), +(3, 'main.q', '2024-02-28 19:30:00.000-05:00', '', 'graffy', 'alambix.ipr.univ-rennes1.fr', 'sys-admin', '', 'cluster shutdown'); + diff --git a/src/quman/__init__.py b/src/quman/__init__.py new file mode 100644 index 0000000..a451d35 --- /dev/null +++ b/src/quman/__init__.py @@ -0,0 +1,6 @@ +# from .main import __version__, main + +# __all__ = [ +# '__version__', +# 'main' +# ] diff --git a/src/quman/main.py b/src/quman/main.py new file mode 100644 index 0000000..2461db6 --- /dev/null +++ b/src/quman/main.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +__version__ = '1.0.1' +import 
import argparse
import logging
import re
from pathlib import Path
from textwrap import dedent
from typing import TYPE_CHECKING, Optional, Sequence

if TYPE_CHECKING:
    # Only needed for annotations; the runtime import is deferred to main() so
    # that `--help` / `--version` work without touching the database backend.
    from cocluto.SimpaDbUtil import SqlDatabaseReader


QueueId = str

# Characters accepted in a queue identifier (e.g. 'main.q' or
# 'main.q@alambix42.ipr.univ-rennes.fr'). The identifier is embedded in a SQL
# string, so anything else (quotes, spaces, ...) is rejected up front.
_QUEUE_ID_PATTERN = re.compile(r'[A-Za-z0-9_.@-]+')


class QueueManager():
    """Reports (and eventually changes) the disabled state of cluster queues.

    The state is read from the `queue_disables` table through the database
    reader given at construction time.
    """
    db: 'SqlDatabaseReader'

    def __init__(self, quman_db: 'SqlDatabaseReader'):
        """Store the database reader; it only needs a `query(sql)` method."""
        self.db = quman_db

    def show_disable_reasons(self, requested_queue_id: QueueId) -> None:
        """Print every still-active disable reason recorded for a queue.

        A disable is considered active when its `disable_end_time` is empty.

        Args:
            requested_queue_id: identifier of the queue to look up.

        Raises:
            ValueError: if the identifier contains characters that are not
                allowed in a queue id (it would otherwise be injected verbatim
                into the SQL query).
        """
        if not _QUEUE_ID_PATTERN.fullmatch(requested_queue_id):
            raise ValueError(f'invalid queue id: {requested_queue_id!r}')

        print(requested_queue_id)
        rows = self.db.query(f'SELECT * FROM queue_disables WHERE queue_id = "{requested_queue_id}" and disable_end_time = ""')

        for row in rows:
            # column order follows the `queue_disables` table definition
            (disable_id, _queue_id, disable_start_time, _disable_end_time, operator_id, operator_host_fqdn, operator_role, _ticket_url, comment) = row
            print(f'{disable_id}: disabled by {operator_id}@{operator_host_fqdn} (role:{operator_role}) on {disable_start_time} because {comment}')

    def set_queue_activation(self, queue_id: QueueId, _activation: bool):
        """Enable or disable a queue (not implemented yet).

        Raises:
            NotImplementedError: always, after printing the queue id.
        """
        print(queue_id)
        raise NotImplementedError()


def main(argv: Optional[Sequence[str]] = None):
    """Command line entry point.

    Args:
        argv: arguments to parse; defaults to `sys.argv[1:]` when None, which
            is what the `quman` console script relies on.
    """
    logging.basicConfig(level=logging.DEBUG)  # , format='%(asctime)s - %(levelname)s - %(message)s')
    parser = argparse.ArgumentParser(description='compute cluster queue manager: a wrapper for [Son of Grid Engine](https://wiki.archlinux.org/title/Son_of_Grid_Engine)\'s `qmod ` application', formatter_class=argparse.RawDescriptionHelpFormatter)

    subparsers = parser.add_subparsers(dest='cmd', required=True, description='action to perform')

    parser.add_argument('--version', action='version', help=f'shows {parser.prog}\'s version', version=f'{parser.prog} version {__version__}')
    epilog = dedent(f'''\
        examples:
            {parser.prog} show-disable-reasons --queue main.q@alambix42

        ''')
    parser.epilog = epilog

    # show-disable-reasons subcommand
    show_disable_reasons_parser = subparsers.add_parser('show-disable-reasons', description='prints the reasons causing the specified queue to be disabled')
    show_disable_reasons_parser.add_argument('--queue', type=str, required=True, help='the queue that is checked')

    # Parse first: --help, --version and usage errors must not depend on the
    # database being reachable.
    args = parser.parse_args(argv)

    from cocluto.SimpaDbUtil import SqlFile, SqlDatabaseReader

    # TODO: the database location is hard-coded and relative to the current
    # working directory.
    quman_db_file = SqlFile(Path('./samples/sample.sql'))
    quman_db = SqlDatabaseReader(quman_db_file)
    quman = QueueManager(quman_db)

    if args.cmd == 'show-disable-reasons':
        quman.show_disable_reasons(args.queue)
    elif args.cmd == 'disable-queue':
        # NOTE: no 'disable-queue' subparser is registered above, so this
        # branch cannot be reached from the command line yet.
        quman.set_queue_activation(args.queue, _activation=False)