the user of `iprbench-submit` can now choose the cluster to use via the new `--cluster-id` argument (previously the cluster was hardcoded to the test cluster `DummyCluster`)

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
2024-10-23 11:18:07 +02:00 · 2024-10-23 11:18:07 +02:00 · 6000e20d6b
parent fe4d66fb52
commit 6000e20d6b
2 changed files with 28 additions and 8 deletions
--- a/iprbench/clusterbench.py
+++ b/iprbench/clusterbench.py
@ -5,6 +5,7 @@ import argparse
 from os import getenv, makedirs
 from .core import IBenchmark, BenchmarkConfig, BenchmarkId
 from .main import BenchmarkFactory
+from .util import Singleton
 import shutil
 from pathlib import Path
 import subprocess
@ -20,6 +21,7 @@ HostFqdn = str  # eg 'physix90.ipr.univ-rennes1.fr'
 GitCommitTag = str  # commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
 HostGroupId = str  # eg 'xeon_gold_6140'
 CompilerId = str  # eg 'gfortran'
+ClusterId = str  # eg 'alambix'


 def substitute_tag_with_filecontents(input_file_path: Path, tag: str, contents_file: Path, output_file_path: Path):
@ -67,7 +69,7 @@ class ClusterNodeDb:
    cluster_nodes_defs: List[ClusterNodeDef]
    cpu_defs: Dict[str, int]

-    def __init__(self, cluster_id='alambix'):
+    def __init__(self, cluster_id: ClusterId = 'alambix'):
        self.cluster_nodes_defs = []
        include_multiqueue_nodes = False  # at the moment hibench only works on nodes that have all their cores in the same queue
        if cluster_id == 'alambix':
@ -179,8 +181,10 @@ class ClusterNodeDb:

 class ICluster(abc.ABC):
    cluster_db: ClusterNodeDb
+    cluster_id: ClusterId

-    def __init__(self, cluster_db: ClusterNodeDb):
+    def __init__(self, cluster_id: ClusterId, cluster_db: ClusterNodeDb):
+        self.cluster_id = cluster_id
        self.cluster_db = cluster_db

    @abc.abstractmethod
@ -199,8 +203,8 @@ class ICluster(abc.ABC):

 class IprCluster(ICluster):

-    def __init__(self):
-        super().__init__(ClusterNodeDb('alambix'))
+    def __init__(self, cluster_id: ClusterId):
+        super().__init__(cluster_id, ClusterNodeDb(cluster_id))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        path_is_reachable = False
@ -222,7 +226,8 @@ class IprCluster(ICluster):
 class DummyCluster(ICluster):

    def __init__(self):
-        super().__init__(ClusterNodeDb('dummy'))
+        cluster_id = 'dummy'
+        super().__init__(cluster_id, ClusterNodeDb(cluster_id))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        return True
@ -233,6 +238,20 @@ class DummyCluster(ICluster):
        subprocess.run(exec_path, check=True, cwd=working_dir)


+class ClusterFactory(metaclass=Singleton):
+    '''builds the ICluster that matches a cluster id (raises KeyError if the cluster id is unknown)'''
+
+    def create_cluster(self, cluster_id: ClusterId) -> ICluster:
+        # map each id to a constructor so that only the requested cluster gets instantiated
+        # (instantiating them all would build a ClusterNodeDb for each cluster on every call)
+        cluster_creators = {
+            'dummy': DummyCluster,
+            'physix': lambda: IprCluster('physix'),
+            'alambix': lambda: IprCluster('alambix')
+        }
+        return cluster_creators[cluster_id]()
+
+
 def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
    this_virtualenv_path = Path(getenv('VIRTUAL_ENV'))  # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
    assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})'
@ -343,6 +362,7 @@ def main():
    '''

    arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
+    arg_parser.add_argument('--cluster-id', type=ClusterId, required=True, help='the identifier of cluster on which to submit the benchmark eg (\'dummy\', \'alambix\', etc.)')
    arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
    arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
    arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
@ -350,7 +370,7 @@ def main():
    arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')

    args = arg_parser.parse_args()
-    benchmark_id = BenchmarkId(args.benchmark_id)
+    benchmark_id = BenchmarkId(args.benchmark_id)  # this is a benchmark id, not a cluster id
    benchmark = BenchmarkFactory().create_benchmark(benchmark_id)

    results_dir = Path(args.results_dir)
@ -358,7 +378,7 @@ def main():
    cmake_path = args.cmake_path
    benchmark_config = json.loads(args.config)

-    cluster = DummyCluster()
+    cluster = ClusterFactory().create_cluster(args.cluster_id)

    if not cluster.path_is_reachable_by_compute_nodes(results_dir):
        raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
--- a/test/test_clusterbench.py
+++ b/test/test_clusterbench.py
@ -13,7 +13,7 @@ class ClusterBenchTestCase(unittest.TestCase):
    def test_clusterbench_submit(self):
        logging.info('test_clusterbench_submit')
        subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
-        command = 'clusterbench-submit --arch-regexp "intel_core.*"  --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
+        command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
        subprocess.run(command, shell=True, check=True, executable='/bin/bash')

    # def test_clusterbench_hibench(self):