The user of `iprbench-submit` can now choose the cluster they want to use (previously it was hardcoded to the test cluster `DummyCluster`).

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
This commit is contained in:
Guillaume Raffy 2024-10-23 11:18:07 +02:00
parent fe4d66fb52
commit 6000e20d6b
2 changed files with 28 additions and 8 deletions

View File

@ -5,6 +5,7 @@ import argparse
from os import getenv, makedirs
from .core import IBenchmark, BenchmarkConfig, BenchmarkId
from .main import BenchmarkFactory
from .util import Singleton
import shutil
from pathlib import Path
import subprocess
@ -20,6 +21,7 @@ HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr'
GitCommitTag = str # commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
HostGroupId = str # eg 'xeon_gold_6140'
CompilerId = str # eg 'gfortran'
ClusterId = str # eg 'alambix'
def substitute_tag_with_filecontents(input_file_path: Path, tag: str, contents_file: Path, output_file_path: Path):
@ -67,7 +69,7 @@ class ClusterNodeDb:
cluster_nodes_defs: List[ClusterNodeDef]
cpu_defs: Dict[str, int]
def __init__(self, cluster_id='alambix'):
def __init__(self, cluster_id: ClusterId = 'alambix'):
self.cluster_nodes_defs = []
include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue
if cluster_id == 'alambix':
@ -179,8 +181,10 @@ class ClusterNodeDb:
class ICluster(abc.ABC):
cluster_db: ClusterNodeDb
cluster_id: ClusterId
def __init__(self, cluster_db: ClusterNodeDb):
def __init__(self, cluster_id: ClusterId, cluster_db: ClusterNodeDb):
self.cluster_id = cluster_id
self.cluster_db = cluster_db
@abc.abstractmethod
@ -199,8 +203,8 @@ class ICluster(abc.ABC):
class IprCluster(ICluster):
def __init__(self):
super().__init__(ClusterNodeDb('alambix'))
def __init__(self, cluster_id: ClusterId):
super().__init__(cluster_id, ClusterNodeDb(cluster_id))
def path_is_reachable_by_compute_nodes(self, path: Path):
path_is_reachable = False
@ -222,7 +226,8 @@ class IprCluster(ICluster):
class DummyCluster(ICluster):
def __init__(self):
super().__init__(ClusterNodeDb('dummy'))
cluster_id = 'dummy'
super().__init__(cluster_id, ClusterNodeDb(cluster_id))
def path_is_reachable_by_compute_nodes(self, path: Path):
return True
@ -233,6 +238,20 @@ class DummyCluster(ICluster):
subprocess.run(exec_path, check=True, cwd=working_dir)
class ClusterFactory(metaclass=Singleton):
    """Factory that builds the ICluster implementation matching a cluster identifier."""

    def __init__(self):
        pass

    def create_cluster(self, cluster_id: ClusterId) -> ICluster:
        """Create the cluster object identified by cluster_id.

        Only the requested cluster is instantiated: the table below maps each
        identifier to a zero-argument constructor instead of to an already
        built instance, so asking for one cluster never builds the node
        database of the others.

        :param cluster_id: identifier of the cluster to create, one of
            'dummy', 'physix' or 'alambix'
        :return: a newly created cluster object for this identifier
        :raises KeyError: if cluster_id is not one of the supported identifiers
        """
        cluster_builders = {
            'dummy': DummyCluster,
            'physix': lambda: IprCluster('physix'),
            'alambix': lambda: IprCluster('alambix'),
        }
        try:
            build_cluster = cluster_builders[cluster_id]
        except KeyError:
            # keep the KeyError type the plain dictionary lookup used to raise, but say which ids are valid
            supported_ids = ', '.join(sorted(cluster_builders))
            raise KeyError(f'unexpected cluster id: {cluster_id!r} (supported cluster ids: {supported_ids})') from None
        return build_cluster()
def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})'
@ -343,6 +362,7 @@ def main():
'''
arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
arg_parser.add_argument('--cluster-id', type=ClusterId, required=True, help='the identifier of cluster on which to submit the benchmark eg (\'dummy\', \'alambix\', etc.)')
arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
@ -350,7 +370,7 @@ def main():
arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')
args = arg_parser.parse_args()
benchmark_id = BenchmarkId(args.benchmark_id)
benchmark_id = ClusterId(args.benchmark_id)
benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
results_dir = Path(args.results_dir)
@ -358,7 +378,7 @@ def main():
cmake_path = args.cmake_path
benchmark_config = json.loads(args.config)
cluster = DummyCluster()
cluster = ClusterFactory().create_cluster(args.cluster_id)
if not cluster.path_is_reachable_by_compute_nodes(results_dir):
raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')

View File

@ -13,7 +13,7 @@ class ClusterBenchTestCase(unittest.TestCase):
def test_clusterbench_submit(self):
logging.info('test_clusterbench_submit')
subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
command = 'clusterbench-submit --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
subprocess.run(command, shell=True, check=True, executable='/bin/bash')
# def test_clusterbench_hibench(self):