The user of `iprbench-submit` can now choose the cluster they want to use (previously it was hardcoded to the test cluster `DummyCluster`).

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
This commit is contained in:
Guillaume Raffy 2024-10-23 11:18:07 +02:00
parent fe4d66fb52
commit 6000e20d6b
2 changed files with 28 additions and 8 deletions

View File

@ -5,6 +5,7 @@ import argparse
from os import getenv, makedirs from os import getenv, makedirs
from .core import IBenchmark, BenchmarkConfig, BenchmarkId from .core import IBenchmark, BenchmarkConfig, BenchmarkId
from .main import BenchmarkFactory from .main import BenchmarkFactory
from .util import Singleton
import shutil import shutil
from pathlib import Path from pathlib import Path
import subprocess import subprocess
@ -20,6 +21,7 @@ HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr'
GitCommitTag = str # commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' GitCommitTag = str # commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
HostGroupId = str # eg 'xeon_gold_6140' HostGroupId = str # eg 'xeon_gold_6140'
CompilerId = str # eg 'gfortran' CompilerId = str # eg 'gfortran'
ClusterId = str # eg 'alambix'
def substitute_tag_with_filecontents(input_file_path: Path, tag: str, contents_file: Path, output_file_path: Path): def substitute_tag_with_filecontents(input_file_path: Path, tag: str, contents_file: Path, output_file_path: Path):
@ -67,7 +69,7 @@ class ClusterNodeDb:
cluster_nodes_defs: List[ClusterNodeDef] cluster_nodes_defs: List[ClusterNodeDef]
cpu_defs: Dict[str, int] cpu_defs: Dict[str, int]
def __init__(self, cluster_id='alambix'): def __init__(self, cluster_id: ClusterId = 'alambix'):
self.cluster_nodes_defs = [] self.cluster_nodes_defs = []
include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue
if cluster_id == 'alambix': if cluster_id == 'alambix':
@ -179,8 +181,10 @@ class ClusterNodeDb:
class ICluster(abc.ABC): class ICluster(abc.ABC):
cluster_db: ClusterNodeDb cluster_db: ClusterNodeDb
cluster_id: ClusterId
def __init__(self, cluster_db: ClusterNodeDb): def __init__(self, cluster_id: ClusterId, cluster_db: ClusterNodeDb):
self.cluster_id = cluster_id
self.cluster_db = cluster_db self.cluster_db = cluster_db
@abc.abstractmethod @abc.abstractmethod
@ -199,8 +203,8 @@ class ICluster(abc.ABC):
class IprCluster(ICluster): class IprCluster(ICluster):
def __init__(self): def __init__(self, cluster_id: ClusterId):
super().__init__(ClusterNodeDb('alambix')) super().__init__(cluster_id, ClusterNodeDb(cluster_id))
def path_is_reachable_by_compute_nodes(self, path: Path): def path_is_reachable_by_compute_nodes(self, path: Path):
path_is_reachable = False path_is_reachable = False
@ -222,7 +226,8 @@ class IprCluster(ICluster):
class DummyCluster(ICluster): class DummyCluster(ICluster):
def __init__(self): def __init__(self):
super().__init__(ClusterNodeDb('dummy')) cluster_id = 'dummy'
super().__init__(cluster_id, ClusterNodeDb(cluster_id))
def path_is_reachable_by_compute_nodes(self, path: Path): def path_is_reachable_by_compute_nodes(self, path: Path):
return True return True
@ -233,6 +238,20 @@ class DummyCluster(ICluster):
subprocess.run(exec_path, check=True, cwd=working_dir) subprocess.run(exec_path, check=True, cwd=working_dir)
class ClusterFactory(metaclass=Singleton):
    """Creates the ICluster implementation that matches a cluster identifier.

    The supported identifiers are 'dummy', 'physix' and 'alambix'.
    """

    def __init__(self):
        pass

    def create_cluster(self, cluster_id: ClusterId) -> ICluster:
        """Build and return the cluster identified by `cluster_id`.

        Args:
            cluster_id: the identifier of the cluster to create, eg 'alambix'.

        Returns:
            a newly constructed cluster object for `cluster_id`.

        Raises:
            KeyError: if `cluster_id` is not one of the supported identifiers.
        """
        # the table stores callables rather than instances, so that only the
        # requested cluster is constructed (building a cluster also builds
        # its ClusterNodeDb, which is wasted work for the ones not selected)
        cluster_builders = {
            'dummy': DummyCluster,
            'physix': lambda: IprCluster('physix'),
            'alambix': lambda: IprCluster('alambix'),
        }
        try:
            build_cluster = cluster_builders[cluster_id]
        except KeyError:
            # same exception type as a plain dict lookup, but with a message
            # that tells the user which identifiers are accepted
            raise KeyError(f'unknown cluster id {cluster_id!r} (expected one of {sorted(cluster_builders)})') from None
        return build_cluster()
def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path): def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})' assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})'
@ -343,6 +362,7 @@ def main():
''' '''
arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter) arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
arg_parser.add_argument('--cluster-id', type=ClusterId, required=True, help='the identifier of cluster on which to submit the benchmark eg (\'dummy\', \'alambix\', etc.)')
arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)') arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)') arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}') arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
@ -350,7 +370,7 @@ def main():
arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)') arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')
args = arg_parser.parse_args() args = arg_parser.parse_args()
benchmark_id = BenchmarkId(args.benchmark_id) benchmark_id = ClusterId(args.benchmark_id)
benchmark = BenchmarkFactory().create_benchmark(benchmark_id) benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
results_dir = Path(args.results_dir) results_dir = Path(args.results_dir)
@ -358,7 +378,7 @@ def main():
cmake_path = args.cmake_path cmake_path = args.cmake_path
benchmark_config = json.loads(args.config) benchmark_config = json.loads(args.config)
cluster = DummyCluster() cluster = ClusterFactory().create_cluster(args.cluster_id)
if not cluster.path_is_reachable_by_compute_nodes(results_dir): if not cluster.path_is_reachable_by_compute_nodes(results_dir):
raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')

View File

@ -13,7 +13,7 @@ class ClusterBenchTestCase(unittest.TestCase):
def test_clusterbench_submit(self): def test_clusterbench_submit(self):
logging.info('test_clusterbench_submit') logging.info('test_clusterbench_submit')
subprocess.run('pip list', shell=True, check=True, executable='/bin/bash') subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
command = 'clusterbench-submit --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out' command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
subprocess.run(command, shell=True, check=True, executable='/bin/bash') subprocess.run(command, shell=True, check=True, executable='/bin/bash')
# def test_clusterbench_hibench(self): # def test_clusterbench_hibench(self):