- the user can now choose the host type id (through the new `--target-system-type-id` argument of `iprbench-run` and `clusterbench-submit`). This mechanism will allow the benchmarks to be run on ipr cluster nodes, where packages are discovered and activated through environment modules.
- note: the implementation of the host type `fr.univ-rennes.ipr.cluster-node` is not finished yet.

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
This commit is contained in:
Guillaume Raffy 2024-11-25 14:52:08 +01:00
parent 026c5f6100
commit 4ca2e02381
9 changed files with 64 additions and 69 deletions

View File

@ -3,9 +3,6 @@
from typing import List, Tuple, Dict from typing import List, Tuple, Dict
import argparse import argparse
from os import getenv, makedirs from os import getenv, makedirs
from .core import IBenchmark, BenchmarkConfig, BenchmarkId, ResultsDbParams, BenchParam
from .main import BenchmarkFactory
from .util import Singleton
import shutil import shutil
from pathlib import Path from pathlib import Path
import subprocess import subprocess
@ -15,6 +12,9 @@ import importlib.resources
import venv import venv
import json import json
import abc import abc
from .core import IBenchmark, BenchmarkConfig, BenchmarkId, ResultsDbParams, BenchParam, HostTypeId
from .main import BenchmarkFactory
from .util import Singleton
HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr' HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr'
@ -271,7 +271,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL) subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams): def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams, target_system_type_id: HostTypeId):
compiler_id: CompilerId = benchmark_config['fortran_compiler'] compiler_id: CompilerId = benchmark_config['fortran_compiler']
@ -315,6 +315,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
'<results_dir>': str(results_dir), '<results_dir>': str(results_dir),
'<resultsdb_params>': json.dumps(resultsdb_params).replace('"', r'\"'), '<resultsdb_params>': json.dumps(resultsdb_params).replace('"', r'\"'),
'<num_cores>': str(num_cores), '<num_cores>': str(num_cores),
'<target_system_type_id>': str(target_system_type_id),
} }
logging.debug('tags_dict = %s', str(tags_dict)) logging.debug('tags_dict = %s', str(tags_dict))
with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path: with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
@ -343,7 +344,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir) cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, resultsdb_params: ResultsDbParams): def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, resultsdb_params: ResultsDbParams, target_system_type_id: HostTypeId):
""" """
results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench) results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
""" """
@ -356,7 +357,7 @@ def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, r
logging.info('requested host groups: %s', host_groups) logging.info('requested host groups: %s', host_groups)
for host_group in host_groups: for host_group in host_groups:
launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, resultsdb_params) launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, resultsdb_params, target_system_type_id)
def main(): def main():
@ -374,6 +375,7 @@ def main():
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}') arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.') arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.')
arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}') arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
arg_parser.add_argument('--target-system-type-id', type=str, required=True, help='id of the operating system type to use. This is used to get the list installed packages, how to activate them, etc, eg "debian", "fr.univ-rennes.ipr.cluster-node".')
args = arg_parser.parse_args() args = arg_parser.parse_args()
benchmark_id = ClusterId(args.benchmark_id) benchmark_id = ClusterId(args.benchmark_id)
@ -391,7 +393,9 @@ def main():
benchmark = BenchmarkFactory().create_benchmark(benchmark_id, common_params) benchmark = BenchmarkFactory().create_benchmark(benchmark_id, common_params)
target_system_type_id = HostTypeId(args.target_system_type_id)
if not cluster.path_is_reachable_by_compute_nodes(results_dir): if not cluster.path_is_reachable_by_compute_nodes(results_dir):
raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, resultsdb_params) launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, resultsdb_params, target_system_type_id)

View File

@ -10,10 +10,16 @@ import re
PackageVersion = str # a version string, such as 4.9.3 PackageVersion = str # a version string, such as 4.9.3
PackageId = str # a generic identifier of a package (eg libopenblas-pthread) PackageId = str # a generic identifier of a package (eg libopenblas-pthread)
HostTypeId = str # uniquely identifies a ITargetHost instance eg fr.univ-rennes.ipr.cluster-node
class ITargetHost(abc.ABC): class ITargetHost(abc.ABC):
"""the host that runs the benchmark""" """the host that runs the benchmark"""
@abc.abstractmethod
def get_host_type_id(self) -> HostTypeId:
    """returns the unique identifier of this host type (eg 'debian' or 'fr.univ-rennes.ipr.cluster-node')"""
@abc.abstractmethod @abc.abstractmethod
def get_package_default_version(self, package_id: PackageId) -> PackageVersion: def get_package_default_version(self, package_id: PackageId) -> PackageVersion:
"""returns the latest installed version of the given package (eg '2021.1.2' for 'ifort')""" """returns the latest installed version of the given package (eg '2021.1.2' for 'ifort')"""

View File

@ -1,17 +1,16 @@
from typing import List from typing import List
from .core import BenchmarkId, IBenchmark, ResultsDbFactory, BenchParam
from .targethosts import GraffyWs2
from .benchmarks.hibench import HiBench
from .benchmarks.mamul1 import MaMul1
from .resultsdb.tsvresultsdb import TsvResultsDbCreator
from .resultsdb.sqlresultsdb import SqliteResultsDbCreator, SqlServerResultsDbCreator
from .util import Singleton
from .autoparams import MeasurementTime, HostFqdn, User, NumCpus, CpuModel, IprBenchVersion, HostId
import logging import logging
import argparse import argparse
from pathlib import Path from pathlib import Path
import json import json
from .core import BenchmarkId, IBenchmark, ResultsDbFactory, BenchParam
from .targethosts import DebianHost, IprClusterNode
from .benchmarks.hibench import HiBench
from .benchmarks.mamul1 import MaMul1
from .resultsdb.tsvresultsdb import TsvResultsDbCreator
from .resultsdb.sqlresultsdb import SqliteResultsDbCreator, SqlServerResultsDbCreator
from .util import Singleton
from .autoparams import MeasurementTime, HostFqdn, User, NumCpus, CpuModel, IprBenchVersion, HostId
class BenchmarkFactory(metaclass=Singleton): class BenchmarkFactory(metaclass=Singleton):
@ -44,10 +43,10 @@ def main():
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)') arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
arg_parser.add_argument('--config', type=str, required=True, help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}') arg_parser.add_argument('--config', type=str, required=True, help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}') arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
arg_parser.add_argument('--target-system-type-id', type=str, required=True, help='id of the operating system type to use. This is used to get the list installed packages, how to activate them, etc, eg "debian", "fr.univ-rennes.ipr.cluster-node".')
args = arg_parser.parse_args() args = arg_parser.parse_args()
target_host = GraffyWs2()
results_dir = args.results_dir results_dir = args.results_dir
ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator()) ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())
@ -65,6 +64,11 @@ def main():
results_db.add_auto_param(CpuModel()) results_db.add_auto_param(CpuModel())
results_db.add_common_param(BenchParam('launcher', BenchParam.Type.PARAM_TYPE_STRING, description='what triggered the benchmark (eg "alambix.job.12345", or "manual")')) results_db.add_common_param(BenchParam('launcher', BenchParam.Type.PARAM_TYPE_STRING, description='what triggered the benchmark (eg "alambix.job.12345", or "manual")'))
target_host = {
'debian': DebianHost(),
'fr.univ-rennes.ipr.cluster-node': IprClusterNode(),
}[args.target_system_type_id]
benchmark_id = BenchmarkId(args.benchmark_id) benchmark_id = BenchmarkId(args.benchmark_id)
benchmark = BenchmarkFactory().create_benchmark(benchmark_id, results_db.common_params) benchmark = BenchmarkFactory().create_benchmark(benchmark_id, results_db.common_params)
benchmark_config = benchmark.load_config(args.config, target_host) benchmark_config = benchmark.load_config(args.config, target_host)

View File

@ -55,7 +55,7 @@ num_cores=${NSLOTS}
# launch the benchmark # launch the benchmark
command="iprbench-run --benchmark-id '<benchmark_id>' --config '<benchmark_config>' --results-dir '${output_dir}' --resultsdb-params '<resultsdb_params>'" command="iprbench-run --benchmark-id '<benchmark_id>' --config '<benchmark_config>' --results-dir '${output_dir}' --resultsdb-params '<resultsdb_params>' --target-system-type-id '<target_system_type_id>'"
echo "command: ${command}" echo "command: ${command}"
eval ${command} eval ${command}

View File

@ -1,8 +1,8 @@
from typing import Set, Dict from typing import Dict
from .core import ITargetHost, PackageId, PackageVersion
import subprocess import subprocess
import re import re
from pathlib import Path from pathlib import Path
from .core import ITargetHost, PackageId, PackageVersion, HostTypeId
DebianPackageVersion = str # a version string, as in debian package versions, eg 4:9.3.0-1ubuntu2 DebianPackageVersion = str # a version string, as in debian package versions, eg 4:9.3.0-1ubuntu2
@ -11,6 +11,22 @@ DebianPackageId = str # the identifier of a package in debian repositories (eg
class DebianHost(ITargetHost): class DebianHost(ITargetHost):
def get_host_type_id(self) -> HostTypeId:
    """returns the identifier of the generic debian host type"""
    host_type_id = HostTypeId('debian')
    return host_type_id
def get_package_default_version(self, package_id: PackageId) -> PackageVersion:
    """returns the installed version of the given package, or an empty string if it is not an installed os package

    package_id: the generic identifier of the package (eg 'gfortran')
    """
    # guard clause: a package that is not installed has no version to report
    if not self.is_installed_os_package(package_id):
        return ''
    return self.get_installed_package_version(package_id)
def get_package_activation_command(self, package_id: str, package_version: str) -> str:
    """returns the shell command that activates the given version of the given package

    This implementation never needs an activation step: it only accepts the version
    reported by get_package_default_version, and returns an empty command for it.

    package_id: the generic identifier of the package (eg 'gfortran')
    package_version: the requested version of the package
    raises ValueError if the requested version differs from the one reported by
        get_package_default_version (including when the package is not installed)
    """
    current_version = self.get_package_default_version(package_id)
    if current_version == package_version:
        return ''  # no special instructions are required to activate the current package version
    if not current_version:
        # DebianHost.get_package_default_version reports a package that is not installed
        # as an empty version: say so explicitly instead of printing an empty version number
        raise ValueError(f'{package_id} version {package_version} not available: {package_id} is not installed on this host')
    raise ValueError(f'{package_id} version {package_version} not available: only {package_id} version {current_version} is available on this host')
def _get_debian_default(self, debian_generic_name: str) -> PackageVersion: def _get_debian_default(self, debian_generic_name: str) -> PackageVersion:
debian_default = None debian_default = None
completed_process = subprocess.run(f'update-alternatives --get-selections | grep "^{debian_generic_name} "', shell=True, check=False, capture_output=True) completed_process = subprocess.run(f'update-alternatives --get-selections | grep "^{debian_generic_name} "', shell=True, check=False, capture_output=True)
@ -140,49 +156,11 @@ class DebianHost(ITargetHost):
return package_id return package_id
class GraffyWs2(DebianHost):
host_name: str
available_packages: Set[str]
def __init__(self):
super().__init__()
self.host_name = 'graffy-ws2'
self.available_packages = {'gfortran'}
def get_package_default_version(self, package_id: PackageId) -> PackageVersion:
package_version = ''
if self.is_installed_os_package(package_id):
package_version = self.get_installed_package_version(package_id)
# if package_id not in self.available_packages:
# raise ValueError(f'{package_id} is not available on {self.host_name}')
# elif package_id == 'gfortran':
# completed_process = subprocess.run('gfortran --version', capture_output=True, check=False, shell=True)
# if completed_process.returncode != 0:
# raise ValueError(f'gfortran is not available on {self.host_name}')
# else:
# # GNU Fortran (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
# # Copyright (C) 2019 Free Software Foundation, Inc.
# # This is free software; see the source for copying conditions. There is NO
# # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# first_line = completed_process.stdout.decode('utf-8').split('\n')[0]
# logging.debug('first line: %s', first_line)
# gfortran_version = first_line.split(' ')[-1]
# assert re.match(r'[0-9]+\.[0-9]+\.[0-9]+', gfortran_version), f'unexpected format for gfortran version {gfortran_version}'
# return gfortran_version
# else:
# assert False, f'unhandled package: {package_id}'
return package_version
def get_package_activation_command(self, package_id: str, package_version: str) -> str:
current_version = self.get_package_default_version(package_id)
if current_version != package_version:
raise ValueError(f'{package_id} version {package_version} not available: only {package_id} version {current_version} is available on {self.host_name}')
else:
return '' # no special instructions are required to activate the current package version
class IprClusterNode(DebianHost): class IprClusterNode(DebianHost):
def get_host_type_id(self) -> HostTypeId:
    """returns the identifier of the ipr cluster node host type"""
    host_type_id = HostTypeId('fr.univ-rennes.ipr.cluster-node')
    return host_type_id
def get_latest_version_for_env_module(self, package_env_module: str): def get_latest_version_for_env_module(self, package_env_module: str):
# package_env_module: eg compilers/ifort # package_env_module: eg compilers/ifort
# graffy@alambix-frontal:~$ module help compilers/ifort/latest # graffy@alambix-frontal:~$ module help compilers/ifort/latest
@ -199,4 +177,4 @@ class IprClusterNode(DebianHost):
if package_id == 'ifort': if package_id == 'ifort':
return self.get_latest_version_for_env_module('compilers/ifort') return self.get_latest_version_for_env_module('compilers/ifort')
else: else:
assert False, f'unhandled package: {package_id}' return super().get_package_default_version(package_id)

View File

@ -1 +1 @@
__version__ = '0.0.9' __version__ = '0.0.10'

View File

@ -5,7 +5,7 @@ import logging
import subprocess import subprocess
import json import json
from pathlib import Path from pathlib import Path
from iprbench.core import BenchmarkConfig, BenchmarkId from iprbench.core import BenchmarkConfig, BenchmarkId, HostTypeId
from shutil import rmtree from shutil import rmtree
# import importlib.resources # import importlib.resources
@ -21,7 +21,8 @@ def test_benchmark(benchmark_id: BenchmarkId, benchmark_config: BenchmarkConfig,
'type': 'tsv-files', 'type': 'tsv-files',
'tsv_results_dir': f'{results_dir / "results"}' 'tsv_results_dir': f'{results_dir / "results"}'
} }
command = f'iprbench-run --benchmark-id \'{benchmark_id}\' --config \'{json.dumps(benchmark_config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\'' target_system_type_id = HostTypeId('debian')
command = f'iprbench-run --benchmark-id \'{benchmark_id}\' --config \'{json.dumps(benchmark_config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\' --target-system-type-id "{target_system_type_id}"'
subprocess.run(command, shell=True, check=True, executable='/bin/bash') subprocess.run(command, shell=True, check=True, executable='/bin/bash')

View File

@ -3,7 +3,7 @@ import logging
import subprocess import subprocess
import json import json
from pathlib import Path from pathlib import Path
from iprbench.core import BenchmarkConfig, BenchmarkId from iprbench.core import BenchmarkConfig, BenchmarkId, HostTypeId
from shutil import rmtree from shutil import rmtree
@ -18,7 +18,8 @@ def test_clusterbench_submit_with_benchmark(benchmark_id: BenchmarkId, benchmark
'type': 'tsv-files', 'type': 'tsv-files',
'tsv_results_dir': f'{results_dir / "results"}' 'tsv_results_dir': f'{results_dir / "results"}'
} }
command = f'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'{benchmark_id}\' --config \'{json.dumps(benchmark_config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\'' target_system_type_id = HostTypeId('debian')
command = f'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'{benchmark_id}\' --config \'{json.dumps(benchmark_config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\' --target-system-type-id "{target_system_type_id}"'
subprocess.run(command, shell=True, check=True, executable='/bin/bash') subprocess.run(command, shell=True, check=True, executable='/bin/bash')

View File

@ -5,8 +5,8 @@ import logging
import subprocess import subprocess
import json import json
from pathlib import Path from pathlib import Path
from iprbench.core import ResultsDbParams
from shutil import rmtree from shutil import rmtree
from iprbench.core import ResultsDbParams, HostTypeId
from cocluto.SimpaDbUtil import SshAccessedMysqlDb from cocluto.SimpaDbUtil import SshAccessedMysqlDb
@ -27,7 +27,8 @@ def test_resultsdb(resultsdb_params: ResultsDbParams, results_root_path: Path):
'num_cores': 2, 'num_cores': 2,
'launcher': 'iprbench.unittest', 'launcher': 'iprbench.unittest',
} }
command = f'iprbench-run --benchmark-id \'{benchmark_id}\' --config \'{json.dumps(benchmark_config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\'' target_system_type_id = HostTypeId('debian')
command = f'iprbench-run --benchmark-id \'{benchmark_id}\' --config \'{json.dumps(benchmark_config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\' --target-system-type-id "{target_system_type_id}"'
subprocess.run(command, shell=True, check=True, executable='/bin/bash') subprocess.run(command, shell=True, check=True, executable='/bin/bash')