Made benchmark `hibench` work

- converted the --cmake-path argument of clusterbench-submit into a parameter of the hibench benchmark, as it is only relevant to some benchmarks

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
Guillaume Raffy 2024-10-23 14:36:22 +02:00
parent 6000e20d6b
commit 98940824e4
7 changed files with 103 additions and 28 deletions
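
For illustration, a hibench submission after this change looks like the sketch below (it mirrors the updated test case further down in this diff; the cmake path, commit id and output directory are example values taken from the diff, not requirements):

    import json
    import subprocess

    # the cmake executable is now selected through the 'cmake_path' benchmark parameter
    # inside --config, instead of the removed clusterbench-submit --cmake-path option
    config = {
        'compiler_id': 'gfortran',
        'test_id': 'arch4_quick',
        'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
        'cmake_path': '/opt/cmake/cmake-3.23.0/bin/cmake',  # or simply 'cmake' for the one in PATH
        'num_cores': 2,
    }
    command = f"clusterbench-submit --cluster-id 'dummy' --benchmark-id 'hibench' --config '{json.dumps(config)}' --results-dir /tmp/hibench_out"
    subprocess.run(command, shell=True, check=True, executable='/bin/bash')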


@@ -1,7 +1,9 @@
-from ..core import IBenchmark, BenchParam, BenchmarkConfig
 from pathlib import Path
 import subprocess
 import os
+import shutil
+from ..core import IBenchmark, BenchParam, BenchmarkConfig
+from ..util import get_proxy_env_vars
 
 
 class HiBench(IBenchmark):
@@ -10,12 +12,14 @@ class HiBench(IBenchmark):
     def __init__(self):
         bench_params = []
+        bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
         bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id'))
         bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
-        bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
-        bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
-        bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
+        # bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
+        # bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
+        # bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
         bench_params.append(BenchParam('test_id', BenchParam.Type.PARAM_TYPE_STRING, 'the name of the test to run (eg arch4_quick (about 2s on a core i5 8th generation) or nh3h2_qma_long (about 10min on a core i5 8th generation))'))
+        bench_params.append(BenchParam('cmake_path', BenchParam.Type.PARAM_TYPE_STRING, 'the location of the cmake executable to use (eg "/opt/cmake/cmake-3.23.0/bin/cmake", or simply "cmake" for the one in the path)'))
 
         super().__init__(bench_id='hibench', bench_params=bench_params)
@@ -32,12 +36,19 @@ class HiBench(IBenchmark):
         return ram_per_core
 
     def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
         git_repos_url = 'https://github.com/hibridon/hibridon'
         git_user = 'g-raffy' # os.environ['HIBRIDON_REPOS_USER']
         git_pass_file = f'{os.getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat'
         hibridon_version = config['hibridon_version']
         test_id = config['test_id'] # eg arch4_quick or nh3h2_qma_long
         compiler_id = config['compiler_id']
+        cmake_path = config['cmake_path']
+        num_cores = config['num_cores']
+
+        if benchmark_output_dir.exists():
+            shutil.rmtree(benchmark_output_dir)
+        benchmark_output_dir.mkdir(parents=True)
 
         src_dir = benchmark_output_dir / 'hibridon.git'
         output_dir = benchmark_output_dir / 'output'
@@ -62,14 +73,8 @@ class HiBench(IBenchmark):
         else:
             assert f'unhandled compiler_id : {compiler_id}'
 
-        ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128'
-        proxy_env_vars = ''
-        proxy_env_vars = f'{proxy_env_vars} HTTP_PROXY={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} HTTPS_PROXY={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} FTP_PROXY={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} http_proxy={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} https_proxy={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} ftp_proxy={ur1_proxy_url}'
-        shell_command = f'{env_vars_bash_commands} && {proxy_env_vars} starbench --source-tree-provider \'{source_tree_provider}\' --num-cores 2 --output-dir={output_dir} --cmake-path=/usr/bin/cmake {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{benchmark_command}\''
-        subprocess.run(shell_command, shell=True, check=True, encoding='/bin/bash')
+        shell_command = ''
+        if len(env_vars_bash_commands) > 0:
+            shell_command += f'{env_vars_bash_commands} && '
+        shell_command += f'{get_proxy_env_vars()} starbench --source-tree-provider \'{source_tree_provider}\' --num-cores {num_cores} --output-dir={output_dir} --cmake-path={cmake_path} {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{benchmark_command}\''
+        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')


@@ -271,7 +271,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
     subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
 
 
-def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, cmake_path: str):
+def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster):
     compiler_id: CompilerId = benchmark_config['compiler_id']
@@ -308,7 +308,8 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
         '<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path),
         '<iprbench_venv_archive_path>': str(job_venv_archive_path),
         '<benchmark_config>': json.dumps(benchmark_config).replace('"', r'\"'),
-        '<results_dir>': str(results_dir)
+        '<results_dir>': str(results_dir),
+        '<num_cores>': num_cores,
     }
     logging.debug('tags_dict = %s', str(tags_dict))
     with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
@@ -337,7 +338,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
     cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
 
 
-def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, cmake_path: str):
+def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str):
     """
     results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
     """
@@ -350,7 +351,7 @@ def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, r
     logging.info('requested host groups: %s', host_groups)
     for host_group in host_groups:
-        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, cmake_path)
+        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster)
 
 
 def main():
@@ -367,7 +368,6 @@ def main():
     arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
     arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
     arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.')
-    arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')
 
     args = arg_parser.parse_args()
     benchmark_id = ClusterId(args.benchmark_id)
@@ -375,7 +375,6 @@ def main():
     results_dir = Path(args.results_dir)
     arch_regexp = args.arch_regexp
-    cmake_path = args.cmake_path
     benchmark_config = json.loads(args.config)
 
     cluster = ClusterFactory().create_cluster(args.cluster_id)
@@ -383,4 +382,4 @@
     if not cluster.path_is_reachable_by_compute_nodes(results_dir):
         raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
-    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, cmake_path)
+    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp)


@@ -45,4 +45,5 @@ def main():
     benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
     benchmark_config = json.loads(args.config)
     benchmark.validate_config(benchmark_config)
-    benchmark.execute(benchmark_config, args.results_dir)
+    results_dir = args.results_dir
+    benchmark.execute(benchmark_config, results_dir)


@@ -8,7 +8,7 @@ then
     # this script is not executed by sge... set dummy values for test
     TMPDIR=/tmp
     JOB_ID=666666
-    NSLOTS=2
+    NSLOTS=<num_cores>
 else
     executed_by_sge='true'
 fi
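
The <num_cores> placeholder is filled from the tags_dict built in launch_job_for_host_group (see the hunk above). The substitution code itself is not part of this commit; the following is only a minimal sketch of how such a tag replacement could work (substitute_tags is a hypothetical helper, not the project's actual function):

    from pathlib import Path


    def substitute_tags(template_text: str, tags_dict: dict) -> str:
        # replace each '<tag>' placeholder (eg '<num_cores>') with its value
        for tag, value in tags_dict.items():
            template_text = template_text.replace(tag, str(value))
        return template_text


    job_text = substitute_tags(Path('clusterbench-template.job').read_text(), {'<num_cores>': 2})
    assert 'NSLOTS=2' in job_text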


@@ -3,6 +3,7 @@ from pathlib import Path
 import sys
 import shutil
 import logging
+import socket
 
 
 class Singleton(type):
@@ -57,3 +58,51 @@ def extract_resource_dir(resource_package: str, resource_dir_to_extract: str, de
             raise NotImplementedError()
     else:
         assert False, f'unexpected method : {method}'
+
+
+def get_ip():
+    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+    s.settimeout(0)
+    try:
+        # doesn't even have to be reachable
+        s.connect(('10.254.254.254', 1))
+        IP = s.getsockname()[0]
+    except Exception:
+        IP = '127.0.0.1'
+    finally:
+        s.close()
+    return IP
+
+
+VlanId = str # eg 'SIMPA', 'REC_PHYS'
+
+
+def get_vlan() -> VlanId:
+    ip_address = [int(s) for s in get_ip().split('.')]
+    logging.debug('ip address : %s', ip_address)
+    vlan_id = 'UNKNOWN'
+    if ip_address[0] == 129 and ip_address[1] == 20:
+        vlan_id = {
+            27: 'SIMPA',
+            79: 'REC-PHYS',
+        }[ip_address[2]]
+    else:
+        assert False
+    return vlan_id
+
+
+def get_proxy_env_vars() -> str:
+    # fatal: unable to access 'https://github.com/hibridon/hibridon/': Failed to connect to proxy-nt.univ-rennes1.fr port 3128: Connection timed out
+    proxy_is_required = get_vlan() in ['SIMPA']
+    if proxy_is_required:
+        ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128'
+        proxy_env_vars = ''
+        proxy_env_vars = f'{proxy_env_vars} HTTP_PROXY={ur1_proxy_url}'
+        proxy_env_vars = f'{proxy_env_vars} HTTPS_PROXY={ur1_proxy_url}'
+        proxy_env_vars = f'{proxy_env_vars} FTP_PROXY={ur1_proxy_url}'
+        proxy_env_vars = f'{proxy_env_vars} http_proxy={ur1_proxy_url}'
+        proxy_env_vars = f'{proxy_env_vars} https_proxy={ur1_proxy_url}'
+        proxy_env_vars = f'{proxy_env_vars} ftp_proxy={ur1_proxy_url}'
+    else:
+        proxy_env_vars = ''
+    return proxy_env_vars
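
get_proxy_env_vars() returns either an empty string or a list of VAR=value assignments, so it can simply be prepended to a shell command line, which is what hibench.py does above. A minimal usage sketch (the git ls-remote command is only an illustrative example, assuming the helper lives in iprbench.util as the imports above suggest):

    import subprocess

    from iprbench.util import get_proxy_env_vars

    # on the SIMPA vlan this prepends HTTP_PROXY=..., HTTPS_PROXY=..., etc.;
    # on other vlans it prepends nothing
    shell_command = f'{get_proxy_env_vars()} git ls-remote https://github.com/hibridon/hibridon'
    subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')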


@@ -1,6 +1,7 @@
 import unittest
 import logging
 import subprocess
+import json
 
 
 class ClusterBenchTestCase(unittest.TestCase):
@@ -16,10 +17,17 @@ class ClusterBenchTestCase(unittest.TestCase):
         command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
         subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
-    # def test_clusterbench_hibench(self):
-    #     logging.info('test_clusterbench_hibench')
-    #     command = 'clusterbench-submit --benchmark-id \'hibench\' --config \'{"compiler_id": "gfortran", "test_id": "arch4_quick"}\' --results-dir /tmp/mamul1_out'
-    #     subprocess.run(command, shell=True, check=True, executable='/bin/bash')
+    def test_clusterbench_hibench(self):
+        logging.info('test_clusterbench_hibench')
+        config = {
+            'compiler_id': 'gfortran',
+            'test_id': 'arch4_quick',
+            'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
+            'cmake_path': 'cmake',
+            'num_cores': 2,
+        }
+        command = f'clusterbench-submit --cluster-id \'dummy\' --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir /tmp/hibench_out'
+        subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
 
 if __name__ == '__main__':


@@ -1,6 +1,7 @@
 import unittest
 import logging
 import subprocess
+import json
 
 # import importlib.resources
@@ -22,6 +23,18 @@ class IprBenchTestCase(unittest.TestCase):
         command = 'iprbench-run --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}\' --results-dir /tmp/mamul1_out'
         subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
+    def test_iprbench_hibench(self):
+        logging.info('test_iprbench_hibench')
+        config = {
+            'compiler_id': 'gfortran',
+            'test_id': 'arch4_quick',
+            'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
+            'cmake_path': 'cmake',
+            'num_cores': 2,
+        }
+        command = f'iprbench-run --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir /tmp/hibench_out'
+        subprocess.run(command, shell=True, check=True, executable='/bin/bash')
+
 
 if __name__ == '__main__':
     unittest.main()