Made benchmark `hibench` work

- converted the --cmake-path argument of clusterbench-submit into a parameter of the benchmark hibench, as it's specific to some benchmarks only.

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
2024-10-23 14:36:22 +02:00 · 2024-10-23 14:36:22 +02:00 · 98940824e4
parent 6000e20d6b
commit 98940824e4
7 changed files with 103 additions and 28 deletions
--- a/iprbench/benchmarks/hibench.py
+++ b/iprbench/benchmarks/hibench.py
@ -1,7 +1,9 @@
-from ..core import IBenchmark, BenchParam, BenchmarkConfig
 from pathlib import Path
 import subprocess
 import os
+import shutil
+from ..core import IBenchmark, BenchParam, BenchmarkConfig
+from ..util import get_proxy_env_vars


 class HiBench(IBenchmark):
@ -10,12 +12,14 @@ class HiBench(IBenchmark):

    def __init__(self):
        bench_params = []
+        bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
        bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id'))
        bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
-        bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
-        bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
-        bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
+        # bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
+        # bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
+        # bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
        bench_params.append(BenchParam('test_id', BenchParam.Type.PARAM_TYPE_STRING, 'the name of the test to run (eg arch4_quick (about 2s on a core i5 8th generation) or nh3h2_qma_long (about 10min on a core i5 8th generation))'))
+        bench_params.append(BenchParam('cmake_path', BenchParam.Type.PARAM_TYPE_STRING, 'the location of the cmake executable to use (eg "/opt/cmake/cmake-3.23.0/bin/cmake", or simply "cmake" for the one in the path)'))

        super().__init__(bench_id='hibench', bench_params=bench_params)

@ -32,12 +36,19 @@ class HiBench(IBenchmark):
        return ram_per_core

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
+
        git_repos_url = 'https://github.com/hibridon/hibridon'
        git_user = 'g-raffy'  # os.environ['HIBRIDON_REPOS_USER']
        git_pass_file = f'{os.getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat'
        hibridon_version = config['hibridon_version']
        test_id = config['test_id']  # eg arch4_quick or nh3h2_qma_long
        compiler_id = config['compiler_id']
+        cmake_path = config['cmake_path']
+        num_cores = config['num_cores']
+
+        if benchmark_output_dir.exists():
+            shutil.rmtree(benchmark_output_dir)
+        benchmark_output_dir.mkdir(parents=True)

        src_dir = benchmark_output_dir / 'hibridon.git'
        output_dir = benchmark_output_dir / 'output'
@ -62,14 +73,8 @@ class HiBench(IBenchmark):
        else:
            assert f'unhandled compiler_id : {compiler_id}'

-        ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128'
-        proxy_env_vars = ''
-        proxy_env_vars = f'{proxy_env_vars} HTTP_PROXY={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} HTTPS_PROXY={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} FTP_PROXY={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} http_proxy={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} https_proxy={ur1_proxy_url}'
-        proxy_env_vars = f'{proxy_env_vars} ftp_proxy={ur1_proxy_url}'
-
-        shell_command = f'{env_vars_bash_commands} && {proxy_env_vars} starbench --source-tree-provider \'{source_tree_provider}\' --num-cores 2 --output-dir={output_dir} --cmake-path=/usr/bin/cmake {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{benchmark_command}\''
-        subprocess.run(shell_command, shell=True, check=True, encoding='/bin/bash')
+        shell_command = ''
+        if len(env_vars_bash_commands) > 0:
+            shell_command += f'{env_vars_bash_commands} && '
+        shell_command += f'{get_proxy_env_vars()} starbench --source-tree-provider \'{source_tree_provider}\' --num-cores {num_cores} --output-dir={output_dir} --cmake-path={cmake_path} {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{benchmark_command}\''
+        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')
--- a/iprbench/clusterbench.py
+++ b/iprbench/clusterbench.py
@ -271,7 +271,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
    subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)


-def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, cmake_path: str):
+def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster):

    compiler_id: CompilerId = benchmark_config['compiler_id']

@ -308,7 +308,8 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
        '<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path),
        '<iprbench_venv_archive_path>': str(job_venv_archive_path),
        '<benchmark_config>': json.dumps(benchmark_config).replace('"', r'\"'),
-        '<results_dir>': str(results_dir)
+        '<results_dir>': str(results_dir),
+        '<num_cores>': num_cores,
    }
    logging.debug('tags_dict = %s', str(tags_dict))
    with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
@ -337,7 +338,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
    cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)


-def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, cmake_path: str):
+def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str):
    """
    results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
    """
@ -350,7 +351,7 @@ def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, r
    logging.info('requested host groups: %s', host_groups)

    for host_group in host_groups:
-        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, cmake_path)
+        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster)


 def main():
@ -367,7 +368,6 @@ def main():
    arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
    arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
    arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.')
-    arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')

    args = arg_parser.parse_args()
    benchmark_id = ClusterId(args.benchmark_id)
@ -375,7 +375,6 @@ def main():

    results_dir = Path(args.results_dir)
    arch_regexp = args.arch_regexp
-    cmake_path = args.cmake_path
    benchmark_config = json.loads(args.config)

    cluster = ClusterFactory().create_cluster(args.cluster_id)
@ -383,4 +382,4 @@ def main():
    if not cluster.path_is_reachable_by_compute_nodes(results_dir):
        raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')

-    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, cmake_path)
+    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp)
--- a/iprbench/main.py
+++ b/iprbench/main.py
@ -45,4 +45,5 @@ def main():
    benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
    benchmark_config = json.loads(args.config)
    benchmark.validate_config(benchmark_config)
-    benchmark.execute(benchmark_config, args.results_dir)
+    results_dir = args.results_dir
+    benchmark.execute(benchmark_config, results_dir)
--- a/iprbench/resources/clusterbench-template.job
+++ b/iprbench/resources/clusterbench-template.job
@ -8,7 +8,7 @@ then
 	# this script is not executed by sge... set dummy values for test
 	TMPDIR=/tmp
 	JOB_ID=666666
-	NSLOTS=2
+	NSLOTS=<num_cores>
 else
 	executed_by_sge='true'
 fi
--- a/iprbench/util.py
+++ b/iprbench/util.py
@ -3,6 +3,7 @@ from pathlib import Path
 import sys
 import shutil
 import logging
+import socket


 class Singleton(type):
@ -57,3 +58,51 @@ def extract_resource_dir(resource_package: str, resource_dir_to_extract: str, de
        raise NotImplementedError()
    else:
        assert False, f'unexpected method : {method}'
+
+
+def get_ip() -> str:
+    """Returns the ip address of the local network interface that would be used for outgoing traffic.
+
+    The address is obtained by connecting a datagram socket to an arbitrary address and asking
+    the socket for its local endpoint.
+
+    Returns:
+        the local ipv4 address in dotted notation, or '127.0.0.1' if it could not be determined
+    """
+    # the context manager closes the socket whatever happens
+    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
+        probe.settimeout(0)
+        try:
+            # the target address doesn't even have to be reachable
+            probe.connect(('10.254.254.254', 1))
+            return probe.getsockname()[0]
+        except Exception:
+            # best effort: fall back to the loopback address
+            return '127.0.0.1'
+
+
+VlanId = str  # eg 'SIMPA', 'REC-PHYS'
+
+
+def get_vlan() -> VlanId:
+    """Returns the id of the vlan this machine is connected to.
+
+    The vlan is deduced from the ip address returned by get_ip(): for addresses of the
+    form 129.20.<n>.<m>, the vlan is identified by <n>.
+
+    Returns:
+        the vlan id (eg 'SIMPA'), or 'UNKNOWN' if the ip address doesn't belong to a known
+        vlan (this includes the '127.0.0.1' fallback of get_ip())
+    """
+    # maps the 3rd byte of 129.20.x.x addresses to the matching vlan
+    known_vlans = {
+        27: 'SIMPA',
+        79: 'REC-PHYS',
+    }
+    ip_address = [int(s) for s in get_ip().split('.')]
+    logging.debug('ip address : %s', ip_address)
+    if ip_address[:2] != [129, 20]:
+        # not on the 129.20.x.x network: report it instead of aborting, so that callers
+        # can still run on machines that are outside the known vlans
+        return 'UNKNOWN'
+    # an unlisted 3rd byte is reported as unknown too, rather than raising KeyError
+    return known_vlans.get(ip_address[2], 'UNKNOWN')
+
+
+def get_proxy_env_vars() -> str:
+    """Returns the proxy settings to use on this machine, as shell environment variable assignments.
+
+    Returns:
+        a string of the form ' HTTP_PROXY=<url> HTTPS_PROXY=<url> ...' (with a leading space), meant to
+        prefix a shell command, or an empty string if the vlan of this machine doesn't require a proxy
+    """
+    # error observed (presumably when the proxy is used from a vlan that can't reach it -- to be confirmed):
+    # fatal: unable to access 'https://github.com/hibridon/hibridon/': Failed to connect to proxy-nt.univ-rennes1.fr port 3128: Connection timed out
+    if get_vlan() not in ['SIMPA']:
+        # only the machines of the SIMPA vlan go through the proxy
+        return ''
+    ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128'
+    # both uppercase and lowercase variants are set
+    var_names = ('HTTP_PROXY', 'HTTPS_PROXY', 'FTP_PROXY', 'http_proxy', 'https_proxy', 'ftp_proxy')
+    return ''.join(f' {var_name}={ur1_proxy_url}' for var_name in var_names)
--- a/test/test_clusterbench.py
+++ b/test/test_clusterbench.py
@ -1,6 +1,7 @@
 import unittest
 import logging
 import subprocess
+import json


 class ClusterBenchTestCase(unittest.TestCase):
@ -16,10 +17,17 @@ class ClusterBenchTestCase(unittest.TestCase):
        command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
        subprocess.run(command, shell=True, check=True, executable='/bin/bash')

-    # def test_clusterbench_hibench(self):
-    #     logging.info('test_clusterbench_hibench')
-    #     command = 'clusterbench-submit --benchmark-id \'hibench\' --config \'{"compiler_id": "gfortran", "test_id": "arch4_quick"}\' --results-dir /tmp/mamul1_out'
-    #     subprocess.run(command, shell=True, check=True, executable='/bin/bash')
+    def test_clusterbench_hibench(self):
+        """Submits the hibench benchmark to the 'dummy' cluster through the clusterbench-submit command."""
+        logging.info('test_clusterbench_hibench')
+        hibench_config = dict(
+            compiler_id='gfortran',
+            test_id='arch4_quick',
+            hibridon_version='a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
+            cmake_path='cmake',
+            num_cores=2,
+        )
+        # the configuration is passed to the command as a json string
+        config_as_json = json.dumps(hibench_config)
+        command = f'clusterbench-submit --cluster-id \'dummy\' --benchmark-id \'hibench\' --config \'{config_as_json}\' --results-dir /tmp/hibench_out'
+        # check=True makes the test fail if the command returns a non-zero exit status
+        subprocess.run(command, shell=True, check=True, executable='/bin/bash')


 if __name__ == '__main__':
--- a/test/test_iprbench.py
+++ b/test/test_iprbench.py
@ -1,6 +1,7 @@
 import unittest
 import logging
 import subprocess
+import json
 # import importlib.resources


@ -22,6 +23,18 @@ class IprBenchTestCase(unittest.TestCase):
        command = 'iprbench-run --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}\' --results-dir /tmp/mamul1_out'
        subprocess.run(command, shell=True, check=True, executable='/bin/bash')

+    def test_iprbench_hibench(self):
+        """Runs the hibench benchmark on this machine through the iprbench-run command."""
+        logging.info('test_iprbench_hibench')
+        hibench_config = dict(
+            compiler_id='gfortran',
+            test_id='arch4_quick',
+            hibridon_version='a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
+            cmake_path='cmake',
+            num_cores=2,
+        )
+        # the configuration is passed to the command as a json string
+        config_as_json = json.dumps(hibench_config)
+        command = f'iprbench-run --benchmark-id \'hibench\' --config \'{config_as_json}\' --results-dir /tmp/hibench_out'
+        # check=True makes the test fail if the command returns a non-zero exit status
+        subprocess.run(command, shell=True, check=True, executable='/bin/bash')
+

 if __name__ == '__main__':
    unittest.main()