402 lines
23 KiB
Python
Executable File
402 lines
23 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number)
|
|
from typing import List, Tuple, Dict
|
|
import argparse
|
|
from os import getenv, makedirs
|
|
import shutil
|
|
from pathlib import Path
|
|
import subprocess
|
|
import re
|
|
import logging
|
|
import importlib.resources
|
|
import venv
|
|
import json
|
|
import abc
|
|
from .core import IBenchmark, BenchmarkConfig, BenchmarkId, ResultsDbParams, BenchParam, HostTypeId
|
|
from .main import BenchmarkFactory
|
|
from .util import Singleton
|
|
|
|
|
|
# type aliases used throughout this module; they are plain str at runtime but make
# signatures self-describing
HostFqdn = str  # eg 'physix90.ipr.univ-rennes1.fr'
GitCommitTag = str  # commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
HostGroupId = str  # eg 'xeon_gold_6140'
CompilerId = str  # eg 'gfortran'
ClusterId = str  # eg 'alambix'
|
|
|
|
|
|
def substitute_tag_with_filecontents(input_file_path: Path, tag: str, contents_file: Path, output_file_path: Path):
    """Copies input_file_path to output_file_path, replacing every occurrence of tag with the contents of contents_file.

    Args:
        input_file_path: the template file to read
        tag: the placeholder string to substitute (eg '<include:starbench.py>')
        contents_file: the file whose whole text replaces each occurrence of tag
        output_file_path: where the substituted text is written
    """
    # read the replacement text once; Path.read_text closes the file handle
    # (the original open(...).read() leaked it)
    contents = Path(contents_file).read_text(encoding='utf8')
    with open(input_file_path, 'rt', encoding='utf8') as template_file, open(output_file_path, 'wt', encoding='utf8') as out_file:
        # iterate the file object directly instead of materializing readlines()
        for template_line in template_file:
            out_file.write(template_line.replace(tag, contents))
|
|
|
|
|
|
def substitute_tags(input_file_path: Path, tags_dict: Dict[str, str], output_file_path: Path):
    """Copies input_file_path to output_file_path, replacing every occurrence of each tag with its value.

    Args:
        input_file_path: the template file to read
        tags_dict: maps each tag to its replacement; for tags starting with '<include:'
            the value is interpreted as the path of a file whose contents are substituted
        output_file_path: where the substituted text is written
    """
    # resolve every tag's replacement text once, up front. The original re-opened
    # (and leaked) each '<include:...>' file for every single template line.
    resolved_tags: Dict[str, str] = {}
    for tag, value in tags_dict.items():
        if re.match(r'<include:', tag) is not None:
            resolved_tags[tag] = Path(value).read_text(encoding='utf8')
        else:
            resolved_tags[tag] = value

    with open(input_file_path, 'rt', encoding='utf8') as template_file, open(output_file_path, 'wt', encoding='utf8') as out_file:
        for template_line in template_file:
            line = template_line
            for tag, contents in resolved_tags.items():
                line = line.replace(tag, contents)
            out_file.write(line)
|
|
|
|
|
|
class ClusterNodeDef:
    """definition of one compute node of a cluster"""
    host_fqdn: str  # eg 'physix90.ipr.univ-rennes1.fr'
    cpu_id: str  # the cpu model of this node, eg 'intel_xeon_gold_6140'
    num_cpus: int  # number of cpus (sockets) populated on this node

    def __init__(self, host_fqdn: str, cpu_id: str, num_cpus: int):
        self.host_fqdn = host_fqdn
        self.cpu_id = cpu_id
        self.num_cpus = num_cpus

    def __repr__(self) -> str:
        # added for debuggability: the default object repr hides the fields
        return f'{type(self).__name__}(host_fqdn={self.host_fqdn!r}, cpu_id={self.cpu_id!r}, num_cpus={self.num_cpus!r})'
|
|
|
|
|
|
class CpuDef:
    """definition of a cpu model"""
    cpu_id: str  # eg 'intel_xeon_gold_6140'
    num_cores: int  # number of cores of this cpu model

    def __init__(self, cpu_id: str, num_cores: int):
        self.cpu_id = cpu_id
        self.num_cores = num_cores

    def __repr__(self) -> str:
        # added for debuggability: the default object repr hides the fields
        return f'{type(self).__name__}(cpu_id={self.cpu_id!r}, num_cores={self.num_cores!r})'
|
|
|
|
|
|
class ClusterNodeDb:
    """hardcoded inventory of the compute nodes (and their cpu models) of a cluster

    For each supported cluster id it records which nodes exist, which cpu model
    they use and how many cpu sockets they have, plus the number of cores of each
    cpu model.
    """
    cluster_nodes_defs: List[ClusterNodeDef]  # one entry per compute node of the cluster
    cpu_defs: Dict[str, CpuDef]  # cpu model id -> cpu definition (the original annotation Dict[str, int] was wrong: add_cpu_def stores CpuDef instances)

    def __init__(self, cluster_id: ClusterId = 'alambix'):
        """populates the inventory for the given cluster

        Raises:
            ValueError: if cluster_id is not a known cluster id
        """
        self.cluster_nodes_defs = []
        include_multiqueue_nodes = False  # at the moment hibench only works on nodes that have all their cores in the same queue
        if cluster_id == 'alambix':
            self.add_cluster_node_def(ClusterNodeDef('alambix50.ipr.univ-rennes.fr', 'intel_xeon_x5650', 2))
            if include_multiqueue_nodes:
                self.add_cluster_node_def(ClusterNodeDef('physix90.ipr.univ-rennes1.fr', 'intel_xeon_gold_6154', 4))  # also has some cores reserved for gpuonly.q
            self.add_cluster_node_def(ClusterNodeDef('alambix103.ipr.univ-rennes.fr', 'amd_epyc_7452', 2))
            if include_multiqueue_nodes:
                self.add_cluster_node_def(ClusterNodeDef('alambix104.ipr.univ-rennes.fr', 'intel_xeon_gold_6248r', 2))  # also has some cores reserved for gpuonly.q
            for host_index in [105, 106, 107, 108]:
                self.add_cluster_node_def(ClusterNodeDef(f'alambix{host_index}.ipr.univ-rennes.fr', 'intel_xeon_gold_6348', 2))
        elif cluster_id == 'physix':
            # data-driven form of the original repetitive add_cluster_node_def calls;
            # the declaration order is preserved because it drives the order of the
            # hosts returned by get_host_group_info
            physix_node_groups: List[Tuple[List[int], str, int]] = [
                ([12, 13, 14, 15], 'amd_epyc_7282', 2),
                ([48], 'intel_xeon_x5550', 2),
                ([49, 51, 52, 53, 54, 55, 56, 57, 58, 59], 'intel_xeon_x5650', 2),
                ([60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], 'intel_xeon_e5-2660', 2),
                ([72, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83], 'intel_xeon_e5-2660v2', 2),
                ([84, 85, 86, 87], 'intel_xeon_e5-2660v4', 2),
                ([88, 89], 'intel_xeon_gold_6140', 2),
                ([91], 'intel_xeon_gold_6140', 4),
                ([92], 'intel_xeon_gold_5220', 1),
                ([93, 94], 'intel_xeon_gold_6226r', 2),
                ([95, 96, 97, 98], 'intel_xeon_gold_6248r', 2),
                ([99], 'intel_xeon_gold_6240r', 2),
                ([100, 101, 102], 'intel_xeon_gold_6248r', 2),
            ]
            for host_indices, cpu_id, num_cpus in physix_node_groups:
                for host_index in host_indices:
                    self.add_cluster_node_def(ClusterNodeDef(f'physix{host_index}.ipr.univ-rennes1.fr', cpu_id, num_cpus))
        elif cluster_id == 'dummy':
            self.add_cluster_node_def(ClusterNodeDef('graffy-ws2.ipr.univ-rennes.fr', 'intel_core_i5_8350u', 1))
        else:
            # the original used `assert False`, which is silently stripped under python -O
            raise ValueError(f'unknown cluster id: {cluster_id}')

        self.cpu_defs = {}
        for cpu_id, num_cores in [
            ('intel_core_i5_8350u', 4),
            ('intel_xeon_x5550', 4),
            ('intel_xeon_x5650', 6),
            ('intel_xeon_e5-2660', 8),
            ('intel_xeon_e5-2660v2', 10),
            ('intel_xeon_e5-2660v4', 14),
            ('intel_xeon_gold_6140', 18),
            ('intel_xeon_gold_6154', 18),
            ('intel_xeon_gold_5220', 4),
            ('intel_xeon_gold_6226r', 16),
            ('intel_xeon_gold_6248r', 24),
            ('intel_xeon_gold_6348', 28),
            ('amd_epyc_7282', 16),
            ('amd_epyc_7452', 32),
        ]:
            self.add_cpu_def(CpuDef(cpu_id, num_cores))

    def add_cluster_node_def(self, cluster_node_def: ClusterNodeDef):
        """registers a compute node in the inventory"""
        self.cluster_nodes_defs.append(cluster_node_def)

    def add_cpu_def(self, cpu_def: CpuDef):
        """registers a cpu model in the inventory"""
        self.cpu_defs[cpu_def.cpu_id] = cpu_def

    def get_host_group_info(self, host_group_id: HostGroupId) -> Tuple[List[HostFqdn], int]:
        """returns the host fqdns of the nodes in the given host group, along with the number of cores each of these nodes provides

        A host group is the set of nodes that use the given cpu model; if no node
        uses this cpu model, ([], 0) is returned.
        """
        hosts = [cluster_node_def.host_fqdn for cluster_node_def in self.cluster_nodes_defs if cluster_node_def.cpu_id == host_group_id]
        num_cpus_set = set(cluster_node_def.num_cpus for cluster_node_def in self.cluster_nodes_defs if cluster_node_def.cpu_id == host_group_id)
        assert len(num_cpus_set) <= 1, f'the number of cpus for the host group {host_group_id} is not homogeneous ({num_cpus_set})'
        if len(num_cpus_set) > 0:
            num_cpus = num_cpus_set.pop()
            # cores per node = cores per cpu * cpus per node
            num_cores = self.cpu_defs[host_group_id].num_cores * num_cpus
        else:
            num_cores = 0
        return (hosts, num_cores)
|
|
|
|
|
|
class ICluster(abc.ABC):
    """abstract base class for a compute cluster that benchmark jobs can be submitted to"""
    cluster_db: ClusterNodeDb  # the description of this cluster's nodes and cpus
    cluster_id: ClusterId  # eg 'alambix'

    def __init__(self, cluster_id: ClusterId, cluster_db: ClusterNodeDb):
        self.cluster_id = cluster_id
        self.cluster_db = cluster_db

    @abc.abstractmethod
    def path_is_reachable_by_compute_nodes(self, path: Path) -> bool:
        """returns True if the given path is accessible from this cluster's compute nodes (eg because it's on a shared filesystem)"""
        pass

    @abc.abstractmethod
    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """submits the given executable as a job on this cluster

        qsub_args: the arguments sent to qsub, eg ['-pe', 'smp', '12', 'gaussian.job', 'h2o.gjf']
        exec_path: the path of the executable (job script) to run
        exec_args: the arguments passed to the executable
        working_dir: the directory from which the job is submitted
        """

    def get_cluster_db(self) -> ClusterNodeDb:
        """returns the description of this cluster's nodes and cpus"""
        return self.cluster_db
|
|
|
|
|
|
class IprCluster(ICluster):
    """a real cluster of IPR (Institut de Physique de Rennes), where jobs are submitted through sge's qsub command"""

    def __init__(self, cluster_id: ClusterId):
        super().__init__(cluster_id, ClusterNodeDb(cluster_id))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        # a path is considered reachable if it lives under one of the known
        # cluster-wide shared disk roots
        shared_disk_roots = [Path('/opt/ipr/cluster/work.global')]
        for shared_disk_root in shared_disk_roots:
            try:
                _ = path.relative_to(shared_disk_root)
            except ValueError:
                # path is not under this root; try the next one
                continue
            return True
        return False

    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        joined_qsub_args = " ".join(qsub_args)
        joined_exec_args = " ".join(exec_args)
        qsub_command = f'qsub {joined_qsub_args} {exec_path} {joined_exec_args}'
        logging.debug('qsub_command = %s, working_dir=%s', qsub_command, working_dir)
        # shell=True is required here: some qsub arguments carry shell quoting
        subprocess.run(qsub_command, cwd=working_dir, check=True, shell=True)
|
|
|
|
|
|
class DummyCluster(ICluster):
    """a fake cluster used for testing: jobs are executed immediately on the local machine instead of being queued"""

    def __init__(self):
        super().__init__('dummy', ClusterNodeDb('dummy'))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        # everything runs locally, so any path is reachable
        return True

    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        # build the command that would have been submitted, for traceability only
        qsub_command = f'qsub {" ".join(qsub_args)} {exec_path} {" ".join(exec_args)}'
        logging.info('executing %s as a replacement of qsub_command %s, working_dir=%s', exec_path, qsub_command, working_dir)
        # run the job script synchronously instead of queueing it
        subprocess.run(exec_path, check=True, cwd=working_dir)
|
|
|
|
|
|
class ClusterFactory(metaclass=Singleton):
    """factory that creates the ICluster instance matching a cluster id"""

    def __init__(self):
        pass

    def create_cluster(self, cluster_id: ClusterId) -> ICluster:
        """creates the cluster matching cluster_id

        Raises:
            KeyError: if cluster_id is unknown
        """
        # map ids to constructors instead of instances: the original eagerly built
        # all three clusters (and their node databases) on every call just to
        # return one of them
        cluster_creators = {
            'dummy': DummyCluster,
            'physix': lambda: IprCluster('physix'),
            'alambix': lambda: IprCluster('alambix'),
        }
        return cluster_creators[cluster_id]()
|
|
|
|
|
|
def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
    """clones the currently active python virtual environment to duplicate_virtualenv_path

    The clone is performed with the virtualenv-clone tool, which is installed into a
    throwaway bootstrap virtual environment so that the current environment stays untouched.

    Raises:
        RuntimeError: if no virtual environment is currently active or its root doesn't exist
    """
    virtual_env = getenv('VIRTUAL_ENV')  # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
    if virtual_env is None:
        # the original crashed with an unhelpful TypeError (Path(None)) before its assert
        # could fire; also, assert statements are stripped under python -O, so validate
        # with a real exception
        raise RuntimeError('failed to find the root of the virtual environment in use (the VIRTUAL_ENV environment variable is not set)')
    this_virtualenv_path = Path(virtual_env)
    if not this_virtualenv_path.exists():
        raise RuntimeError(f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})')

    if duplicate_virtualenv_path.exists():
        shutil.rmtree(duplicate_virtualenv_path)

    # bootstrap a temporary venv that only hosts the virtualenv-clone tool
    cloner_virtualenv_path = Path('/tmp/venv_cloner.venv')
    venv.create(cloner_virtualenv_path, with_pip=True)
    subprocess.run(f'source {cloner_virtualenv_path}/bin/activate; pip install virtualenv-clone', shell=True, check=True, executable='/bin/bash')
    subprocess.run(f'source {cloner_virtualenv_path}/bin/activate; virtualenv-clone {this_virtualenv_path} {duplicate_virtualenv_path}', shell=True, check=True, executable='/bin/bash')
    shutil.rmtree(cloner_virtualenv_path)
|
|
|
|
|
|
def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Path):
    """clones the current virtual environment to venv_hardcoded_path, then archives that clone into the tarball venv_archive_path"""
    duplicate_this_virtualenv_to(venv_hardcoded_path)
    # archive the clone's directory name relative to its parent, so that extracting
    # the tarball recreates it at the hardcoded path
    archive_member = venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)
    tar_command = f'tar czvf {venv_archive_path} {archive_member}'
    subprocess.run(tar_command, shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
|
|
|
|
|
|
def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams, target_system_type_id: HostTypeId):
    """creates a job file for the given benchmark and host group, then submits it to the cluster

    benchmark: the benchmark to run
    benchmark_config: the benchmark's parameters; must contain 'fortran_compiler' and is expected to accept a 'num_cores' parameter
    host_group_id: the cpu model identifying the set of nodes the job is allowed to run on (eg 'intel_xeon_gold_6140')
    results_dir: where the results of the benchmark are stored
    cluster: the cluster the job is submitted to
    resultsdb_params: the configuration of the results database, forwarded to the job
    target_system_type_id: id of the operating system type of the target nodes
    """

    compiler_id: CompilerId = benchmark_config['fortran_compiler']

    cluster_db = cluster.get_cluster_db()

    (hosts, num_cores) = cluster_db.get_host_group_info(host_group_id)
    if len(hosts) == 0:
        logging.warning('skipping benchmarks with compiler %s on architecture %s because no hosts are available for it', compiler_id, host_group_id)
        return

    benchmark_config['num_cores'] = num_cores  # we expect the benchmark to have the parameter num_cores

    makedirs(results_dir, exist_ok=True)

    # each host group gets its own subdirectory of results_dir
    this_bench_dir = Path(f'{results_dir}/{host_group_id}')
    makedirs(this_bench_dir, exist_ok=True)

    starbench_job_path = this_bench_dir / 'starbench.job'

    # the jobs run from a frozen copy of the current virtual environment, shared by all
    # jobs of this bench
    job_venv_archive_path = results_dir / 'iprbench.venv.tgz'
    iprbench_venv_hardcoded_path = Path('/tmp') / 'iprbench.venv'
    if job_venv_archive_path.exists():
        logging.info('skipping the creation of %s because it already exists (probably created for other jobs of the same bench)', job_venv_archive_path)
    else:
        # freeze this virtualenv so that all jobs related to this benchmark will use the same version of iprbench
        logging.info('creating %s (the virtual environment that will be used in this bench by all its jobs at some point)', job_venv_archive_path)
        archive_this_virtualenv_to(job_venv_archive_path, iprbench_venv_hardcoded_path)

    logging.debug("type of resultsdb_params = %s", type(resultsdb_params))
    logging.debug("resultsdb_params = %s", resultsdb_params)
    logging.debug("resultsdb_params = %s", json.dumps(resultsdb_params))

    # create the job file (which embeds starbench.py)
    # the json strings have their double quotes backslash-escaped, presumably because
    # they end up inside a double-quoted string in the generated job script — confirm
    # against clusterbench-template.job
    tags_dict = {
        # '<include:starbench.py>': scripts_dir / 'starbench.py',
        '<benchmark_id>': str(benchmark.bench_id),
        '<starbench_job_path>': str(starbench_job_path),
        '<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path),
        '<iprbench_venv_archive_path>': str(job_venv_archive_path),
        '<benchmark_config>': json.dumps(benchmark_config).replace('"', r'\"'),
        '<results_dir>': str(results_dir),
        '<resultsdb_params>': json.dumps(resultsdb_params).replace('"', r'\"'),
        '<num_cores>': str(num_cores),
        '<target_system_type_id>': str(target_system_type_id),
    }
    logging.debug('tags_dict = %s', str(tags_dict))
    with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
        # job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job'
        substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path)
    # the job file is executed directly by the scheduler, so it must be executable
    subprocess.run(['chmod', 'a+x', starbench_job_path], check=True)

    ram_requirements = benchmark.get_ram_requirements(benchmark_config)
    # memory requirement expressed per core (in GB), since the job reserves num_cores cores
    ram_per_core = f'{ram_requirements / num_cores / 1.e9}G'

    qsub_args = []
    qsub_args += ['-pe', 'smp', f'{num_cores}']  # reserve num_cores slots in the smp parallel environment
    qsub_args += ['-l', f'"hostname={"|".join(hosts)}"']  # restrict the job to the nodes of this host group
    qsub_args += ['-S', '/bin/bash']
    qsub_args += ['-cwd']  # run the job from the directory it was submitted from
    qsub_args += ['-m', 'ae']  # email notification on job abort and end
    qsub_args += ['-l', f'mem_available={ram_per_core}']
    qsub_args += ['-j', 'y']  # merge stderr file into stdout file for easier reading of history of events
    qsub_args += ['-N', f'hibench_{host_group_id}']  # job name

    logging.debug('qsub_args = %s', str(qsub_args))

    exec_path = starbench_job_path
    exec_args = []

    cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
|
|
|
|
|
|
def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, resultsdb_params: ResultsDbParams, target_system_type_id: HostTypeId):
    """submits one benchmark job per host group whose id matches arch_regexp

    results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
    arch_regexp: regular expression selecting the host groups to benchmark (eg 'intel_xeon_.*')
    """

    # use the node database of the cluster we were given; the original built a fresh
    # ClusterNodeDb(), which silently described the default cluster ('alambix')
    # regardless of the cluster actually passed in
    cluster_db = cluster.get_cluster_db()
    all_host_groups = cluster_db.cpu_defs.keys()

    logging.info('available host groups: %s', all_host_groups)
    host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None]
    logging.info('requested host groups: %s', host_groups)

    for host_group in host_groups:
        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, resultsdb_params, target_system_type_id)
|
|
|
|
|
|
def main():
    """command line entry point: parses the arguments and submits the requested benchmark jobs to the cluster"""
    logging.basicConfig(level=logging.DEBUG)
    example_text = '''example:

%(prog)s --benchmark-id 'mamul1' --config '{"matrix_size": 1024, "num_loops":10}' --results-dir /tmp/mamul1_out

'''

    arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('--cluster-id', type=ClusterId, required=True, help='the identifier of cluster on which to submit the benchmark eg (\'dummy\', \'alambix\', etc.)')
    arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
    arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
    arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
    arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.')
    arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
    arg_parser.add_argument('--target-system-type-id', type=str, required=True, help='id of the operating system type to use. This is used to get the list installed packages, how to activate them, etc, eg "debian", "fr.univ-rennes.ipr.cluster-node".')

    args = arg_parser.parse_args()
    # bug fix: the original converted the benchmark id with ClusterId; both aliases are
    # plain str so behavior is unchanged, but BenchmarkId is the intended type
    benchmark_id = BenchmarkId(args.benchmark_id)

    results_dir = Path(args.results_dir)
    arch_regexp = args.arch_regexp
    benchmark_config = json.loads(args.config)

    cluster = ClusterFactory().create_cluster(args.cluster_id)
    resultsdb_params = json.loads(args.resultsdb_params)

    # TODO: put the declaration of common params in a common function so that there is only one set of common parameters
    common_params: List[BenchParam] = []
    common_params.append(BenchParam('launcher', BenchParam.Type.PARAM_TYPE_STRING, description='what triggered the benchmark (eg "alambix.job.12345", or "manual")'))

    benchmark = BenchmarkFactory().create_benchmark(benchmark_id, common_params)

    target_system_type_id = HostTypeId(args.target_system_type_id)

    if not cluster.path_is_reachable_by_compute_nodes(results_dir):
        # bug fix: this message was missing its f prefix, so the literal text
        # '{results_dir}' was shown to the user instead of the actual path
        raise ValueError(f'the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')

    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, resultsdb_params, target_system_type_id)
|