added the ability for the user to choose the resultsdb backend.

This is needed to use a different backend for tests and for production.

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
Guillaume Raffy 2024-10-25 19:19:28 +02:00
parent 12cc0c0c8a
commit 2a91af37ff
7 changed files with 109 additions and 18 deletions
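
The backend is now selected at run time through a single JSON value handed to both clusterbench-submit and iprbench-run. A minimal sketch of that value, using the 'tsv-files' backend added in this commit (the directory path is illustrative only):

import json

resultsdb_params = {
    'type': 'tsv-files',                           # must match the id of a registered backend
    'tsv_results_dir': '/tmp/mamul1_out/results',  # parameter specific to the tsv-files backend
}
# passed on the command line as --resultsdb-params '<this JSON>'
print(json.dumps(resultsdb_params))
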

View File

@@ -3,7 +3,7 @@
from typing import List, Tuple, Dict
import argparse
from os import getenv, makedirs
from .core import IBenchmark, BenchmarkConfig, BenchmarkId
from .core import IBenchmark, BenchmarkConfig, BenchmarkId, ResultsDbParams
from .main import BenchmarkFactory
from .util import Singleton
import shutil
@@ -271,7 +271,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster):
def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams):
compiler_id: CompilerId = benchmark_config['compiler_id']
@@ -300,6 +300,10 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
logging.info('creating %s (the virtual environment that will be used in this bench by all its jobs at some point)', job_venv_archive_path)
archive_this_virtualenv_to(job_venv_archive_path, iprbench_venv_hardcoded_path)
logging.debug("type of resultsdb_params = %s", type(resultsdb_params))
logging.debug("resultsdb_params = %s", resultsdb_params)
logging.debug("resultsdb_params = %s", json.dumps(resultsdb_params))
# create the job file (which embeds starbench.py)
tags_dict = {
# '<include:starbench.py>': scripts_dir / 'starbench.py',
@@ -309,6 +313,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
'<iprbench_venv_archive_path>': str(job_venv_archive_path),
'<benchmark_config>': json.dumps(benchmark_config).replace('"', r'\"'),
'<results_dir>': str(results_dir),
'<resultsdb_params>': json.dumps(resultsdb_params).replace('"', r'\"'),
'<num_cores>': str(num_cores),
}
logging.debug('tags_dict = %s', str(tags_dict))
@@ -338,7 +343,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str):
def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, resultsdb_params: ResultsDbParams):
"""
results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
"""
@@ -351,7 +356,7 @@ def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, r
logging.info('requested host groups: %s', host_groups)
for host_group in host_groups:
launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster)
launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, resultsdb_params)
def main():
@@ -368,6 +373,7 @@ def main():
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By default, all available architectures are allowed.')
arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
args = arg_parser.parse_args()
benchmark_id = BenchmarkId(args.benchmark_id)
@@ -378,8 +384,9 @@ def main():
benchmark_config = json.loads(args.config)
cluster = ClusterFactory().create_cluster(args.cluster_id)
resultsdb_params = json.loads(args.resultsdb_params)
if not cluster.path_is_reachable_by_compute_nodes(results_dir):
raise ValueError(f'the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp)
launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, resultsdb_params)
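
In this file the parsed dict travels from main() through launch_perf_jobs() and launch_job_for_host_group() into tags_dict, where its double quotes are escaped because the substituted JSON ends up inside a double-quoted shell assignment in the job script shown further down. A small sketch of that substitution (values are illustrative):

import json

resultsdb_params = {'type': 'tsv-files', 'tsv_results_dir': '/tmp/mamul1_out/results'}
# same escaping as in tags_dict: every " becomes \" so that the job script's
# command="..." assignment turns the value back into plain JSON for iprbench-run
escaped = json.dumps(resultsdb_params).replace('"', r'\"')
print(escaped)  # {\"type\": \"tsv-files\", \"tsv_results_dir\": \"/tmp/mamul1_out/results\"}
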

View File

@@ -1,10 +1,12 @@
from typing import List, Dict, Union
from typing import List, Dict, Union, Any
from enum import Enum
import abc
from pathlib import Path
from datetime import datetime
from .util import Singleton
BenchmarkId = str # a unique name for a benchmark, eg 'matmul1'
ResultsDbId = str # a unique name for a results database, eg 'tsv-files'
BenchParamId = str
BenchParamType = Union[int, str, float, datetime]
BenchmarkConfig = Dict[BenchParamId, BenchParamType] # eg { 'compiler_id': 'gfortran', 'matrix_size': 1024 }
@@ -41,11 +43,14 @@ BenchmarkOutputParams = List[BenchParam]
class IBenchmark(abc.ABC):
# resultsdb_id: ResultsDbId
bench_id: BenchmarkId # a unique name for this benchmark, eg 'matmul1'
bench_params: BenchmarkInputParams
out_params: BenchmarkOutputParams
# def __init__(self, resultsdb_id: ResultsDbId, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams):
def __init__(self, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams):
# self.resultsdb_id = resultsdb_id
self.bench_id = bench_id
self.bench_params = bench_params
self.out_params = out_params
@@ -139,3 +144,35 @@ class IResultsDb(abc.ABC):
@abc.abstractmethod
def get_table(self, benchmark: IBenchmark) -> IResultsTable:
pass
ResultsDbParams = Dict[str, Any]
class IResultsDbCreator(abc.ABC):
resultsdb_id: ResultsDbId
def __init__(self, resultsdb_id: ResultsDbId):
self.resultsdb_id = resultsdb_id
def get_resultsdb_id(self) -> ResultsDbId:
return self.resultsdb_id
@abc.abstractmethod
def create_resultsdb(self, resultsdb_config: ResultsDbParams) -> IResultsDb:
pass
class ResultsDbFactory(metaclass=Singleton):
resultsdb_creators: Dict[ResultsDbId, IResultsDbCreator]
def __init__(self):
self.resultsdb_creators = {}
def register_resultsdb_creator(self, resultsdb_creator: IResultsDbCreator):
self.resultsdb_creators[resultsdb_creator.get_resultsdb_id()] = resultsdb_creator
def create_resultsdb(self, resultsdb_id: ResultsDbId, params: ResultsDbParams) -> IResultsDb:
return self.resultsdb_creators[resultsdb_id].create_resultsdb(params)
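
Taken together, ResultsDbParams, IResultsDbCreator and the singleton ResultsDbFactory form a small plugin mechanism: each creator registers itself under its resultsdb_id, and callers dispatch on the 'type' field of the params dict. A minimal usage sketch, assuming the package imports as iprbench and using the 'tsv-files' creator added later in this commit (paths are illustrative):

from iprbench.core import ResultsDbFactory
from iprbench.resultsdb.tsvresultsdb import TsvResultsDbCreator

# ResultsDbFactory uses the Singleton metaclass, so every call returns the same registry
ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())  # registered under 'tsv-files'

params = {'type': 'tsv-files', 'tsv_results_dir': '/tmp/mamul1_out/results'}
# the 'type' value selects the creator; the full params dict is handed to it unchanged
results_db = ResultsDbFactory().create_resultsdb(params['type'], params)
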

View File

@@ -1,7 +1,7 @@
from .core import BenchmarkId, IBenchmark
from .core import BenchmarkId, IBenchmark, ResultsDbFactory
from .benchmarks.hibench import HiBench
from .benchmarks.mamul1 import MaMul1
from .resultsdb.tsvresultsdb import TsvResultsDb
from .resultsdb.tsvresultsdb import TsvResultsDbCreator
from .util import Singleton
from .autoparams import MeasurementTime
import logging
@@ -40,7 +40,8 @@ def main():
arg_parser = argparse.ArgumentParser(description='executes a benchmark in a cluster job environment', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
arg_parser.add_argument('--config', type=str, required=True, help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
args = arg_parser.parse_args()
@@ -50,7 +51,10 @@ def main():
benchmark.validate_config(benchmark_config)
results_dir = args.results_dir
results_db = TsvResultsDb(results_dir / 'results')
ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())
resultsdb_params = json.loads(args.resultsdb_params)
results_db = ResultsDbFactory().create_resultsdb(resultsdb_params['type'], resultsdb_params)
results_db.add_auto_param(MeasurementTime())
results_table = results_db.get_table(benchmark)

View File

@@ -55,7 +55,7 @@ num_cores=${NSLOTS}
# launch the benchmark
command="iprbench-run --benchmark-id '<benchmark_id>' --config '<benchmark_config>' --results-dir '${output_dir}'"
command="iprbench-run --benchmark-id '<benchmark_id>' --config '<benchmark_config>' --results-dir '${output_dir}' --resultsdb-params '<resultsdb_params>'"
echo "command: ${command}"
eval ${command}

View File

@@ -1,7 +1,7 @@
import logging
import pandas as pd
from pathlib import Path
from ..core import IResultsDb, IResultsTable, BenchmarkParamValues, IBenchmark
from ..core import IResultsDb, IResultsTable, BenchmarkParamValues, IBenchmark, IResultsDbCreator, ResultsDbParams
class TsvResultsTable(IResultsTable):
@@ -39,3 +39,12 @@ class TsvResultsDb(IResultsDb):
def get_table(self, benchmark: IBenchmark) -> IResultsTable:
table = TsvResultsTable(benchmark, self, self.tsv_results_dir)
return table
class TsvResultsDbCreator(IResultsDbCreator):
def __init__(self):
super().__init__('tsv-files')
def create_resultsdb(self, resultsdb_config: ResultsDbParams) -> IResultsDb:
return TsvResultsDb(Path(resultsdb_config['tsv_results_dir']))
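
With the creator in place, supporting another backend would only require another IResultsDbCreator subclass and one register call; nothing in the benchmark code would change. A purely hypothetical sketch, written as if it lived alongside TsvResultsDbCreator in this module (no such backend exists in this commit; the 'sqlite' id and 'sqlite_file_path' key are invented for illustration):

class SqliteResultsDbCreator(IResultsDbCreator):
    """hypothetical second backend, shown only to illustrate the extension point"""

    def __init__(self):
        super().__init__('sqlite')  # users would then pass {"type": "sqlite", ...}

    def create_resultsdb(self, resultsdb_config: ResultsDbParams) -> IResultsDb:
        # a real implementation would return an IResultsDb writing to the file given in
        # resultsdb_config['sqlite_file_path'] (hypothetical key); this sketch stops here
        raise NotImplementedError('illustrative only, not part of this commit')
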

View File

@@ -2,11 +2,12 @@ import unittest
import logging
import subprocess
import json
from pathlib import Path
class ClusterBenchTestCase(unittest.TestCase):
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
def setUp(self) -> None: # pylint: disable=useless-parent-delegation
return super().setUp()
@@ -14,18 +15,34 @@ class ClusterBenchTestCase(unittest.TestCase):
def test_clusterbench_submit(self):
logging.info('test_clusterbench_submit')
subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
results_dir = Path('/tmp/mamul1_out')
config = {
'compiler_id': 'gfortran',
'matrix_size': 1024,
'num_loops': 10,
}
resultsdb_params = {
'type': 'tsv-files',
'tsv_results_dir': f'{results_dir / "results"}'
}
command = f'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
subprocess.run(command, shell=True, check=True, executable='/bin/bash')
def test_clusterbench_hibench(self):
logging.info('test_clusterbench_hibench')
results_dir = Path('/tmp/hibench_out')
config = {
'compiler_id': 'gfortran',
'test_id': 'arch4_quick',
'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
'cmake_path': 'cmake',
}
command = f'clusterbench-submit --cluster-id \'dummy\' --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir /tmp/hibench_out'
resultsdb_params = {
'type': 'tsv-files',
'tsv_results_dir': f'{results_dir / "results"}'
}
command = f'clusterbench-submit --cluster-id \'dummy\' --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
subprocess.run(command, shell=True, check=True, executable='/bin/bash')

View File

@@ -2,12 +2,13 @@ import unittest
import logging
import subprocess
import json
from pathlib import Path
# import importlib.resources
class IprBenchTestCase(unittest.TestCase):
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
def setUp(self) -> None: # pylint: disable=useless-parent-delegation
return super().setUp()
@@ -20,11 +21,23 @@ class IprBenchTestCase(unittest.TestCase):
# with open(src_dir / 'CMakeLists.txt', encoding='utf8') as f:
# print(f.readlines())
# subprocess.run(f'cat {src_dir / "CMakeLists.txt"}', check=True)
command = 'iprbench-run --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}\' --results-dir /tmp/mamul1_out'
results_dir = Path('/tmp/mamul1_out')
config = {
'compiler_id': 'gfortran',
'matrix_size': 1024,
'num_loops': 10,
'num_cores': 2
}
resultsdb_params = {
'type': 'tsv-files',
'tsv_results_dir': f'{results_dir / "results"}'
}
command = f'iprbench-run --benchmark-id \'mamul1\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
subprocess.run(command, shell=True, check=True, executable='/bin/bash')
def test_iprbench_hibench(self):
logging.info('test_iprbench_hibench')
results_dir = Path('/tmp/hibench_out')
config = {
'compiler_id': 'gfortran',
'test_id': 'arch4_quick',
@@ -32,7 +45,11 @@ class IprBenchTestCase(unittest.TestCase):
'cmake_path': 'cmake',
'num_cores': 2,
}
command = f'iprbench-run --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir /tmp/hibench_out'
resultsdb_params = {
'type': 'tsv-files',
'tsv_results_dir': f'{results_dir / "results"}'
}
command = f'iprbench-run --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
subprocess.run(command, shell=True, check=True, executable='/bin/bash')