From 2a91af37ffa878c9a78a0c21468835b73c32711b Mon Sep 17 00:00:00 2001
From: Guillaume Raffy
Date: Fri, 25 Oct 2024 19:19:28 +0200
Subject: [PATCH] added the ability for the user to choose the resultsdb
 backend

This is needed to use a different backend for tests and for production.

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
---
 iprbench/clusterbench.py                     | 17 ++++++---
 iprbench/core.py                             | 39 +++++++++++++++++++-
 iprbench/main.py                             | 12 ++++--
 iprbench/resources/clusterbench-template.job |  2 +-
 iprbench/resultsdb/tsvresultsdb.py           | 11 +++++-
 test/test_clusterbench.py                    | 23 ++++++++++--
 test/test_iprbench.py                        | 23 ++++++++++--
 7 files changed, 109 insertions(+), 18 deletions(-)

diff --git a/iprbench/clusterbench.py b/iprbench/clusterbench.py
index 98f856b..b9d6e38 100755
--- a/iprbench/clusterbench.py
+++ b/iprbench/clusterbench.py
@@ -3,7 +3,7 @@
 from typing import List, Tuple, Dict
 import argparse
 from os import getenv, makedirs
-from .core import IBenchmark, BenchmarkConfig, BenchmarkId
+from .core import IBenchmark, BenchmarkConfig, BenchmarkId, ResultsDbParams
 from .main import BenchmarkFactory
 from .util import Singleton
 import shutil
@@ -271,7 +271,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
     subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
 
 
-def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster):
+def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams):
 
     compiler_id: CompilerId = benchmark_config['compiler_id']
 
@@ -300,6 +300,10 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
         logging.info('creating %s (the virtual environment that will be used in this bench by all its jobs at some point)', job_venv_archive_path)
         archive_this_virtualenv_to(job_venv_archive_path, iprbench_venv_hardcoded_path)
 
+    logging.debug("type of resultsdb_params = %s", type(resultsdb_params))
+    logging.debug("resultsdb_params = %s", resultsdb_params)
+    logging.debug("resultsdb_params = %s", json.dumps(resultsdb_params))
+
     # create the job file (which embeds starbench.py)
     tags_dict = {
         # '': scripts_dir / 'starbench.py',
@@ -309,6 +313,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
         '': str(job_venv_archive_path),
         '': json.dumps(benchmark_config).replace('"', r'\"'),
         '': str(results_dir),
+        '': json.dumps(resultsdb_params).replace('"', r'\"'),
         '': str(num_cores),
     }
     logging.debug('tags_dict = %s', str(tags_dict))
@@ -338,7 +343,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark
     cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
 
 
-def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str):
+def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, resultsdb_params: ResultsDbParams):
     """
     results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
     """
@@ -351,7 +356,7 @@ def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, r
     logging.info('requested host groups: %s', host_groups)
 
     for host_group in host_groups:
-        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster)
+        launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, resultsdb_params)
 
 
 def main():
@@ -368,6 +373,7 @@ def main():
     arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
     arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
     arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By default, all available architectures are allowed.')
+    arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
 
     args = arg_parser.parse_args()
     benchmark_id = ClusterId(args.benchmark_id)
@@ -378,8 +384,9 @@ def main():
     benchmark_config = json.loads(args.config)
 
     cluster = ClusterFactory().create_cluster(args.cluster_id)
+    resultsdb_params = json.loads(args.resultsdb_params)
 
     if not cluster.path_is_reachable_by_compute_nodes(results_dir):
         raise ValueError(f'the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
-    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp)
+    launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, resultsdb_params)
 
diff --git a/iprbench/core.py b/iprbench/core.py
index f61f87f..12356db 100644
--- a/iprbench/core.py
+++ b/iprbench/core.py
@@ -1,10 +1,12 @@
-from typing import List, Dict, Union
+from typing import List, Dict, Union, Any
 from enum import Enum
 import abc
 from pathlib import Path
 from datetime import datetime
+from .util import Singleton
 
 BenchmarkId = str  # a unique name for a benchmark, eg 'matmul1'
+ResultsDbId = str  # a unique name for a results database, eg 'tsv-files'
 BenchParamId = str
 BenchParamType = Union[int, str, float, datetime]
 BenchmarkConfig = Dict[BenchParamId, BenchParamType]  # eg { 'compiler_id': 'gfortran', 'matrix_size': 1024 }
@@ -41,11 +43,14 @@ BenchmarkOutputParams = List[BenchParam]
 
 
 class IBenchmark(abc.ABC):
+    # resultsdb_id: ResultsDbId
     bench_id: BenchmarkId  # a unique name for this benchmark, eg 'matmul1'
     bench_params: BenchmarkInputParams
     out_params: BenchmarkOutputParams
 
+    # def __init__(self, resultsdb_id: ResultsDbId, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams):
     def __init__(self, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams):
+        # self.resultsdb_id = resultsdb_id
         self.bench_id = bench_id
         self.bench_params = bench_params
         self.out_params = out_params
@@ -139,3 +144,35 @@ class IResultsDb(abc.ABC):
     @abc.abstractmethod
     def get_table(self, benchmark: IBenchmark) -> IResultsTable:
         pass
+
+
+ResultsDbParams = Dict[str, Any]
+
+
+class IResultsDbCreator(abc.ABC):
+    resultsdb_id: ResultsDbId
+
+    def __init__(self, resultsdb_id: ResultsDbId):
+        self.resultsdb_id = resultsdb_id
+
+    def get_resultsdb_id(self) -> ResultsDbId:
+        return self.resultsdb_id
+
+    @abc.abstractmethod
+    def create_resultsdb(self, resultsdb_config: ResultsDbParams) -> IResultsDb:
+        pass
+
+
+class ResultsDbFactory(metaclass=Singleton):
+
+    resultsdb_creators: Dict[ResultsDbId, IResultsDbCreator]
+
+    def __init__(self):
+        self.resultsdb_creators = {}
+
+    def register_resultsdb_creator(self, resultsdb_creator: IResultsDbCreator):
+        self.resultsdb_creators[resultsdb_creator.get_resultsdb_id()] = resultsdb_creator
+
+    def create_resultsdb(self, resultsdb_id: ResultsDbId, params: ResultsDbParams) -> IResultsDb:
+        return self.resultsdb_creators[resultsdb_id].create_resultsdb(params)
+
diff --git a/iprbench/main.py b/iprbench/main.py
index 5608ef9..50d4b0c 100644
--- a/iprbench/main.py
+++ b/iprbench/main.py
@@ -1,7 +1,7 @@
-from .core import BenchmarkId, IBenchmark
+from .core import BenchmarkId, IBenchmark, ResultsDbFactory
 from .benchmarks.hibench import HiBench
 from .benchmarks.mamul1 import MaMul1
-from .resultsdb.tsvresultsdb import TsvResultsDb
+from .resultsdb.tsvresultsdb import TsvResultsDbCreator
 from .util import Singleton
 from .autoparams import MeasurementTime
 import logging
@@ -40,7 +40,8 @@ def main():
     arg_parser = argparse.ArgumentParser(description='executes a benchmark in a cluster job environment', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
     arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
     arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
-    arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
+    arg_parser.add_argument('--config', type=str, required=True, help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
+    arg_parser.add_argument('--resultsdb-params', type=str, required=True, help='the resultsdb configuration in json format, eg {"type": "tsv-files", "tsv_results_dir": "/tmp/toto"}')
 
     args = arg_parser.parse_args()
 
@@ -50,7 +51,10 @@ def main():
     benchmark.validate_config(benchmark_config)
 
     results_dir = args.results_dir
-    results_db = TsvResultsDb(results_dir / 'results')
+    ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())
+    resultsdb_params = json.loads(args.resultsdb_params)
+    results_db = ResultsDbFactory().create_resultsdb(resultsdb_params['type'], resultsdb_params)
+
     results_db.add_auto_param(MeasurementTime())
 
     results_table = results_db.get_table(benchmark)
diff --git a/iprbench/resources/clusterbench-template.job b/iprbench/resources/clusterbench-template.job
index be9e905..5a95206 100644
--- a/iprbench/resources/clusterbench-template.job
+++ b/iprbench/resources/clusterbench-template.job
@@ -55,7 +55,7 @@ num_cores=${NSLOTS}
 
 # launch the benchmark
 
-command="iprbench-run --benchmark-id '' --config '' --results-dir '${output_dir}'"
+command="iprbench-run --benchmark-id '' --config '' --results-dir '${output_dir}' --resultsdb-params ''"
 echo "command: ${command}"
 eval ${command}
 
diff --git a/iprbench/resultsdb/tsvresultsdb.py b/iprbench/resultsdb/tsvresultsdb.py
index a1971b8..12d22fd 100644
--- a/iprbench/resultsdb/tsvresultsdb.py
+++ b/iprbench/resultsdb/tsvresultsdb.py
@@ -1,7 +1,7 @@
 import logging
 import pandas as pd
 from pathlib import Path
-from ..core import IResultsDb, IResultsTable, BenchmarkParamValues, IBenchmark
+from ..core import IResultsDb, IResultsTable, BenchmarkParamValues, IBenchmark, IResultsDbCreator, ResultsDbParams
 
 
 class TsvResultsTable(IResultsTable):
@@ -39,3 +39,12 @@ class TsvResultsDb(IResultsDb):
     def get_table(self, benchmark: IBenchmark) -> IResultsTable:
         table = TsvResultsTable(benchmark, self, self.tsv_results_dir)
         return table
+
+
+class TsvResultsDbCreator(IResultsDbCreator):
+
+    def __init__(self):
+        super().__init__('tsv-files')
+
+    def create_resultsdb(self, resultsdb_config: ResultsDbParams) -> IResultsDb:
+        return TsvResultsDb(Path(resultsdb_config['tsv_results_dir']))
diff --git a/test/test_clusterbench.py b/test/test_clusterbench.py
index f8a2971..b3edecf 100644
--- a/test/test_clusterbench.py
+++ b/test/test_clusterbench.py
@@ -2,11 +2,12 @@ import unittest
 import logging
 import subprocess
 import json
+from pathlib import Path
 
 
 class ClusterBenchTestCase(unittest.TestCase):
 
-    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
     def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
         return super().setUp()
@@ -14,18 +15,34 @@ class ClusterBenchTestCase(unittest.TestCase):
     def test_clusterbench_submit(self):
         logging.info('test_clusterbench_submit')
         subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
-        command = 'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
+
+        results_dir = Path('/tmp/mamul1_out')
+        config = {
+            'compiler_id': 'gfortran',
+            'matrix_size': 1024,
+            'num_loops': 10,
+        }
+        resultsdb_params = {
+            'type': 'tsv-files',
+            'tsv_results_dir': f'{results_dir / "results"}'
+        }
+        command = f'clusterbench-submit --cluster-id \'dummy\' --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
         subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
     def test_clusterbench_hibench(self):
         logging.info('test_clusterbench_hibench')
+        results_dir = Path('/tmp/hibench_out')
         config = {
             'compiler_id': 'gfortran',
             'test_id': 'arch4_quick',
             'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
             'cmake_path': 'cmake',
         }
-        command = f'clusterbench-submit --cluster-id \'dummy\' --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir /tmp/hibench_out'
+        resultsdb_params = {
+            'type': 'tsv-files',
+            'tsv_results_dir': f'{results_dir / "results"}'
+        }
+        command = f'clusterbench-submit --cluster-id \'dummy\' --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
         subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
 
diff --git a/test/test_iprbench.py b/test/test_iprbench.py
index db00f37..87ec39f 100644
--- a/test/test_iprbench.py
+++ b/test/test_iprbench.py
@@ -2,12 +2,13 @@ import unittest
 import logging
 import subprocess
 import json
+from pathlib import Path
 # import importlib.resources
 
 
 class IprBenchTestCase(unittest.TestCase):
 
-    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
     def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
         return super().setUp()
@@ -20,11 +21,23 @@ class IprBenchTestCase(unittest.TestCase):
         # with open(src_dir / 'CMakeLists.txt', encoding='utf8') as f:
         #     print(f.readlines())
         # subprocess.run(f'cat {src_dir / "CMakeLists.txt"}', check=True)
-        command = 'iprbench-run --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}\' --results-dir /tmp/mamul1_out'
+        results_dir = Path('/tmp/mamul1_out')
+        config = {
+            'compiler_id': 'gfortran',
+            'matrix_size': 1024,
+            'num_loops': 10,
+            'num_cores': 2
+        }
+        resultsdb_params = {
+            'type': 'tsv-files',
+            'tsv_results_dir': f'{results_dir / "results"}'
+        }
+        command = f'iprbench-run --benchmark-id \'mamul1\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
         subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
     def test_iprbench_hibench(self):
         logging.info('test_iprbench_hibench')
+        results_dir = Path('/tmp/hibench_out')
         config = {
             'compiler_id': 'gfortran',
             'test_id': 'arch4_quick',
@@ -32,7 +45,11 @@ class IprBenchTestCase(unittest.TestCase):
             'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
             'cmake_path': 'cmake',
             'num_cores': 2,
         }
-        command = f'iprbench-run --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir /tmp/hibench_out'
+        resultsdb_params = {
+            'type': 'tsv-files',
+            'tsv_results_dir': f'{results_dir / "results"}'
+        }
+        command = f'iprbench-run --benchmark-id \'hibench\' --config \'{json.dumps(config)}\' --results-dir {results_dir} --resultsdb-params \'{json.dumps(resultsdb_params)}\''
         subprocess.run(command, shell=True, check=True, executable='/bin/bash')
 
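
Usage sketch: with this patch applied and iprbench installed, the new resultsdb plumbing can be exercised directly, mirroring what iprbench/main.py now does. This is a minimal sketch, not a definitive recipe; it uses only names introduced above (ResultsDbFactory, TsvResultsDbCreator), and the /tmp path is illustrative, borrowed from the tests.

    import json

    from iprbench.core import ResultsDbFactory
    from iprbench.resultsdb.tsvresultsdb import TsvResultsDbCreator

    # register the 'tsv-files' backend (the only IResultsDbCreator shipped in this patch)
    ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())

    # the same JSON document a user would pass on the command line via --resultsdb-params
    resultsdb_params = json.loads('{"type": "tsv-files", "tsv_results_dir": "/tmp/mamul1_out/results"}')

    # the factory dispatches on the 'type' key, exactly as iprbench/main.py does
    results_db = ResultsDbFactory().create_resultsdb(resultsdb_params['type'], resultsdb_params)

Because ResultsDbFactory uses the Singleton metaclass, a registration performed once at startup is visible to every later ResultsDbFactory() call, so a backend can be selected purely from the JSON string forwarded through clusterbench-submit, the job template, and iprbench-run.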