From 7976d67860b1ab394638c48685cd9d70115de070 Mon Sep 17 00:00:00 2001 From: Guillaume Raffy Date: Fri, 22 Nov 2024 13:40:22 +0100 Subject: [PATCH] v0.0.8 - added support for common parameters (parameters that are common to all benchmarks but whose values have to be provided by the user) - added the num_threads_per_run out parameter in hibench, in anticipation of the addition of support for parallel scaling measurements in starbench - added the autoparameter `user`. Work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958] --- iprbench/benchmarks/hibench.py | 12 ++++++++---- iprbench/benchmarks/mamul1.py | 5 +++-- iprbench/core.py | 24 ++++++++++++++++-------- iprbench/main.py | 20 ++++++++++++-------- iprbench/version.py | 2 +- 5 files changed, 40 insertions(+), 23 deletions(-) diff --git a/iprbench/benchmarks/hibench.py b/iprbench/benchmarks/hibench.py index 6fd8ff2..2793cd0 100644 --- a/iprbench/benchmarks/hibench.py +++ b/iprbench/benchmarks/hibench.py @@ -1,3 +1,4 @@ +from typing import List import pandas as pd from pathlib import Path import subprocess @@ -8,12 +9,12 @@ from ..util import get_proxy_env_vars class HiBench(IBenchmark): - """Hibridon benchmark + """Hibridon benchmark using starbench """ - def __init__(self): + def __init__(self, common_params: List[BenchParam]): bench_params = [] - bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark')) + bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the total number of cores to use by this benchmark')) bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id')) bench_params.append(BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark')) bench_params.append(BenchParam('blas_library', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the blas compatible linear algebra library used in the 
benchmark')) @@ -22,8 +23,9 @@ class HiBench(IBenchmark): out_params = [] out_params.append(BenchParam('duration', BenchParam.Type.PARAM_TYPE_FLOAT, 'the average duration of one test, in seconds')) + out_params.append(BenchParam('num_threads_per_run', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by each concurrent run of the app (must be a divider of num_cores)')) - super().__init__(bench_id='hibench', bench_params=bench_params, out_params=out_params) + super().__init__(bench_id='hibench', bench_params=bench_params, out_params=out_params, common_params=common_params) def get_ram_requirements(self, config: BenchmarkConfig) -> int: GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024 @@ -86,6 +88,8 @@ class HiBench(IBenchmark): assert len(selected_rows) == 1 row = selected_rows.loc[0] duration = row["duration"] + num_threads_per_run = 1 # at the moment starbench always allocates 1 core per process, but in the future, starbench will support multiple cores per process (useful to measure the how the app scales with increasing parallelism) + measurements['num_threads_per_run'] = num_threads_per_run measurements['duration'] = duration return measurements diff --git a/iprbench/benchmarks/mamul1.py b/iprbench/benchmarks/mamul1.py index 818dcaf..9bbfafe 100644 --- a/iprbench/benchmarks/mamul1.py +++ b/iprbench/benchmarks/mamul1.py @@ -1,3 +1,4 @@ +from typing import List from pathlib import Path import pandas as pd import subprocess @@ -10,7 +11,7 @@ from iprbench.util import extract_resource_dir class MaMul1(IBenchmark): """Matrix multiplication benchmark """ - def __init__(self): + def __init__(self, common_params: List[BenchParam]): bench_params = [] bench_params.append(BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark')) bench_params.append(BenchParam('blas_library', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the blas compatible linear algebra library used in the benchmark')) @@ -22,7 +23,7 @@ class MaMul1(IBenchmark): 
out_params = [] out_params.append(BenchParam('duration', BenchParam.Type.PARAM_TYPE_FLOAT, 'the average duration of one matrix multiplication, in seconds')) - super().__init__(bench_id='mamul1', bench_params=bench_params, out_params=out_params) + super().__init__(bench_id='mamul1', bench_params=bench_params, out_params=out_params, common_params=common_params) def get_ram_requirements(self, config: BenchmarkConfig) -> int: GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024 diff --git a/iprbench/core.py b/iprbench/core.py index e1c6c44..d934a4a 100644 --- a/iprbench/core.py +++ b/iprbench/core.py @@ -104,13 +104,15 @@ class IBenchmark(abc.ABC): # resultsdb_id: ResultsDbId bench_id: BenchmarkId # a unique name for this benchmark, eg 'matmul1' + common_params: List[BenchParam] bench_params: BenchmarkInputParams out_params: BenchmarkOutputParams # def __init__(self, resultsdb_id: ResultsDbId, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams): - def __init__(self, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams): + def __init__(self, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams, common_params: List[BenchParam]): # self.resultsdb_id = resultsdb_id self.bench_id = bench_id + self.common_params = common_params self.bench_params = bench_params self.out_params = out_params @@ -131,12 +133,12 @@ class IBenchmark(abc.ABC): def load_config(self, config_as_json: str, target_host: ITargetHost) -> BenchmarkConfig: benchmark_config = json.loads(config_as_json) - self.validate_config(benchmark_config) + IBenchmark.validate_config(benchmark_config, self.bench_params + self.common_params, self.bench_id) resolved_benchmark_config = self.resolve_config(benchmark_config, target_host) return resolved_benchmark_config def get_bench_param(self, param_name: str) -> Optional[BenchParam]: - for bench_param in self.bench_params: + for bench_param in self.bench_params + self.common_params: if 
bench_param.name == param_name: return bench_param return None @@ -154,9 +156,10 @@ class IBenchmark(abc.ABC): resolved_benchmark_config[param_name] = resolved_value return resolved_benchmark_config - def validate_config(self, config: BenchmarkConfig): + @staticmethod + def validate_config(config: BenchmarkConfig, params: List[BenchParam], benchid: BenchmarkId): """checks that all benchmark parameters have been set in the given config""" - for bench_param in self.bench_params: + for bench_param in params: try: _ = config[bench_param.name] except KeyError: @@ -164,11 +167,11 @@ class IBenchmark(abc.ABC): # check that all parameters in benchmark config exist as parameters for this benchmark for param_name in config.keys(): param_exists = False - for bench_param in self.bench_params: + for bench_param in params: if bench_param.name == param_name: param_exists = True break - assert param_exists, f'parameter {param_name} doesn\'t exist for benchmark {self.bench_id}' + assert param_exists, f'parameter {param_name} doesn\'t exist for benchmark {benchid}' class IResultsTable(abc.ABC): @@ -194,7 +197,7 @@ class IResultsTable(abc.ABC): def get_params(self) -> List[BenchParam]: """returns the ordered list of all columns in this table (a column is described by a parameter)""" - params = [auto_param.bench_param for auto_param in self.results_db.auto_params] + self.benchmark.bench_params + self.benchmark.out_params + params = [auto_param.bench_param for auto_param in self.results_db.auto_params] + self.results_db.common_params + self.benchmark.bench_params + self.benchmark.out_params return params @@ -212,13 +215,18 @@ class IAutoParam(abc.ABC): class IResultsDb(abc.ABC): """the results database (contains IResultsTable instances)""" auto_params: List[IAutoParam] # parameters that are common to all benchmarks and that are filled automatically + common_params: List[BenchParam] # parameters that are common to all benchmarks but are not filled automatically (they have to be filled 
by the user) def __init__(self): self.auto_params = [] + self.common_params = [] def add_auto_param(self, auto_param: IAutoParam): self.auto_params.append(auto_param) + def add_common_param(self, param: BenchParam): + self.common_params.append(param) + def get_auto_param_values(self) -> BenchmarkParamValues: param_values = {} for auto_param in self.auto_params: diff --git a/iprbench/main.py b/iprbench/main.py index 52bee58..2b15721 100644 --- a/iprbench/main.py +++ b/iprbench/main.py @@ -1,4 +1,5 @@ -from .core import BenchmarkId, IBenchmark, ResultsDbFactory +from typing import List +from .core import BenchmarkId, IBenchmark, ResultsDbFactory, BenchParam from .targethosts import GraffyWs2 from .benchmarks.hibench import HiBench from .benchmarks.mamul1 import MaMul1 @@ -6,7 +7,7 @@ from .resultsdb.tsvresultsdb import TsvResultsDbCreator from .resultsdb.sqlresultsdb import SqliteResultsDbCreator, SqlServerResultsDbCreator from .util import Singleton -from .autoparams import MeasurementTime, HostFqdn, NumCpus, CpuModel, IprBenchVersion, HostId +from .autoparams import MeasurementTime, HostFqdn, User, NumCpus, CpuModel, IprBenchVersion, HostId import logging import argparse from pathlib import Path @@ -18,10 +19,10 @@ class BenchmarkFactory(metaclass=Singleton): def __init__(self): pass - def create_benchmark(self, bench_id: BenchmarkId) -> IBenchmark: + def create_benchmark(self, bench_id: BenchmarkId, common_params: List[BenchParam]) -> IBenchmark: benchmark = { - 'hibench': HiBench(), - 'mamul1': MaMul1() + 'hibench': HiBench(common_params), + 'mamul1': MaMul1(common_params) }[bench_id] return benchmark @@ -46,10 +47,7 @@ def main(): args = arg_parser.parse_args() - benchmark_id = BenchmarkId(args.benchmark_id) - benchmark = BenchmarkFactory().create_benchmark(benchmark_id) target_host = GraffyWs2() - benchmark_config = benchmark.load_config(args.config, target_host) results_dir = args.results_dir ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator()) 
@@ -62,8 +60,14 @@ def main(): results_db.add_auto_param(IprBenchVersion()) results_db.add_auto_param(HostId()) results_db.add_auto_param(HostFqdn()) + results_db.add_auto_param(User()) results_db.add_auto_param(NumCpus()) results_db.add_auto_param(CpuModel()) + results_db.add_common_param(BenchParam('launcher', BenchParam.Type.PARAM_TYPE_STRING, description='what triggered the benchmark (eg "alambix.job.12345", or "manual")')) + + benchmark_id = BenchmarkId(args.benchmark_id) + benchmark = BenchmarkFactory().create_benchmark(benchmark_id, results_db.common_params) + benchmark_config = benchmark.load_config(args.config, target_host) results_table = results_db.get_table(benchmark) diff --git a/iprbench/version.py b/iprbench/version.py index 2792152..9123cf0 100644 --- a/iprbench/version.py +++ b/iprbench/version.py @@ -1 +1 @@ -__version__ = '0.0.7' +__version__ = '0.0.8'