added a tool to parse benchmark output files to summarize it into a table, so that it can be exploited to create graphs

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
2024-09-27 23:58:14 +02:00 · 2024-09-27 23:58:14 +02:00 · 1d66fc1edb
parent c4732fee87
commit 1d66fc1edb
2 changed files with 90 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,5 @@
 dist/
 src/starbench.egg-info/
 tmp/
+starbench.venv/
+usecases/ipr/hibench/results/
--- a/usecases/ipr/hibench/showresults.py
+++ b/usecases/ipr/hibench/showresults.py
@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+from typing import Dict, List
+from pathlib import Path
+import re
+import pandas as pd
+
+
+CpuId = str  # eg 'intel_xeon_gold_6248r'
+Speed = float  # the execution speed for a given job (1.0/job duration), in s^-1
+Duration = float  # the duration of a run for a given job (in seconds)
+
+
+class StarbenchMeasure():
+    worker_durations: List[Duration]
+
+    def __init__(self):
+        self.worker_durations = []
+
+    def get_average_duration(self) -> Speed:
+        return sum(self.worker_durations) / len(self.worker_durations)
+
+    def get_average_speed(self) -> Speed:
+        return 1.0 / self.get_average_duration()
+
+
+class HibenchResultsParser():
+
+    @staticmethod
+    def parse_bench_stdout(bench_stdout_file_path: Path) -> Duration:
+        """
+        bench_stdout_file_path: eg '/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results/53894da48505892bfa05693a52312bacb12c70c9/nh3h2_qma_long/intel_xeon_x5550/ifort/worker000/bench_stdout.txt'
+        """
+        duration = None
+        with open(bench_stdout_file_path, 'rt', encoding='utf8') as f:
+            for line in f.readlines():
+                match = re.match(r'Total Test time \(real\) = (?P<duration>[0-9.]+) sec', line)
+                if match:
+                    duration = float(match['duration'])
+                    break
+        return duration
+
+    @staticmethod
+    def parse_results(starbench_results_root: Path) -> pd.DataFrame:
+        """reads the output files of a starbench_results_root
+        """
+        results = pd.DataFrame(columns=['commit-id', 'test-id', 'cpu-id', 'compiler-id', 'avg-duration'])
+        for commit_path in starbench_results_root.iterdir():
+            if not commit_path.is_dir():
+                continue
+            commit_id = commit_path.name  # eg dd0f413b85cf0f727a5a4e88b2b02d75a28b377f
+            for test_path in commit_path.iterdir():
+                if not test_path.is_dir():
+                    continue
+                test_id = test_path.name  # eg nh3h2_qma_long
+                for cpu_path in test_path.iterdir():
+                    if not cpu_path.is_dir():
+                        continue
+                    cpu_id = cpu_path.name  # eg intel_xeon_gold_6248r
+                    for compiler_path in cpu_path.iterdir():
+                        if not compiler_path.is_dir():
+                            continue
+                        compiler_id = compiler_path.name  # eg ifort
+                        measure = StarbenchMeasure()
+                        for worker_path in compiler_path.iterdir():
+                            if not worker_path.is_dir():
+                                continue
+                            worker_id = worker_path.name
+                            match = re.match(r'worker(?P<worker_index>[0-9][0-9][0-9])', worker_id)
+                            if match is None:
+                                print(f'unexpected path : {worker_path}')
+                                continue
+                            # worker_index = int(match['worker_index'])
+
+                            duration = HibenchResultsParser.parse_bench_stdout(worker_path / 'bench_stdout.txt')
+                            measure.worker_durations.append(duration)
+                        if len(measure.worker_durations) > 0:
+                            results.loc[results.shape[0]] = [commit_id, test_id, cpu_id, compiler_id, measure.get_average_duration()]
+        return results
+
+
+def main():
+
+    # 20240927-20:03:18 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync -va graffy@physix.ipr.univ-rennes1.fr:/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/ ./usecases/ipr/hibench/results/
+    hiperf = HibenchResultsParser.parse_results(Path('/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results'))
+    print(hiperf)
+
+
+main()