#!/usr/bin/env python3
"""Summarize starbench/hibench benchmark output files into a table.

The results tree is expected to be laid out as::

    <root>/<commit-id>/<test-id>/<cpu-id>/<compiler-id>/workerNNN/bench_stdout.txt

For each (commit, test, cpu, compiler) combination, the durations reported
by the workers are averaged and stored as one row of a pandas DataFrame, so
that the table can later be exploited to create graphs.
"""
# Provenance: file usecases/ipr/hibench/showresults.py, added by patch
# 1d66fc1edb520caed670ae509d69632642c78b69 (Guillaume Raffy,
# Fri, 27 Sep 2024 23:58:14 +0200):
#   "added a tool to parse benchmark output files to summarize it into a
#    table, so that it can be exploited to create graphs"
#   work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
# The same patch also adds 'starbench.venv/' and
# 'usecases/ipr/hibench/results/' to .gitignore.
import re
import sys
from pathlib import Path
from typing import Dict, Iterator, List, Optional

import pandas as pd


CpuId = str  # eg 'intel_xeon_gold_6248r'
Speed = float  # the execution speed for a given job (1.0/job duration), in s^-1
Duration = float  # the duration of a run for a given job (in seconds)

# Results location used when no path is given on the command line.
DEFAULT_RESULTS_ROOT = Path('/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results')

# Columns of the table built by HibenchResultsParser.parse_results.
RESULT_COLUMNS = ['commit-id', 'test-id', 'cpu-id', 'compiler-id', 'avg-duration']

# Summary line holding the total duration. One or more spaces are accepted
# after '=' because the number may be space-padded by a fixed-width format.
_DURATION_RE = re.compile(r'Total Test time \(real\) = +(?P<duration>[0-9.]+) sec')

# Name of a worker directory, eg 'worker000'.
_WORKER_DIR_RE = re.compile(r'worker(?P<worker_index>[0-9][0-9][0-9])')


def _iter_subdirs(parent: Path) -> Iterator[Path]:
    """Yield the sub-directories of parent, sorted so that output is deterministic."""
    return (child for child in sorted(parent.iterdir()) if child.is_dir())


class StarbenchMeasure():
    """The durations measured by every worker of one benchmark run."""

    worker_durations: List[Duration]

    def __init__(self):
        self.worker_durations = []

    def get_average_duration(self) -> Duration:
        """Return the mean of the worker durations, in seconds.

        Raises:
            ZeroDivisionError: if no worker duration has been recorded.
        """
        return sum(self.worker_durations) / len(self.worker_durations)

    def get_average_speed(self) -> Speed:
        """Return the inverse of the average duration, in s^-1.

        Raises:
            ZeroDivisionError: if no worker duration has been recorded, or
                if the average duration is zero.
        """
        return 1.0 / self.get_average_duration()


class HibenchResultsParser():
    """Parser for the output files found in a starbench results tree."""

    @staticmethod
    def parse_bench_stdout(bench_stdout_file_path: Path) -> Optional[Duration]:
        """Extract the total test duration from a worker's stdout file.

        bench_stdout_file_path: eg '/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results/53894da48505892bfa05693a52312bacb12c70c9/nh3h2_qma_long/intel_xeon_x5550/ifort/worker000/bench_stdout.txt'

        Returns:
            The duration (in seconds) given by the first line starting with
            'Total Test time (real) = ', or None if no such line exists.

        Raises:
            OSError: if the file cannot be opened (eg FileNotFoundError).
        """
        with open(bench_stdout_file_path, 'rt', encoding='utf8') as f:
            # Stream the file instead of loading every line in memory.
            for line in f:
                match = _DURATION_RE.match(line)
                if match:
                    return float(match['duration'])
        return None

    @staticmethod
    def _collect_measure(compiler_path: Path) -> StarbenchMeasure:
        """Gather the durations of all the worker directories of compiler_path."""
        measure = StarbenchMeasure()
        for worker_path in _iter_subdirs(compiler_path):
            if _WORKER_DIR_RE.match(worker_path.name) is None:
                print(f'unexpected path : {worker_path}')
                continue

            stdout_path = worker_path / 'bench_stdout.txt'
            try:
                duration = HibenchResultsParser.parse_bench_stdout(stdout_path)
            except FileNotFoundError:
                # The worker left no output: report it and keep going.
                print(f'missing file : {stdout_path}')
                continue
            if duration is None:
                # A None duration would make the average computation fail.
                print(f'no duration found in : {stdout_path}')
                continue
            measure.worker_durations.append(duration)
        return measure

    @staticmethod
    def parse_results(starbench_results_root: Path) -> pd.DataFrame:
        """Read the output files of a starbench results tree.

        Args:
            starbench_results_root: directory containing one sub-directory
                per commit id.

        Returns:
            A DataFrame with the columns 'commit-id', 'test-id', 'cpu-id',
            'compiler-id' and 'avg-duration', with one row per compiler
            directory that has at least one usable worker duration.
            Non-directory entries are ignored at every level.
        """
        rows = []
        for commit_path in _iter_subdirs(Path(starbench_results_root)):
            commit_id = commit_path.name  # eg dd0f413b85cf0f727a5a4e88b2b02d75a28b377f
            for test_path in _iter_subdirs(commit_path):
                test_id = test_path.name  # eg nh3h2_qma_long
                for cpu_path in _iter_subdirs(test_path):
                    cpu_id = cpu_path.name  # eg intel_xeon_gold_6248r
                    for compiler_path in _iter_subdirs(cpu_path):
                        compiler_id = compiler_path.name  # eg ifort
                        measure = HibenchResultsParser._collect_measure(compiler_path)
                        if measure.worker_durations:
                            rows.append([commit_id, test_id, cpu_id, compiler_id, measure.get_average_duration()])
        # Build the table in one go rather than growing it row by row.
        return pd.DataFrame(rows, columns=RESULT_COLUMNS)


def main(argv: Optional[List[str]] = None) -> None:
    """Print the summary table of a results tree.

    Args:
        argv: command-line arguments (defaults to sys.argv[1:]). The first
            one, when present, is the results root directory; otherwise
            DEFAULT_RESULTS_ROOT is used.
    """
    # 20240927-20:03:18 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync -va graffy@physix.ipr.univ-rennes1.fr:/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/ ./usecases/ipr/hibench/results/
    args = sys.argv[1:] if argv is None else argv
    results_root = Path(args[0]) if args else DEFAULT_RESULTS_ROOT
    hiperf = HibenchResultsParser.parse_results(results_root)
    print(hiperf)


if __name__ == '__main__':
    main()