added a tool to parse benchmark output files to summarize it into a table, so that it can be exploited to create graphs

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
This commit is contained in:
Guillaume Raffy 2024-09-27 23:58:14 +02:00
parent c4732fee87
commit 1d66fc1edb
2 changed files with 90 additions and 0 deletions

2
.gitignore vendored
View File

@ -2,3 +2,5 @@
dist/
src/starbench.egg-info/
tmp/
starbench.venv/
usecases/ipr/hibench/results/

View File

@ -0,0 +1,88 @@
#!/usr/bin/env python3
from typing import Dict, List
from pathlib import Path
import re
import pandas as pd
CpuId = str # eg 'intel_xeon_gold_6248r'
Speed = float # the execution speed for a given job (1.0/job duration), in s^-1
Duration = float # the duration of a run for a given job (in seconds)
class StarbenchMeasure():
worker_durations: List[Duration]
def __init__(self):
self.worker_durations = []
def get_average_duration(self) -> Speed:
return sum(self.worker_durations) / len(self.worker_durations)
def get_average_speed(self) -> Speed:
return 1.0 / self.get_average_duration()
class HibenchResultsParser():
@staticmethod
def parse_bench_stdout(bench_stdout_file_path: Path) -> Duration:
"""
bench_stdout_file_path: eg '/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results/53894da48505892bfa05693a52312bacb12c70c9/nh3h2_qma_long/intel_xeon_x5550/ifort/worker000/bench_stdout.txt'
"""
duration = None
with open(bench_stdout_file_path, 'rt', encoding='utf8') as f:
for line in f.readlines():
match = re.match(r'Total Test time \(real\) = (?P<duration>[0-9.]+) sec', line)
if match:
duration = float(match['duration'])
break
return duration
@staticmethod
def parse_results(starbench_results_root: Path) -> pd.DataFrame:
"""reads the output files of a starbench_results_root
"""
results = pd.DataFrame(columns=['commit-id', 'test-id', 'cpu-id', 'compiler-id', 'avg-duration'])
for commit_path in starbench_results_root.iterdir():
if not commit_path.is_dir():
continue
commit_id = commit_path.name # eg dd0f413b85cf0f727a5a4e88b2b02d75a28b377f
for test_path in commit_path.iterdir():
if not test_path.is_dir():
continue
test_id = test_path.name # eg nh3h2_qma_long
for cpu_path in test_path.iterdir():
if not cpu_path.is_dir():
continue
cpu_id = cpu_path.name # eg intel_xeon_gold_6248r
for compiler_path in cpu_path.iterdir():
if not compiler_path.is_dir():
continue
compiler_id = compiler_path.name # eg ifort
measure = StarbenchMeasure()
for worker_path in compiler_path.iterdir():
if not worker_path.is_dir():
continue
worker_id = worker_path.name
match = re.match(r'worker(?P<worker_index>[0-9][0-9][0-9])', worker_id)
if match is None:
print(f'unexpected path : {worker_path}')
continue
# worker_index = int(match['worker_index'])
duration = HibenchResultsParser.parse_bench_stdout(worker_path / 'bench_stdout.txt')
measure.worker_durations.append(duration)
if len(measure.worker_durations) > 0:
results.loc[results.shape[0]] = [commit_id, test_id, cpu_id, compiler_id, measure.get_average_duration()]
return results
def main():
# 20240927-20:03:18 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync -va graffy@physix.ipr.univ-rennes1.fr:/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/ ./usecases/ipr/hibench/results/
hiperf = HibenchResultsParser.parse_results(Path('/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results'))
print(hiperf)
main()