#!/usr/bin/env python3
"""Collect the durations stored in a starbench results tree into a pandas DataFrame."""
from typing import Dict, List, Optional
from pathlib import Path
import re

import pandas as pd

CpuId = str  # eg 'intel_xeon_gold_6248r'
Speed = float  # the execution speed for a given job (1.0/job duration), in s^-1
Duration = float  # the duration of a run for a given job (in seconds)

# Line of bench_stdout.txt that carries the duration of the run. Any amount of
# whitespace is accepted between '=' and the value.
_DURATION_RE = re.compile(r'Total Test time \(real\) =\s+(?P<duration>[0-9.]+) sec')

# Name of a worker directory, eg 'worker000'.
_WORKER_DIR_RE = re.compile(r'worker(?P<worker_index>[0-9][0-9][0-9])')

_RESULT_COLUMNS = ['commit-id', 'test-id', 'cpu-id', 'compiler-id', 'avg-duration']

_DEFAULT_RESULTS_ROOT = Path('/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results')


def _list_subdirs(parent: Path) -> List[Path]:
    """Return the sub-directories of parent, sorted so that the traversal order is reproducible."""
    return sorted(child for child in parent.iterdir() if child.is_dir())


class StarbenchMeasure():
    """The durations measured by every worker for one (commit, test, cpu, compiler) combination."""

    worker_durations: List[Duration]

    def __init__(self):
        self.worker_durations = []

    def get_average_duration(self) -> Duration:
        """Return the mean of the worker durations.

        Raises:
            ZeroDivisionError: if no duration has been recorded.
        """
        return sum(self.worker_durations) / len(self.worker_durations)

    def get_average_speed(self) -> Speed:
        """Return the inverse of the average duration."""
        return 1.0 / self.get_average_duration()


class HibenchResultsParser():
    """Reads the files found in a starbench results directory tree."""

    @staticmethod
    def parse_bench_stdout(bench_stdout_file_path: Path) -> Optional[Duration]:
        """Extract the duration reported in a worker's bench_stdout.txt.

        bench_stdout_file_path: eg '/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results/53894da48505892bfa05693a52312bacb12c70c9/nh3h2_qma_long/intel_xeon_x5550/ifort/worker000/bench_stdout.txt'

        Returns the value of the first 'Total Test time (real) = <value> sec'
        line, or None if the file contains no such line.
        """
        with open(bench_stdout_file_path, 'rt', encoding='utf8') as f:
            for line in f:
                match = _DURATION_RE.match(line)
                if match:
                    return float(match['duration'])
        return None

    @staticmethod
    def _measure_workers(compiler_path: Path) -> StarbenchMeasure:
        """Gather the durations of every worker directory found under compiler_path."""
        measure = StarbenchMeasure()
        for worker_path in _list_subdirs(compiler_path):
            if _WORKER_DIR_RE.match(worker_path.name) is None:
                print(f'unexpected path : {worker_path}')
                continue
            stdout_path = worker_path / 'bench_stdout.txt'
            duration = HibenchResultsParser.parse_bench_stdout(stdout_path)
            if duration is None:
                # Without this guard a None would end up in the list and make
                # the average computation fail.
                print(f'no duration found in : {stdout_path}')
                continue
            measure.worker_durations.append(duration)
        return measure

    @staticmethod
    def parse_results(starbench_results_root: Path) -> pd.DataFrame:
        """Read the output files of a starbench_results_root.

        The expected layout is
        <root>/<commit-id>/<test-id>/<cpu-id>/<compiler-id>/workerNNN/bench_stdout.txt
        Entries that are not directories are ignored at every level.

        Returns a DataFrame with the columns 'commit-id', 'test-id', 'cpu-id',
        'compiler-id' and 'avg-duration'; it has one row for each compiler
        directory in which at least one worker duration could be read.
        """
        rows = []
        for commit_path in _list_subdirs(starbench_results_root):
            commit_id = commit_path.name  # eg dd0f413b85cf0f727a5a4e88b2b02d75a28b377f
            for test_path in _list_subdirs(commit_path):
                test_id = test_path.name  # eg nh3h2_qma_long
                for cpu_path in _list_subdirs(test_path):
                    cpu_id = cpu_path.name  # eg intel_xeon_gold_6248r
                    for compiler_path in _list_subdirs(cpu_path):
                        compiler_id = compiler_path.name  # eg ifort
                        measure = HibenchResultsParser._measure_workers(compiler_path)
                        if measure.worker_durations:
                            rows.append([commit_id, test_id, cpu_id, compiler_id, measure.get_average_duration()])
        # Build the table once instead of appending to it row by row.
        return pd.DataFrame(rows, columns=_RESULT_COLUMNS)


def main(results_root: Path = _DEFAULT_RESULTS_ROOT):
    """Parse the results stored under results_root and print the resulting table."""
    # The default results directory was fetched with (20240927-20:03:18, from ~/work/starbench/starbench.git):
    # rsync -va graffy@physix.ipr.univ-rennes1.fr:/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/ ./usecases/ipr/hibench/results/
    hiperf = HibenchResultsParser.parse_results(results_root)
    print(hiperf)


if __name__ == '__main__':
    main()