added support for various stop conditions in `StarBencher`

added an extra stop condition in which the command is simply executed once

however, I found out that `measure_hibridon_perf` doesn't currently work because it suffers from race conditions: if multiple tests are run at the same time, the output file gets written by multiple processes, which causes some tests to fail because they can't find the output file if the timing is wrong.
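a minimal sketch of one way the race could be avoided later (not part of this commit): give each parallel run its own scratch directory, so concurrent runs never share an output file. this assumes the benchmarked command writes its output relative to its working directory, and the helper names below are hypothetical:

import subprocess
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import List


def run_in_private_dir(command: List[str], work_dir: Path) -> int:
    # hypothetical helper: run one copy of the command in its own directory,
    # so its output files cannot collide with those of other runs
    work_dir.mkdir(parents=True, exist_ok=True)
    completed = subprocess.run(command, cwd=work_dir)
    return completed.returncode


def run_parallel_without_races(command: List[str], base_dir: Path, num_runs: int) -> List[int]:
    # launch num_runs copies of command, each isolated in base_dir/run_<i>
    with ThreadPoolExecutor(max_workers=num_runs) as executor:
        futures = [executor.submit(run_in_private_dir, command, base_dir / ('run_%d' % i))
                   for i in range(num_runs)]
        return [future.result() for future in futures]

the same idea could probably be applied inside StarBencher by deriving a per-run working directory from the run id instead of sharing a single run_command_cwd.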
Guillaume Raffy 2022-05-31 16:50:38 +02:00
parent f136861893
commit 35d271ff63
1 changed file with 54 additions and 15 deletions


@@ -6,6 +6,8 @@ import os
 from typing import List, Dict  # , Set, , Tuple, Optional
 from datetime import datetime
 from pathlib import Path
+from abc import ABC, abstractmethod
+from typing import ForwardRef


 class Run():
@@ -25,19 +27,58 @@ class Run():
         return (self.end_time - self.start_time).total_seconds()


+StarBencher = ForwardRef('StarBencher')
+
+
+class IStarBencherStopCondition(ABC):
+
+    @abstractmethod
+    def should_stop(self, star_bencher: StarBencher):
+        pass
+
+
+class StopAfterSingleRun(IStarBencherStopCondition):
+
+    def __init__(self):
+        pass
+
+    def should_stop(self, star_bencher: StarBencher):
+        # never start a new run
+        return True
+
+
+class StopWhenConverged(IStarBencherStopCondition):
+
+    def __init__(self, max_error: float = 0.01):
+        self.max_error = max_error
+        self._last_mean_duration = None
+
+    def should_stop(self, star_bencher: StarBencher):
+        do_stop = False
+        mean_duration, num_runs = star_bencher._get_run_mean_duration()
+        print('mean_duration = %f' % mean_duration)
+        if self._last_mean_duration is not None:
+            diff = abs(mean_duration - self._last_mean_duration)
+            print('diff = %f' % diff)
+            if diff < self.max_error:
+                do_stop = True
+        self._last_mean_duration = mean_duration
+        return do_stop
+
+
 class StarBencher():
     '''
     the 'star' term comes from hpl's stadgemm benchmark, where we launch `n` independent programs on `n cores`
     '''
-    def __init__(self, run_command: List[str], num_cores_per_run: int, num_parallel_runs: int, max_num_cores: int, max_error: float = 0.01, stop_on_error=True, run_command_cwd: Path = None):
+    def __init__(self, run_command: List[str], num_cores_per_run: int, num_parallel_runs: int, max_num_cores: int, stop_condition: IStarBencherStopCondition, stop_on_error=True, run_command_cwd: Path = None):
         assert num_cores_per_run * num_parallel_runs <= max_num_cores
         self.run_command: List[str] = run_command
         self.run_command_cwd = run_command_cwd
         self.num_cores_per_run = num_cores_per_run
         self.num_parallel_runs = num_parallel_runs
         self.max_num_cores: int = max_num_cores
-        self.max_error: float = max_error
+        self.stop_condition: IStarBencherStopCondition = stop_condition
         self.stop_on_error = stop_on_error
         self._next_run_id: int = 0
         self._runs: Dict(int, Run) = {}
@@ -94,15 +135,7 @@ class StarBencher():
         if self.stop_on_error and run.return_code != 0:
             do_stop = True
         else:
-            mean_duration, num_runs = self._get_run_mean_duration()
-            print('mean_duration = %f' % mean_duration)
-            if self._last_mean_duration is not None:
-                diff = abs(mean_duration - self._last_mean_duration)
-                print('diff = %f' % diff)
-                if diff < self.max_error:
-                    do_stop = True
-            self._num_runs = num_runs
-            self._last_mean_duration = mean_duration
+            do_stop = self.stop_condition.should_stop(self)
         if not do_stop:
             print('adding a run')
             self._start_run()
@@ -126,9 +159,10 @@ class StarBencher():
         with self._runs_lock:
             if not all([run.return_code == 0 for run in self._runs.values()]):
                 raise Exception('at least one run failed')
-        print('mean duration : %.3f s (%d runs)' % (self._last_mean_duration, self._num_runs))
+        mean_duration, num_runs = self._get_run_mean_duration()
+        print('mean duration : %.3f s (%d runs)' % (mean_duration, num_runs))
         print('finished')
-        return self._last_mean_duration
+        return mean_duration


 def measure_hibridon_perf(hibridon_version: str, tmp_dir: Path, num_cores: int, github_username: str, github_personal_access_token: str):
@@ -138,12 +172,15 @@ def measure_hibridon_perf(hibridon_version: str, tmp_dir: Path, num_cores: int,
     src_dir = tmp_dir / 'hibridon'
     subprocess.run(['git', 'checkout', '%s' % (hibridon_version)], cwd=src_dir)
     assert src_dir.exists()
     for compiler in ['gfortran']:  # , 'ifort']:
         build_dir = tmp_dir / compiler
         build_dir.mkdir(exist_ok=True)
         subprocess.run(['cmake', '-DCMAKE_BUILD_TYPE=Release', '-DBUILD_TESTING=ON', src_dir], cwd=build_dir)
         subprocess.run(['make'], cwd=build_dir)
-        bench = StarBencher(run_command=['ctest', '-L', '^arch4_quick$'], num_cores_per_run=1, num_parallel_runs=num_cores, max_num_cores=num_cores, max_error=0.0001, run_command_cwd=build_dir)
+        stop_condition = StopAfterSingleRun()
+        bench = StarBencher(run_command=['ctest', '--output-on-failure', '-L', '^arch4_quick$'], num_cores_per_run=1, num_parallel_runs=num_cores, max_num_cores=num_cores, stop_condition=stop_condition, run_command_cwd=build_dir)
         mean_duration = bench.run()
         print('duration for compiler %s : %.3f s' % (compiler, mean_duration))
@@ -159,7 +196,9 @@ if __name__ == '__main__':
     measure_hibridon_perf(hibridon_version, tmp_dir, num_cores=2, github_username=github_username, github_personal_access_token=github_personal_access_token)

     if False:
-        bench = StarBencher(run_command=['sleep', '0.1415927'], num_cores_per_run=1, num_parallel_runs=2, max_num_cores=2, max_error=0.0001)
+        stop_condition = StopAfterSingleRun()
+        # stop_condition = StopWhenConverged(max_error=0.0001)
+        bench = StarBencher(run_command=['sleep', '0.1415927'], num_cores_per_run=1, num_parallel_runs=2, max_num_cores=2, stop_condition=stop_condition)
         mean_duration = bench.run()

     if False: