From fe4a07a67ed0043da521c6ffec03586cc7d28598 Mon Sep 17 00:00:00 2001
From: Guillaume Raffy <guillaume.raffy@univ-rennes1.fr>
Date: Thu, 10 Oct 2024 18:06:09 +0200
Subject: [PATCH] refactored all iprbench code found in `usecases/ipr/hibench`
 into a `iprbench` python package

The main motivation for this is to allow the code executed by jobs to rely on multiple packages (e.g. iprbench, [starbench](https://github.com/g-raffy/starbench), cocluto) to perform common tasks that are currently missing, such as registering the benchmark results in the iprbench database.

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958] and [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
---
 .gitignore                                    |   9 +-
 README.md                                     | 158 +++++---
 {test => iprbench}/__init__.py                |   0
 iprbench/hibench/__init__.py                  |   0
 .../hibench/hibenchonphysix.py                |  64 ++--
 .../ipr => iprbench}/hibench/showresults.py   |   2 -
 iprbench/main.py                              |   1 +
 .../resources}/starbench-template.job         |  35 +-
 pyproject.toml                                |  19 +-
 src/starbench/__init__.py                     |   2 -
 src/starbench/core.py                         | 284 ---------------
 src/starbench/main.py                         | 166 ---------
 test/mamul1/CMakeLists.txt                    |  43 ---
 test/mamul1/mamul1.F90                        | 339 ------------------
 test/test_starbench.py                        |  25 --
 tests/__init__.py                             |   0
 usecases/ipr/hibench/readme.md                |  27 --
 17 files changed, 198 insertions(+), 976 deletions(-)
 rename {test => iprbench}/__init__.py (100%)
 create mode 100644 iprbench/hibench/__init__.py
 rename {usecases/ipr => iprbench}/hibench/hibenchonphysix.py (85%)
 rename {usecases/ipr => iprbench}/hibench/showresults.py (99%)
 create mode 100644 iprbench/main.py
 rename {usecases/ipr/hibench => iprbench/resources}/starbench-template.job (72%)
 delete mode 100644 src/starbench/__init__.py
 delete mode 100755 src/starbench/core.py
 delete mode 100755 src/starbench/main.py
 delete mode 100644 test/mamul1/CMakeLists.txt
 delete mode 100644 test/mamul1/mamul1.F90
 delete mode 100644 test/test_starbench.py
 create mode 100644 tests/__init__.py
 delete mode 100644 usecases/ipr/hibench/readme.md

diff --git a/.gitignore b/.gitignore
index c6a7dff..65fbaf7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,3 @@
-**/__pycache__/
-dist/
-src/starbench.egg-info/
-tmp/
-starbench.venv/
-usecases/ipr/hibench/results/
+iprbench.venv/
+results/
+iprbench/__pycache__/__init__.cpython-38.pyc
diff --git a/README.md b/README.md
index ba03043..48aabe7 100644
--- a/README.md
+++ b/README.md
@@ -1,53 +1,129 @@
-# starbench
-a tool to benchmark a git cmake application using embarassingly parallel runs
+# iprbenchmark
 
-`starbench` is a tool designed to build and test the performance of an application versioned in a `git` repository and using the `cmake` build system.
+This example illustrates how `starbench` is used at IPR (Institut de Physique de Rennes) to measure the performance of [hibridon](https://github.com/hibridon/hibridon) on IPR's cluster (`alambix`)
 
-In order to measure the performance of the code in *hpc* (high performance computing) environment, `starbench` is designed to make all the cores busy. For this, it uses the same technique as in `hpl`'s `stardgemm` test (that's where the 'star' prefix comes from): the same code is run on each `CPU` core. This way, we performances measures are expected to be more realistic, as the cores won't benefit from the unrealistic boost provided by the memory cache of unused cores.
 
-If the user provides:
-- the `url` of the repository
-- the commit number of the version to test
-- the number of cores the benchmark should use (usually the number of cores of the machine that executes the benchmark)
-- the benchmark command to use
-
-then `starbench` will do the rest:
-1. clone the repository to a temporary location
-2. checkout the requested version
-3. configure the build
-4. build the code
-5. run the benchmark command for each core
-6. output the average duration of the benchmark
-
-## example
+usage:
 
 ```sh
-bob@bob-ws2:~/work/starbench$ python3 -m venv ./starbench.venv
-bob@bob-ws2:~/work/starbench$ source ./starbench.venv/bin/activate
-bob@bob-ws2:~/work/starbench$ pip install wheel
-Collecting wheel
-  Using cached wheel-0.43.0-py3-none-any.whl (65 kB)
-Installing collected packages: wheel
-Successfully installed wheel-0.43.0
-bob@bob-ws2:~/work/starbench$ pip install ./starbench.git
-Processing ./starbench.git
+20241007-15:08:10 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync --exclude .git --exclude starbench.venv --exclude tmp --exclude usecases/ipr/hibench/results  -va ./ graffy@alambix.ipr.univ-rennes.fr:/opt/ipr/cluster/work.global/graffy/starbench.git/
+sending incremental file list
+
+sent 1,416 bytes  received 25 bytes  960.67 bytes/sec
+total size is 140,225  speedup is 97.31
+last command status : [0]
+```
+## install iprbench
+
+```sh
+graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ python3 -m venv iprbench.venv
+graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ source ./iprbench.venv/bin/activate
+(iprbench.venv) graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ pip install ./iprbench.git
+Processing ./iprbench.git
   Installing build dependencies ... done
-  WARNING: Missing build requirements in pyproject.toml for file:///home/bob/work/starbench/starbench.git.
-  WARNING: The project does not specify a build backend, and pip cannot fall back to setuptools without 'wheel'.
   Getting requirements to build wheel ... done
-    Preparing wheel metadata ... done
-Building wheels for collected packages: starbench
-  Building wheel for starbench (PEP 517) ... done
-  Created wheel for starbench: filename=starbench-1.0.0-py3-none-any.whl size=8011 sha256=a98c590fbc481722aed3512ae6345cce741615a17c24e67dc88070f85b616c4c
-  Stored in directory: /tmp/pip-ephem-wheel-cache-m_0xpm10/wheels/67/41/37/debf4c9251b719f84456398e144dffaa34d18ab336b529dc53
-Successfully built starbench
-Installing collected packages: starbench
-Successfully installed starbench-1.0.0
-bob@bob-ws2:~/work/starbench$ starbench --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-path=/opt/cmake/cmake-3.23.0/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$'
+  Preparing metadata (pyproject.toml) ... done
+Collecting starbench@ git+https://github.com/g-raffy/starbench
+  Cloning https://github.com/g-raffy/starbench to /tmp/user/59825/pip-install-uw5i22k1/starbench_890d53070dec47738060b57fdd29b001
+  Running command git clone --filter=blob:none --quiet https://github.com/g-raffy/starbench /tmp/user/59825/pip-install-uw5i22k1/starbench_890d53070dec47738060b57fdd29b001
+  Resolved https://github.com/g-raffy/starbench to commit 3ca66d00636ad055506f6b4e2781b498cc7487ac
+  Installing build dependencies ... done
+  Getting requirements to build wheel ... done
+  Preparing metadata (pyproject.toml) ... done
+Collecting pandas
+  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.1/13.1 MB 23.6 MB/s eta 0:00:00
+Collecting matplotlib
+  Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.3/8.3 MB 20.1 MB/s eta 0:00:00
+Collecting sqlalchemy
+  Downloading SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 17.6 MB/s eta 0:00:00
+Collecting contourpy>=1.0.1
+  Downloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 323.2/323.2 kB 3.4 MB/s eta 0:00:00
+Collecting cycler>=0.10
+  Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)
+Collecting fonttools>=4.22.0
+  Downloading fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.9/4.9 MB 20.9 MB/s eta 0:00:00
+Collecting kiwisolver>=1.3.1
+  Downloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 8.9 MB/s eta 0:00:00
+Collecting numpy>=1.23
+  Downloading numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.3/16.3 MB 19.9 MB/s eta 0:00:00
+Collecting packaging>=20.0
+  Downloading packaging-24.1-py3-none-any.whl (53 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.0/54.0 kB 1.0 MB/s eta 0:00:00
+Collecting pillow>=8
+  Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 20.9 MB/s eta 0:00:00
+Collecting pyparsing>=2.3.1
+  Downloading pyparsing-3.1.4-py3-none-any.whl (104 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 104.1/104.1 kB 1.3 MB/s eta 0:00:00
+Collecting python-dateutil>=2.7
+  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 229.9/229.9 kB 933.1 kB/s eta 0:00:00
+Collecting pytz>=2020.1
+  Downloading pytz-2024.2-py2.py3-none-any.whl (508 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 508.0/508.0 kB 3.9 MB/s eta 0:00:00
+Collecting tzdata>=2022.7
+  Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 346.6/346.6 kB 2.0 MB/s eta 0:00:00
+Collecting typing-extensions>=4.6.0
+  Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)
+Collecting greenlet!=0.4.17
+  Downloading greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (602 kB)
+     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 602.4/602.4 kB 4.9 MB/s eta 0:00:00
+Collecting six>=1.5
+  Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)
+Building wheels for collected packages: iprbench, starbench
+  Building wheel for iprbench (pyproject.toml) ... done
+  Created wheel for iprbench: filename=iprbench-0.0.1-py3-none-any.whl size=19188 sha256=0ece4a9b1b44434c0f033a253aae301a5d53039955f1f6b182c0b833d44c3e93
+  Stored in directory: /tmp/user/59825/pip-ephem-wheel-cache-8yw7rwk6/wheels/84/c9/82/e72d13fb7df12a8004ca6383b185a00f9ab3ddd8695e9e6cd8
+  Building wheel for starbench (pyproject.toml) ... done
+  Created wheel for starbench: filename=starbench-1.0.0-py3-none-any.whl size=9612 sha256=18968f356bb3d6f6c2337b4bbaf709510c50a6a9902c3363f4b568c409846ac0
+  Stored in directory: /tmp/user/59825/pip-ephem-wheel-cache-8yw7rwk6/wheels/cf/73/d3/14e4830d3e06c2c3ab71fdf68e0f14b50132ec23eaa6b2aa65
+Successfully built iprbench starbench
+Installing collected packages: pytz, tzdata, typing-extensions, starbench, six, pyparsing, pillow, packaging, numpy, kiwisolver, greenlet, fonttools, cycler, sqlalchemy, python-dateutil, contourpy, pandas, matplotlib, iprbench
+Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2
 ```
 
-## how to test
+## launch benchmark jobs on alambix cluster
 
 ```sh
-(starbench.venv) graffy@graffy-ws2:~/work/starbench/starbench.git$ python3 -m unittest test.test_starbench
+(iprbench.venv) graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ hibenchonphysix --commit-id 53894da48505892bfa05693a52312bacb12c70c9 --results-dir $GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench/hibench/$(date --iso=seconds) --arch-regexp 'intel_xeon_x5650' --cmake-path /usr/bin/cmake
+INFO:root:available host groups: dict_keys(['intel_xeon_x5550', 'intel_xeon_x5650', 'intel_xeon_e5-2660', 'intel_xeon_e5-2660v2', 'intel_xeon_e5-2660v4', 'intel_xeon_gold_6140', 'intel_xeon_gold_6154', 'intel_xeon_gold_5220', 'intel_xeon_gold_6226r', 'intel_xeon_gold_6248r', 'intel_xeon_gold_6348', 'amd_epyc_7282', 'amd_epyc_7452'])
+INFO:root:requested host groups: ['intel_xeon_x5650']
+INFO:root:using test arch4_quick for benchmarking
+INFO:root:creating /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/iprbench.venv.tgz (the virtual environment that will be used in this bench by all its jobs at some point)
+Collecting virtualenv-clone
+  Using cached virtualenv_clone-0.5.7-py3-none-any.whl (6.6 kB)
+Installing collected packages: virtualenv-clone
+Successfully installed virtualenv-clone-0.5.7
+DEBUG:root:command = /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=gfortran" "ctest --output-on-failure -L ^arch4_quick$" "" "/usr/bin/cmake"
+DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1G -j y -N hibench_intel_xeon_x5650_gfortran_53894da48505892bfa05693a52312bacb12c70c9 /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=gfortran" "ctest --output-on-failure -L ^arch4_quick$" "" "/usr/bin/cmake"
+Your job 17357 ("hibench_intel_xeon_x5650_gfortran_53894da48505892bfa05693a52312bacb12c70c9") has been submitted
+INFO:root:using test arch4_quick for benchmarking
+INFO:root:skipping the creation of /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/iprbench.venv.tgz because it already exists (probably created for other jobs of the same bench)
+DEBUG:root:command = /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/ifort/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=ifort -DBLA_VENDOR=Intel10_64lp" "ctest --output-on-failure -L ^arch4_quick$" "module load compilers/ifort/latest" "/usr/bin/cmake"
+DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1G -j y -N hibench_intel_xeon_x5650_ifort_53894da48505892bfa05693a52312bacb12c70c9 /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/ifort/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=ifort -DBLA_VENDOR=Intel10_64lp" "ctest --output-on-failure -L ^arch4_quick$" "module load compilers/ifort/latest" "/usr/bin/cmake"
+Your job 17358 ("hibench_intel_xeon_x5650_ifort_53894da48505892bfa05693a52312bacb12c70c9") has been submitted
 ```
+
+The `hibenchonphysix` script launches two `sge` jobs for each machine type in the `alambix` cluster:
+- one job that performs a benchmark of hibridon with `gfortran` compiler
+- one job that performs a benchmark of hibridon with `ifort` compiler
+
+When a job completes successfully, it puts the results of its benchmark in `alambix`'s global work directory (e.g. `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran`)
+
+
+## graph the results of benchmarks
+
+`showresults` is a command line tool that graphs the results after they've been downloaded from the results directory (for example `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00`) to the hardcoded (at the moment) path `/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results`
+
+```sh
+20241010-16:30:54 graffy@graffy-ws2:~/work/starbench/iprbench.git$ showresults
+```
+
diff --git a/test/__init__.py b/iprbench/__init__.py
similarity index 100%
rename from test/__init__.py
rename to iprbench/__init__.py
diff --git a/iprbench/hibench/__init__.py b/iprbench/hibench/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/usecases/ipr/hibench/hibenchonphysix.py b/iprbench/hibench/hibenchonphysix.py
similarity index 85%
rename from usecases/ipr/hibench/hibenchonphysix.py
rename to iprbench/hibench/hibenchonphysix.py
index 3ff8850..eaea373 100755
--- a/usecases/ipr/hibench/hibenchonphysix.py
+++ b/iprbench/hibench/hibenchonphysix.py
@@ -2,13 +2,14 @@
 # this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number)
 from typing import List, Tuple, Dict
 from argparse import ArgumentParser
-import os
 from os import getenv, makedirs
 import shutil
 from pathlib import Path
 import subprocess
 import re
 import logging
+import importlib.resources
+import venv
 
 
 HostFqdn = str  # eg 'physix90.ipr.univ-rennes1.fr'
@@ -168,6 +169,53 @@ class ClusterNodeDb:
         return (hosts, num_cores)
 
 
+def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
+    """Clones the python virtual environment in use to `duplicate_virtualenv_path`.
+
+    An existing `duplicate_virtualenv_path` directory is deleted first. The clone is made with the
+    `virtualenv-clone` tool, which is installed in a temporary virtual environment
+    (/tmp/venv_cloner.venv) rather than in the virtual environment being cloned.
+
+    :param duplicate_virtualenv_path: the directory in which the clone is created
+    :raises RuntimeError: if the VIRTUAL_ENV environment variable is not set or points to a missing path
+    :raises subprocess.CalledProcessError: if installing or running `virtualenv-clone` fails
+    """
+    virtual_env = getenv('VIRTUAL_ENV')  # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
+    # explicit check instead of an assert: Path(None) would raise an obscure TypeError when no virtual environment is active, and asserts are stripped by python -O
+    if not virtual_env or not Path(virtual_env).exists():
+        raise RuntimeError(f'failed to find the root of the virtual environment in use (VIRTUAL_ENV environment variable has the value {virtual_env})')
+    this_virtualenv_path = Path(virtual_env)
+
+    if duplicate_virtualenv_path.exists():
+        shutil.rmtree(duplicate_virtualenv_path)
+
+    cloner_virtualenv_path = Path('/tmp/venv_cloner.venv')
+    cloner_bin_path = cloner_virtualenv_path / 'bin'
+    venv.create(cloner_virtualenv_path, with_pip=True)
+    try:
+        # the executables of the cloner virtual environment are run directly (argument lists, no shell), so paths containing spaces or shell metacharacters are handled properly
+        subprocess.run([str(cloner_bin_path / 'pip'), 'install', 'virtualenv-clone'], check=True)
+        subprocess.run([str(cloner_bin_path / 'virtualenv-clone'), str(this_virtualenv_path), str(duplicate_virtualenv_path)], check=True)
+    finally:
+        # the temporary virtual environment is removed even if the cloning failed
+        shutil.rmtree(cloner_virtualenv_path)
+
+
+def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Path):
+    """Archives a clone of the python virtual environment in use as a gzipped tar file.
+
+    The virtual environment in use is first cloned to `venv_hardcoded_path`, then the clone is archived under its
+    directory name only, so extracting the archive from `venv_hardcoded_path.parent` restores it at `venv_hardcoded_path`.
+
+    :param venv_archive_path: the path of the archive file to create
+    :param venv_hardcoded_path: the location where the virtual environment is cloned before being archived
+    :raises subprocess.CalledProcessError: if cloning the virtual environment or running `tar` fails
+    """
+    duplicate_this_virtualenv_to(venv_hardcoded_path)
+    # tar is run from the parent directory so that the archive only contains the directory name of the clone
+    subprocess.run(['tar', 'czvf', str(venv_archive_path), venv_hardcoded_path.name], check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
+
+
 def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: HostGroupId, results_dir: Path, compiler_id: CompilerId, cmake_path: str):
 
     cluster_db = ClusterNodeDb()
@@ -178,8 +198,8 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos
         return
 
     quick_test = 'arch4_quick'  # about 2s on a core i5 8th generation
-    representative_test = 'nh3h2_qma_long'  # about 10min on a core i5 8th generation    
-    use_test_mode = False
+    representative_test = 'nh3h2_qma_long'  # about 10min on a core i5 8th generation
+    use_test_mode = True
     if use_test_mode:
         benchmark_test = quick_test
     else:
@@ -220,24 +240,30 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos
     makedirs(this_bench_dir, exist_ok=True)
 
     starbench_job_path = this_bench_dir / 'starbench.job'
-    this_file_path = Path(os.path.realpath(__file__))
-    scripts_dir = this_file_path.parent
-    starbench_root_path = scripts_dir.parent.parent.parent  # TODO: beurk
 
-    # create a copy of stargemm for use by the jobs (so that starbench_root_path can be modified without affecting the jobs)
-    jobs_starbench_dir = results_dir / 'starbench'  # the location of starbench source code for use by the jobs run by this command
-    shutil.copytree(starbench_root_path, jobs_starbench_dir, dirs_exist_ok=True)
+    job_venv_archive_path = results_dir / 'iprbench.venv.tgz'
+    iprbench_venv_hardcoded_path = Path('/tmp') / 'iprbench.venv'
+    if job_venv_archive_path.exists():
+        logging.info('skipping the creation of %s because it already exists (probably created for other jobs of the same bench)', job_venv_archive_path)
+    else:
+        # freeze this virtualenv so that all jobs related to this benchmark will use the same version of iprbench
+        logging.info('creating %s (the virtual environment that will be used in this bench by all its jobs at some point)', job_venv_archive_path)
+        archive_this_virtualenv_to(job_venv_archive_path, iprbench_venv_hardcoded_path)
 
     # create the job file (which embeds starbench.py)
     tags_dict = {
         # '<include:starbench.py>': scripts_dir / 'starbench.py',
-        '<starbench_job_path>': str(starbench_job_path)
+        '<starbench_job_path>': str(starbench_job_path),
+        '<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path),
+        '<iprbench_venv_archive_path>': str(job_venv_archive_path)
     }
-    substitute_tags(input_file_path=scripts_dir / 'starbench-template.job', tags_dict=tags_dict, output_file_path=starbench_job_path)
+    with importlib.resources.path('iprbench.resources', 'starbench-template.job') as job_template_path:
+        # job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job'
+        substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path)
     subprocess.run(['chmod', 'a+x', starbench_job_path], check=True)
 
-    command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{starbench_root_path}" "{cmake_path}"'
-    print(f'command = {command}')
+    command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{cmake_path}"'
+    logging.debug('command = %s', command)
 
     qsub_command = 'qsub'
     qsub_command += f' -pe smp {num_cores}'
@@ -249,7 +275,7 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos
     qsub_command += ' -j y'  # merge stderr file into stdout file for easier reading of history of events
     qsub_command += f' -N hibench_{host_group_id}_{compiler_id}_{hibridon_version}'
     qsub_command += f' {command}'
-    print(f'qsub_command = {qsub_command}')
+    logging.debug('qsub_command = %s', qsub_command)
 
     subprocess.run(qsub_command, cwd=this_bench_dir, check=True, shell=True)
 
@@ -267,10 +293,10 @@ def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_reg
 
     cluster_db = ClusterNodeDb()
     all_host_groups = cluster_db.cpu_defs.keys()
-    
-    print(f'available host groups: {all_host_groups}')
+
+    logging.info('available host groups: %s', all_host_groups)
     host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None]
-    print(f'requested host groups: {host_groups}')
+    logging.info('requested host groups: %s', host_groups)
 
     for compiler in compilers:
         for host_group in host_groups:
@@ -290,6 +316,7 @@ def path_is_reachable_by_compute_nodes(path: Path):
 
 
 def main():
+    logging.basicConfig(level=logging.DEBUG)
     arg_parser = ArgumentParser(description='launches hibridon benchmark jobs on IPR\'s physix cluster', epilog='example:\n    --commit-id a3bed1c3ccfbca572003020d3e3d3b1ff3934fad')
     arg_parser.add_argument('--commit-id', type=str, required=True, help='the commit id of the version of code to benchmark')
     arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
@@ -311,6 +338,3 @@ def main():
         raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
 
     launch_perf_jobs(hibridon_version, results_dir, arch_regexp, cmake_path)
-
-
-main()
diff --git a/usecases/ipr/hibench/showresults.py b/iprbench/hibench/showresults.py
similarity index 99%
rename from usecases/ipr/hibench/showresults.py
rename to iprbench/hibench/showresults.py
index 4af1d9a..5880662 100755
--- a/usecases/ipr/hibench/showresults.py
+++ b/iprbench/hibench/showresults.py
@@ -397,5 +397,3 @@ def main():
 
     create_graphs(engine)
 
-
-main()
diff --git a/iprbench/main.py b/iprbench/main.py
new file mode 100644
index 0000000..99c4176
--- /dev/null
+++ b/iprbench/main.py
@@ -0,0 +1 @@
+__version__ = '0.0.1'
\ No newline at end of file
diff --git a/usecases/ipr/hibench/starbench-template.job b/iprbench/resources/starbench-template.job
similarity index 72%
rename from usecases/ipr/hibench/starbench-template.job
rename to iprbench/resources/starbench-template.job
index bfb949d..739cb15 100644
--- a/usecases/ipr/hibench/starbench-template.job
+++ b/iprbench/resources/starbench-template.job
@@ -7,8 +7,7 @@ code_version="$4"  # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e
 cmake_options="$5"  # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON'
 benchmark_command="$6"  # eg 'ctest -L ^arch4_quick$'
 env_vars_bash_commands="$7"  # defines extra environment variables prior to launch starbench. eg "export MKLROOT=/opt/intel/compilers_and_libraries_2020.1.217/linux/mkl"
-starbench_src_url="$8"  # location of starbench source (eg /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-08T16:39:52+02:00/starbench)
-cmake_path="$9"   # eg '/opt/cmake/cmake-3.23.0/bin/cmake'
+cmake_path="$8"   # eg '/opt/cmake/cmake-3.23.0/bin/cmake'
 executed_by_sge=''
 
 if [ "${JOB_ID}" = '' ]
@@ -33,27 +32,29 @@ then
 fi
 mkdir -p "${temp_dir}"
 
-# create a virtual environment to install starbench
-venv_path="${temp_dir}/starbench.venv"
-python3 -m virtualenv "$venv_path"
-if [ $? != 0 ]
+iprbench_venv_path='<iprbench_venv_hardcoded_path>'
+iprbench_venv_parent=$(dirname "$iprbench_venv_path")
+iprbench_venv_archive_path='<iprbench_venv_archive_path>'
+echo "unarchiving virtual environment ${iprbench_venv_archive_path} to ${iprbench_venv_parent}"
+pushd "${iprbench_venv_parent}"
+	tar xzvf "${iprbench_venv_archive_path}"
+popd
+if [ ! -d "${iprbench_venv_path}" ]
 then
-	echo "failed to create the virtual environment $venv_path"
-	exit 1
-fi
-source "$venv_path/bin/activate"
-if [ $? != 0 ]
-then
-	echo "failed to activate the virtual environment $venv_path"
+	echo "failed to find expected directory ${iprbench_venv_path}"
 	exit 1
 fi
 
-pip install $starbench_src_url
+echo "using the iprbench virtual environment that has been created for this bench: ${iprbench_venv_path}"
+source "$iprbench_venv_path/bin/activate"
 if [ $? != 0 ]
 then
-	echo "failed to install starbench ($starbench_src_url) in the virtual environment $venv_path"
+	echo "failed to activate the virtual environment $iprbench_venv_path"
 	exit 1
 fi
+echo "VIRTUAL_ENV = $VIRTUAL_ENV"
+# show the list of packages installed in the virtual environment
+pip list
 
 output_dir="${temp_dir}"
 num_cores=${NSLOTS}
@@ -94,14 +95,14 @@ eval ${command}
 if [ "$?" = '0' ]
 then
 	echo "the command ${command} succeeded"
-	rsync -va --exclude 'build' --exclude 'source.git' "${output_dir}/"  ${launch_dir}/  # exclude the source.git and build directories (one for each worker) because they are big and not that precious
+	rsync -va --exclude 'build' --exclude 'source.git' --exclude "${iprbench_venv_path}" "${output_dir}/"  ${launch_dir}/  # exclude the source.git and build directories (one for each worker) because they are big and not that precious
 	# TMPDIR will be deleted by sge at the end of the job
 else
 	if [ ${executed_by_sge} = 'true' ]
 	then
 		# TMPDIR will be deleted by sge at the end of the job. Backup data for investigation
 		backup_dir="/opt/ipr/cluster/work.local/$(whoami)/${JOB_ID}"
-		echo "moving ${output_dir} to ${backup_dir} to that it doesn't get deleted by sge at the end of the job. This way, data gets a chance to be investigated then manually deleted." 
+		echo "moving ${output_dir} to ${backup_dir} so that it doesn't get deleted by sge at the end of the job. This way, data gets a chance to be investigated then manually deleted." 
 		mv "${output_dir}" "${backup_dir}"
 	fi
 	echo "the command ${command} failed... the output data dir (${output_dir}) is expected to be cleaned up manually after investigation"
diff --git a/pyproject.toml b/pyproject.toml
index 2bff8c2..cb97dff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,15 +3,19 @@ requires = ["setuptools"]
 build-backup = "setuptools.build_meta"
 
 [project]
-name = "starbench"
+name = "iprbench"
 dynamic = ["version"]  # the list of fields whose values are dicovered by the backend (eg __version__)
-description = "a tool to benchmark a git cmake application using embarassingly parallel runs"
+description = "benchmarks for IPR (Institut de Physique de Rennes) cluster"
 readme = "README.md"
 keywords = ["benchmark", "hpc", "parallel", 'openmp']
 license = {text = "MIT License"}
 dependencies = [
+    "pandas",
+    "matplotlib",
+    "sqlalchemy",
 #   "cocluto >= 1.2"
 #    "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto"
+    "starbench@git+https://github.com/g-raffy/starbench"
 ]
 requires-python = ">= 3.8"
 authors = [
@@ -19,10 +23,17 @@ authors = [
 ]
 
 [project.scripts]
-starbench = "starbench.main:main"
+hibenchonphysix = "iprbench.hibench.hibenchonphysix:main"
+showresults = "iprbench.hibench.showresults:main"
 
 [project.urls]
 Repository = "https://github.com/g-raffy/starbench"
 
+[tool.setuptools]
+packages = ["iprbench", "iprbench.hibench"]
+
 [tool.setuptools.dynamic]
-version = {attr = "starbench.main.__version__"}
+version = {attr = "iprbench.main.__version__"}
+
+[tool.setuptools.package-data]
+iprbench = ["resources/**/*"]
diff --git a/src/starbench/__init__.py b/src/starbench/__init__.py
deleted file mode 100644
index 5b7a097..0000000
--- a/src/starbench/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# from .main import starbench_cmake_app
-# __all__ = [starbench_cmake_app]
diff --git a/src/starbench/core.py b/src/starbench/core.py
deleted file mode 100755
index f8cca14..0000000
--- a/src/starbench/core.py
+++ /dev/null
@@ -1,284 +0,0 @@
-#!/usr/bin/env python3
-'''starbench is an application that is able to measure the execution time of a user software suite in various conditions (different build modes and different execution modes)
-
-'''
-__version__ = '1.0.0'
-import threading
-import subprocess
-import os
-import sys
-from typing import List, Dict, Optional, Tuple, Callable
-from datetime import datetime
-from pathlib import Path
-from abc import ABC, abstractmethod
-# from typing import ForwardRef
-try:
-    from typing import ForwardRef  # type: ignore pylint: disable=ungrouped-imports
-except ImportError:
-    # python 3.6
-    from typing import _ForwardRef as ForwardRef
-
-assert sys.version_info >= (3, 5, 0), 'this code requires at least python 3.5'  # type hints in arguments
-
-
-class StarBenchException(Exception):
-    '''base exception for user errors detected by starbench'''
-
-
-RunId = int  # identifier of a run
-WorkerId = int  # identifier of a worker (a run is performed on a worker)
-DurationInSeconds = float
-ProcessId = int
-ReturnCode = int
-Url = str
-GitCommitId = str
-
-
-class Run():
-    """represents a run of a run of the benchmarked command within its CommandPerfEstimator
-    """
-    id: RunId  # uniquely identifies a run within its CommandPerfEstimator instance
-    worker_id: WorkerId  # the worker used for this run (number of workers = number of parallel runs)
-    pid: Optional[ProcessId]  # the process identifier of the process used by the command
-    start_time: datetime  # the time at which the command process has started
-    return_code: ReturnCode  # the exit code of the command process
-    end_time: Optional[datetime]  # the time at which the command process has ended. None if the process is still running
-
-    def __init__(self, run_id: RunId, worker_id: WorkerId):
-        self.id = run_id
-        self.worker_id = worker_id
-        self.pid = None
-        self.return_code = 0
-        self.start_time = datetime.now()
-        self.end_time = None
-
-    def has_finished(self) -> bool:
-        """indicates if this run has finished"""
-        return self.end_time is not None
-
-    def get_duration(self) -> DurationInSeconds:
-        """returns the duration of this run, provided it has finished
-        """
-        assert self.has_finished()
-        return (self.end_time - self.start_time).total_seconds()
-
-
-CommandPerfEstimator = ForwardRef('CommandPerfEstimator')
-
-
-class IStarBencherStopCondition(ABC):
-    """abstract handler that decides if the given CommandPerfEstimator has enough runs to estimate the performance or should trigger new runs
-
-    """
-    @abstractmethod
-    def should_stop(self, star_bencher: CommandPerfEstimator) -> bool:
-        """decides if the given CommandPerfEstimator instance should trigger new runs
-
-        This method is called at the end of each run, to decide if another run should be triggered or not.
-        """
-
-
-class StopAfterSingleRun(IStarBencherStopCondition):
-    """a stop condition that causes the given CommandPerfEstimator to never start new runs
-
-    as a result, this causes the given CommandPerfEstimator to just use one single run of the command to estimate its performance.
-    """
-    def __init__(self):
-        pass
-
-    def should_stop(self, star_bencher: CommandPerfEstimator):
-        # never start a new run
-        return True
-
-
-class StopWhenConverged(IStarBencherStopCondition):
-    """a stop condition that triggers when the just completed run doesn't have much effect on the average run's duration
-    """
-    def __init__(self, max_error: float = 0.01):
-        self.max_error = max_error
-        self._last_mean_duration = None
-
-    def should_stop(self, star_bencher: CommandPerfEstimator) -> bool:
-        do_stop = False
-        mean_duration, _num_runs = star_bencher.get_run_mean_duration()
-        print(f'mean_duration = {mean_duration}')
-        if self._last_mean_duration is not None:
-            diff = abs(mean_duration - self._last_mean_duration)
-            print(f'diff = {diff}')
-            if diff < self.max_error:
-                do_stop = True
-        self._last_mean_duration = mean_duration
-        return do_stop
-
-
-class CommandPerfEstimator():  # (false positive) pylint: disable=function-redefined
-    '''a command runner that runs a given command multiple times and measures the average execution duration
-
-    the 'star' term comes from hpl's stadgemm benchmark, where we launch `n` independent programs on `n` cores
-    '''
-    run_command: List[str]  # the command that this instance of CommandPerfEstimator is expected to run (eg: ['ctest', '--output-on-failure', '-L', '^arch4_quick$']). The command supports the following tags:
-    run_command_cwd: Path  # the current directory to use when executing run_command
-    stdout_filepath: Path  # the path of the file that records the standard output of run_command
-    stderr_filepath: Path  # the path of the file that records the standard error of run_command
-    num_cores_per_run: int  # the max number of threads used by each run
-    num_parallel_runs: int  # how many times run_command is run simultaneously
-    max_num_cores: int  # the maximum allowed number of cores for this CommandPerfEstimator
-    stop_condition: IStarBencherStopCondition  # the condition that is used so that this CommandPerfEstimator can decide to stop launching commands
-    stop_on_error: bool
-    _next_run_id: int
-    _runs: Dict[int, Run]
-    _last_mean_duration: Optional[DurationInSeconds]
-    _num_runs: int
-    _runs_lock: threading.Lock
-    _finished_event: threading.Event
-
-    def __init__(self, run_command: List[str], num_cores_per_run: int, num_parallel_runs: int, max_num_cores: int, stop_condition: IStarBencherStopCondition, stop_on_error=True, run_command_cwd: Path = None, stdout_filepath: Path = None, stderr_filepath: Path = None):
-        assert num_cores_per_run * num_parallel_runs <= max_num_cores
-        self.run_command = run_command
-        self.run_command_cwd = run_command_cwd
-        self.stdout_filepath = stdout_filepath
-        self.stderr_filepath = stderr_filepath
-        self.num_cores_per_run = num_cores_per_run
-        self.num_parallel_runs = num_parallel_runs
-        self.max_num_cores = max_num_cores
-        self.stop_condition = stop_condition
-        self.stop_on_error = stop_on_error
-        self._next_run_id = 0
-        self._runs = {}
-        self._last_mean_duration = None
-        self._num_runs = 0
-        self._runs_lock = threading.Lock()
-        self._finished_event = threading.Event()
-
-    def popen_and_call(self, popen_args: List[str], on_exit: Callable[[ProcessId, ReturnCode, RunId], None], run_id: RunId, cwd: Path, stdout_filepath: Path = None, stderr_filepath: Path = None):
-        """
-        Runs the given args in a subprocess.Popen, and then calls the function
-        on_exit when the subprocess completes.
-        on_exit is a callable object, and popen_args is a list/tuple of args that
-        would give to subprocess.Popen.
-        """
-        def run_in_thread(popen_args: List[str], on_exit: Callable[[ProcessId, ReturnCode, RunId], None]):
-            stdout = None
-            stderr = None
-            returncode = -1
-            pid = -1
-            streams_are_ok = True
-            try:
-                # with open(stdout_filepath, 'w', encoding='utf8') as stdout, open(stderr_filepath, 'w', encoding='utf8') as stderr:
-                if stdout_filepath is not None:
-                    stdout = open(stdout_filepath, 'w', encoding='utf8')
-                if stderr_filepath is not None:
-                    stderr = open(stderr_filepath, 'w', encoding='utf8')
-            except:
-                print(f'failed to open {stdout_filepath} or {stderr_filepath} in write mode')
-                streams_are_ok = False
-            if streams_are_ok:
-                try:
-                    env = os.environ.copy()
-                    # restrict the number of threads used by openmp
-                    env['OMP_NUM_THREADS'] = f'{self.num_cores_per_run}'
-                    # restrict the nu,ber of threads used by intel math kernel library
-                    env['MKL_NUM_THREADS'] = f'{self.num_cores_per_run}'
-                    proc = subprocess.Popen(popen_args, cwd=cwd, stdout=stdout, stderr=stderr, env=env)
-                    pid = proc.pid
-                    proc.wait()
-                    returncode = proc.returncode
-                except:
-                    print(f'command failed: {popen_args}')
-            on_exit(pid, returncode, run_id)
-            return
-        thread = threading.Thread(target=run_in_thread, args=(popen_args, on_exit))
-        thread.start()
-        # returns immediately after the thread starts
-        return thread
-
-    def get_run_mean_duration(self) -> Tuple[DurationInSeconds, int]:
-        """returns the average duration of all completed runs of this CommandPerfEstimator instance
-        """
-        duration_sums = 0.0  # in python3.6+, replace with duration_sums: float = 0.0
-        num_finished_runs = 0  # in python3.6+, replace with num_finished_runs: int = 0
-        with self._runs_lock:
-            for run in self._runs.values():
-                if run.has_finished():
-                    num_finished_runs += 1
-                    duration_sums += run.get_duration()
-        assert num_finished_runs > 0
-        return duration_sums / num_finished_runs, num_finished_runs
-
-    def _all_runs_have_finished(self):
-        with self._runs_lock:
-            for run in self._runs.values():
-                if not run.has_finished():
-                    return False
-        return True
-
-    def on_exit(self, pid: ProcessId, return_code: ReturnCode, run_id: RunId):
-        """method called when the command executed by a run ends. Unless the stop condition is met, a new run is started.
-
-        pid: the process identifier of the process of the run that just finished
-        return_code: the return code of the process of the run that just finished
-        run_id: the run that just completed
-        """
-        end_time = datetime.now()
-        # print(self, pid, run_id)
-        run = self._runs[run_id]
-        run.pid = pid
-        run.end_time = end_time
-        run.return_code = return_code
-
-        do_stop = False
-        if self.stop_on_error and run.return_code != 0:
-            do_stop = True
-        else:
-            do_stop = self.stop_condition.should_stop(self)
-        if not do_stop:
-            # print('adding a run')
-            self._start_run(run.worker_id)  # reuse the same worker as the run that has just finished
-        if self._all_runs_have_finished():
-            # tell the main thread that all the runs have finished
-            self._finished_event.set()
-
-    @staticmethod
-    def _interpret_tags(tagged_string: str, tags_value: Dict[str, str]) -> str:
-        untagged_string = tagged_string
-        for tag_id, tag_value in tags_value.items():
-            assert isinstance(untagged_string, str)
-            untagged_string = untagged_string.replace(tag_id, tag_value)
-        return untagged_string
-
-    def _start_run(self, worker_id: WorkerId):
-        """starts a run using the given worker"""
-        tags_value = {
-            '<worker_id>': f'{worker_id:03d}'
-        }
-        run_command = [CommandPerfEstimator._interpret_tags(s, tags_value) for s in self.run_command]
-        run_command_cwd = CommandPerfEstimator._interpret_tags(str(self.run_command_cwd), tags_value)
-        stdout_filepath = None
-        if self.stdout_filepath is not None:
-            stdout_filepath = CommandPerfEstimator._interpret_tags(str(self.stdout_filepath), tags_value)
-            Path(stdout_filepath).parent.mkdir(exist_ok=True)
-        stderr_filepath = None
-        if self.stderr_filepath is not None:
-            stderr_filepath = CommandPerfEstimator._interpret_tags(str(self.stderr_filepath), tags_value)
-            Path(stderr_filepath).parent.mkdir(exist_ok=True)
-
-        with self._runs_lock:
-            run = Run(self._next_run_id, worker_id)
-            self._next_run_id += 1
-            self._runs[run.id] = run
-            _run_thread = self.popen_and_call(popen_args=run_command, on_exit=self.on_exit, run_id=run.id, cwd=run_command_cwd, stdout_filepath=stdout_filepath, stderr_filepath=stderr_filepath)  # noqa:F841
-
-    def run(self) -> DurationInSeconds:
-        '''performs the runs of the command and returns the runs' average duration'''
-        print(f"executing the following command in parallel ({self.num_parallel_runs} parallel runs) : '{str(self.run_command)}'")
-        for worker_id in range(self.num_parallel_runs):
-            self._start_run(worker_id)
-        # wait until all runs have finished
-        self._finished_event.wait()
-        with self._runs_lock:
-            workers_success = [run.return_code == 0 for run in self._runs.values()]
-            if not all(workers_success):
-                raise StarBenchException(f'at least one run failed (workers_success = {workers_success})')
-        mean_duration, num_runs = self.get_run_mean_duration()
-        print(f'mean duration : {mean_duration:.3f} s ({num_runs} runs)')
-        return mean_duration
diff --git a/src/starbench/main.py b/src/starbench/main.py
deleted file mode 100755
index 7535ca1..0000000
--- a/src/starbench/main.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#!/usr/bin/env python3
-'''starbench is an application that is able to measure the execution time of a user software suite in various conditions (different build modes and different execution modes)
-
-'''
-__version__ = '1.0.0'
-import argparse
-from abc import ABC, abstractmethod
-import subprocess
-from typing import List, Optional
-from pathlib import Path
-from .core import GitCommitId, Url, CommandPerfEstimator, StopAfterSingleRun
-
-
-class IFileTreeProvider(ABC):
-
-    @abstractmethod
-    def get_source_tree_path(self) -> Path:
-        pass
-
-
-class ExistingDir(IFileTreeProvider):
-    dir_path: Path
-
-    def __init__(self, dir_path: Path):
-        self.dir_path = dir_path
-
-    def get_source_tree_path(self) -> Path:
-        return self.dir_path
-
-
-class GitRepos(IFileTreeProvider):
-    git_repos_url: Url
-    git_user: Optional[str]
-    git_password: Optional[str]
-    code_version: Optional[GitCommitId]
-    src_dir: Optional[Path]  # the temporary directory used to populate the source code
-
-    def __init__(self, git_repos_url: Url, git_user: Optional[str] = None, git_password: Optional[str] = None, code_version: Optional[GitCommitId] = None, src_dir: Optional[Path] = None):
-        self.git_repos_url = git_repos_url
-        self.git_user = git_user
-        self.git_password = git_password
-        self.code_version = code_version
-        self.src_dir = src_dir
-
-    def get_source_tree_path(self) -> Path:
-        self.src_dir.mkdir(exist_ok=True)
-        git_credentials = []
-        if self.git_user:
-            git_credentials.append(self.git_user)
-        if self.git_password:
-            git_credentials.append(self.git_password)
-        git_repos_url = self.git_repos_url
-        if len(git_credentials) != 0:
-            git_repos_url = git_repos_url.replace('https://', f"https://{':'.join(git_credentials)}@")
-        # src_dir.mkdir(exist_ok=True)
-        subprocess.run(['git', 'clone', f'{str(self.git_repos_url)}', str(self.src_dir)], cwd=str(self.src_dir), check=True)
-        if self.code_version:
-            subprocess.run(['git', 'checkout', f'{self.code_version}'], cwd=str(self.src_dir), check=True)
-        return self.src_dir
-
-
-def starbench_cmake_app(source_code_provider: IFileTreeProvider, tmp_dir: Path, num_cores: int, benchmark_command: List[str], cmake_options: Optional[List[str]] = None, cmake_exe_location: Path = None):
-    """
-    tests_to_run : regular expression as understood by ctest's -L option. eg '^arch4_quick$'
-    """
-    src_dir = source_code_provider.get_source_tree_path()
-    # we need one build for each parallel run, otherwise running ctest on parallel would overwrite the same file, which causes the test to randomly fail depnding on race conditions
-    worker_dir = tmp_dir / 'worker<worker_id>'
-    build_dir = worker_dir / 'build'
-    if cmake_options is None:
-        cmake_options = []
-    print(f'creating build directory {worker_dir}')
-    create_build_dir = CommandPerfEstimator(
-        run_command=['mkdir', '-p', str(build_dir)],
-        num_cores_per_run=1,
-        num_parallel_runs=num_cores,
-        max_num_cores=num_cores,
-        stop_condition=StopAfterSingleRun(),
-        run_command_cwd=Path('/tmp'),
-        stdout_filepath=worker_dir / 'createdir_stdout.txt',
-        stderr_filepath=worker_dir / 'createdir_stderr.txt')
-    _create_build_dir_duration = create_build_dir.run()  # noqa: F841
-    # build_dir.mkdir(exist_ok=True)
-
-    print(f'configuring {src_dir} into {build_dir} ...')
-    cmake_prog = 'cmake'
-    if cmake_exe_location:
-        cmake_prog = str(cmake_exe_location)
-    configure = CommandPerfEstimator(
-        run_command=[cmake_prog] + cmake_options + [str(src_dir)],
-        num_cores_per_run=1,
-        num_parallel_runs=num_cores,
-        max_num_cores=num_cores,
-        stop_condition=StopAfterSingleRun(),
-        run_command_cwd=build_dir,
-        stdout_filepath=worker_dir / 'configure_stdout.txt',
-        stderr_filepath=worker_dir / 'configure_stderr.txt')
-    _configure_duration = configure.run()  # noqa: F841
-
-    print(f'building {build_dir} ...')
-    build = CommandPerfEstimator(
-        run_command=['make'],
-        num_cores_per_run=1,
-        num_parallel_runs=num_cores,
-        max_num_cores=num_cores,
-        stop_condition=StopAfterSingleRun(),
-        run_command_cwd=build_dir,
-        stdout_filepath=worker_dir / 'build_stdout.txt',
-        stderr_filepath=worker_dir / 'build_stderr.txt')
-    _build_duration = build.run()  # noqa: F841
-
-    print(f'benchmarking {build_dir} ...')
-    stop_condition = StopAfterSingleRun()
-    bench = CommandPerfEstimator(
-        run_command=benchmark_command,
-        num_cores_per_run=1,
-        num_parallel_runs=num_cores,
-        max_num_cores=num_cores,
-        stop_condition=stop_condition,
-        run_command_cwd=build_dir,
-        stdout_filepath=worker_dir / 'bench_stdout.txt',
-        stderr_filepath=worker_dir / 'bench_stderr.txt')
-    mean_duration = bench.run()
-    print(f'duration : {mean_duration:.3f} s' % ())
-
-
-def main():
-    '''main program'''
-
-    example_text = '''example:
-
-    %(prog)s --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-path=/opt/cmake/cmake-3.23.0/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$'
-
-    '''
-
-    parser = argparse.ArgumentParser(description='performs a benchmark on a cmake buildable app hosted on a git repository', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('--git-repos-url', required=True, help='the url of the code to benchmark (eg https://github.com/hibridon/hibridon)')
-    parser.add_argument('--code-version', help='the version of the code to use; either a branch or a commit id (eg a3bed1c3ccfbca572003020d3e3d3b1ff3934fad)')
-    parser.add_argument('--git-user', help='the git user to use to clone the code repository')
-    password_group = parser.add_mutually_exclusive_group()
-    password_group.add_argument('--git-pass-file', help='the path to a file containing the password (or personal access token)')
-    password_group.add_argument('--git-pass', type=str, help='the password (or personal access token) to use (not recommended for security reasons)')
-    parser.add_argument('--num-cores', type=int, required=True, help='the number of cores that the benchmark will use')
-    parser.add_argument('--output-dir', type=Path, required=True, help='where the output files will be placed')
-    parser.add_argument('--cmake-path', type=Path, help='the path to the cmake executable to use in case a specific cmake is wanted')
-    parser.add_argument('--cmake-option', type=str, action='append', help='additional option passed to cmake in the configure step (use this flag multiple times if you need more than one cmake option)')
-    parser.add_argument('--benchmark-command', required=True, type=str, help='the command to benchmark')
-    args = parser.parse_args()
-
-    git_user = args.git_user
-    git_repos_url = args.git_repos_url
-
-    git_password = None
-    if args.git_pass:
-        git_password = args.git_pass
-    elif args.git_pass_file:
-        with open(args.git_pass_file, 'r', encoding='utf8') as f:
-            git_password = f.readline().replace('\n', '')  # os.environ['HIBRIDON_REPOS_PAT']
-
-    source_tree_provider = GitRepos(git_repos_url=git_repos_url, code_version=args.code_version, git_user=git_user, git_password=git_password, src_dir=args.output_dir / 'source.git')
-
-    starbench_cmake_app(source_tree_provider, tmp_dir=args.output_dir, num_cores=args.num_cores, cmake_options=args.cmake_option, benchmark_command=args.benchmark_command.split(' '), cmake_exe_location=args.cmake_path)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/test/mamul1/CMakeLists.txt b/test/mamul1/CMakeLists.txt
deleted file mode 100644
index 80095ea..0000000
--- a/test/mamul1/CMakeLists.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-
-enable_language (Fortran)
-
-set(MAMUL1_USE_MAGMA "OFF" CACHE BOOL "if set, mamul1 build uses magma (matrix algebra on gpu)")
-
-set(MAMUL1_MAGMA_API "CPU_MEM_API" CACHE STRING "which magma API to use when building mamul1: CPU_MEM_API for BLAS compatible API (uses matrices stored on CPU memory) or GPU_MEM_API (use matrices stored on GPU memory)")
-
-add_executable(mamul1 mamul1.F90)
-
-if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
-	# Allow arbitrary long lines. Needed as preprocessing could generate long line lengths.
-	target_compile_options(mamul1 PUBLIC -ffree-line-length-none)
-elseif (Fortran_COMPILER_NAME STREQUAL "ifort")
-	# Intel (ifort)
-	target_compile_options(mamul1 PUBLIC -no-wrap-margin)
-endif()
-
-
-if (MAMUL1_USE_MAGMA)
-	find_package( MAGMA REQUIRED )
-	if( MAMUL1_MAGMA_API STREQUAL "CPU_MEM_API" )
-		target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM)
-	elseif( MAMUL1_MAGMA_API STREQUAL "GPU_MEM_API" )
-		target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM_GPU)
-	else()
-		message(FATAL_ERROR "unexpected value for MAMUL1_MAGMA_API : ${MAMUL1_MAGMA_API}")
-	endif()
-	message(STATUS "MAGMA_INCLUDES=${MAGMA_INCLUDES}")
-	include_directories("${MAGMA_INCLUDES}")
-	target_link_libraries(mamul1 "${MAGMA_LIBRARIES}")
-else()
-	find_package( BLAS REQUIRED )
-	find_package( LAPACK REQUIRED )
-	# message("BLAS_LIBRARIES=${BLAS_LIBRARIES}")
-	# message("LAPACK_LIBRARIES=${LAPACK_LIBRARIES}")
-	target_compile_definitions(mamul1 PUBLIC USE_DGEMM)
-
-	# Link Blas and Lapack libraries
-	target_link_libraries(mamul1 "${LAPACK_LIBRARIES}")
-	target_link_libraries(mamul1 "${BLAS_LIBRARIES}")
-endif()
-
-install(TARGETS mamul1)
diff --git a/test/mamul1/mamul1.F90 b/test/mamul1/mamul1.F90
deleted file mode 100644
index c31447b..0000000
--- a/test/mamul1/mamul1.F90
+++ /dev/null
@@ -1,339 +0,0 @@
-#define MAMUL1_VERSION "1.0.0"
-
-#define magma_devptr_t integer(kind=8)
-subroutine print_usage(prog_path)
-    character(len=*), intent(in) :: prog_path
-    character(len=80) :: build_variant
-#if defined(USE_MAGMA_DGEMM_GPU)
-    build_variant='gpu'
-#elif defined(USE_DGEMM)
-    build_variant='cpu'
-#else
-    build_variant='unknown'
-#endif
-    write(6,'("mamul1 v",a," (variant:",a,"): benchmark performs a square matrix multiplication in double precision")') MAMUL1_VERSION, trim(build_variant);
-    write(6,'()');
-    write(6,'("Usage: ",a," <NDIM> <NUM_LOOPS>")') trim(prog_path);
-    write(6,'("   <NDIM> positive integer representing the size of the square matrices to multiply ")');
-    write(6,'("   <NUM_LOOPS> positive integer representing the number of times the multiplication is performed")');
-end subroutine
-
-program mamul1
-
-implicit none
-
-
-integer :: argc, info, ndim, num_loops
-
-character(len=32) :: arg0, arg1, arg2
-
-
-call get_command_argument(0,arg0)
-
-argc = command_argument_count()
-if (argc /= 2) then
-    call print_usage(trim(arg0))
-    ! write(6,'("Usage: ",a," NDIM NUM_LOOPS, where NDIM is a positive integer")') trim(arg0);
-    stop
-end if
-
-call get_command_argument(1,arg1,status=info)
-if (info /= 0) then
-    write(6,'("Error reading argument: info = ",i2)') info
-    call print_usage(trim(arg0))
-stop
-end if
-
-call get_command_argument(2,arg2,status=info)
-if (info /= 0) then
-    write(6,'("Error reading argument: info = ",i2)') info
-    call print_usage(trim(arg0))
-stop
-end if
-
-read(arg1,*,iostat=info) ndim
-if (info /= 0) then
-    write(6,'("Error converting ndim argument to integer: info = ",i2)') info
-    call print_usage(trim(arg0))
-stop
-end if
-
-read(arg2,*,iostat=info) num_loops
-if (info /= 0) then
-    write(6,'("Error converting num_loops argument to integer: info = ",i2)') info
-    call print_usage(trim(arg0))
-stop
-end if
-
-
-if (ndim < 1) then
-    call print_usage(trim(arg0))
-stop
-end if
-
-    call test_dgemm(ndim, num_loops)
-
-stop
-end program mamul1
-
-subroutine set_random_seed(seed)
-    integer :: seed
-    integer :: seed_array_size
-    INTEGER, ALLOCATABLE :: seed_array (:)
-    CALL RANDOM_SEED (SIZE = seed_array_size)  ! I is set to the size of
-    !                              ! the seed array
-    ALLOCATE (seed_array(seed_array_size))
-    seed_array = seed
-    CALL RANDOM_SEED (PUT=seed_array(1:seed_array_size))
-end subroutine
-
-subroutine print_matrix(mat, ndim)
-    implicit none
-    integer, parameter :: dp = kind(1.0d0)
-    real(dp), intent(in) :: mat(ndim, ndim)
-    integer, intent(in) :: ndim
-    integer :: irow
-    do irow = 1, ndim
-        write(6, *) mat(irow,:)
-    end do
-end subroutine
-
-! square matrix multiplication
-subroutine sqmatmul(amat, bmat, cmat, ndim)
-#if defined(USE_MAGMA_DGEMM_GPU)
-    use magma, only: magmaf_init, magmaf_finalize
-    use magma, only: magmaf_queue_create, magmaf_queue_destroy
-    use magma, only: magmaf_dmalloc, magmaf_free
-    use magma, only: magmaf_dsetmatrix, magmaf_dgetmatrix
-    use magma, only: magmablasf_dgemm
-#endif
-    real*8, intent(in) :: amat(ndim,ndim)
-    real*8, intent(in) :: bmat(ndim,ndim)
-    real*8, intent(out) :: cmat(ndim,ndim)
-    integer :: lda, ldb, ldc
-    integer :: info
-
-    real :: time_before, time_after
-    integer(8) :: num_ops
-    real :: gflops
-
-#ifdef USE_MAGMA_DGEMM_GPU
-    magma_devptr_t :: d_amat
-    magma_devptr_t :: d_bmat
-    magma_devptr_t :: d_cmat
-    magma_devptr_t :: queue  !! really a CPU pointer
-#endif
-    lda = ceiling(real(ndim)/32)*32
-    ldb = ceiling(real(ndim)/32)*32
-    ldc = ceiling(real(ndim)/32)*32
-
-
-#if defined(USE_MAGMA_DGEMM_GPU)
-    !! allocate GPU memory
-    write(6,'("DEBUG: before matrix A gpu memory allocation (",i0," doubles)")') lda * ndim
-    info = magmaf_dmalloc( d_amat, lda*ndim )
-    if (d_amat == 0) then
-        print "(a)", "failed to allocate d_amat"
-        return
-    endif
-    write(6,'("DEBUG: before matrix B gpu memory allocation (",i0," doubles)")') ldb * ndim
-    info = magmaf_dmalloc( d_bmat, ldb*ndim )
-    if (d_bmat == 0) then
-        print "(a)", "failed to allocate d_bmat"
-        return
-    endif
-    write(6,'("DEBUG: before matrix C gpu memory allocation (",i0," doubles)")') ldc * ndim
-    info = magmaf_dmalloc( d_cmat, ldc*ndim )
-    if (d_cmat == 0) then
-        print "(a)", "failed to allocate d_cmat"
-        return
-    endif
-
-    ! copy A to dA and B to dB
-    call magmaf_queue_create( 0, queue )
-    write(6,'("DEBUG: queue = ",i0)') queue
-    if (queue == 0) then
-        print "(a)", "failed to create a queue"
-        return
-    endif
-
-    write(6,*) 'DEBUG: copying matrix A from CPU to GPU memory'
-    call magmaf_dsetmatrix( ndim, ndim, amat, ndim, d_amat, lda, queue )
-    write(6,*) 'DEBUG: copying matrix B from CPU to GPU memory'
-    call magmaf_dsetmatrix( ndim, ndim, bmat, ndim, d_bmat, ldb, queue )
-
-    call cpu_time(time_before)
-    write (6,*) 'before magmablasf_dgemm, time=', time_before
-
-    call magmablasf_dgemm ('N', 'N', ndim, ndim, ndim, 1.0d0, d_amat, lda, d_bmat, ldb, 0.0d0, d_cmat, ldc, queue)
-    call magmaf_queue_sync(queue)
-
-    call cpu_time(time_after)
-    num_ops = real(ndim) * real(ndim) * real(ndim) * 2
-    gflops = num_ops / (time_after - time_before) / 1.0e9
-    write (6,*) 'after magmablasf_dgemm, time=', time_after
-    write (6,*) 'magmablasf_dgemm (from gpu memory to gpu memory) duration :', (time_after - time_before), '(', gflops, ' gflops)'
-
-    write(6,*) 'DEBUG: copying matrix C from GPU to CPU memory'
-    call magmaf_dgetmatrix( ndim, ndim, d_cmat, ldc, cmat, ndim, queue )
-    call magmaf_queue_destroy( queue )
-
-    info = magmaf_free(d_cmat)
-    info = magmaf_free(d_bmat)
-    info = magmaf_free(d_amat)
-
-#endif
-
-#ifdef USE_DGEMM
-    ! subroutine dgemm 	( 	character  	TRANSA,
-    ! 		character  	TRANSB,
-    ! 		integer  	M,
-    ! 		integer  	N,
-    ! 		integer  	K,
-    ! 		double precision  	ALPHA,
-    ! 		double precision, dimension(lda,*)  	A,
-    ! 		integer  	LDA,
-    ! 		double precision, dimension(ldb,*)  	B,
-    ! 		integer  	LDB,
-    ! 		double precision  	BETA,
-    ! 		double precision, dimension(ldc,*)  	C,
-    ! 		integer  	LDC 
-    ! 	) 	        
-    call dgemm('N', 'N', ndim, ndim, ndim, 1.0d0, amat, ndim, bmat, ndim, 0.0d0, cmat, ndim)
-#endif
-
-end subroutine
-
-subroutine check_cmat_element(cmat, row, col, amat, bmat, ndim)
-    real(8), intent(in) :: cmat(ndim, ndim)
-    integer, intent(in) :: row
-    integer, intent(in) :: col
-    real(8), intent(in) :: amat(ndim, ndim)
-    real(8), intent(in) :: bmat(ndim, ndim)
-    integer, intent(in) :: ndim
-
-    real(8) :: x
-    x = 0.0d0
-    do i = 1, ndim
-       x = x + amat(row, i) * bmat(i, col)
-    end do
-
-    write(6, '("expected cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, x
-    write(6, '("computed cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, cmat(row, col)
-    if (abs(cmat(row, col) - x) > 1.0e-8) then
-        stop 'a computed element has a wrong value'
-    end if
-end subroutine
-
-
-subroutine test_dgemm(ndim, num_loops)
-#if defined(USE_MAGMA_DGEMM_GPU)
-    use magma, only: magmaf_init, magmaf_finalize
-    use magma, only: magmablasf_dgemm  !, magmaf_dgemm_gpu
-#endif
-
-    implicit none
-    integer, intent(in) :: ndim
-    integer, intent(in) :: num_loops
-    integer, parameter :: dp = kind(1.0d0)
-    real :: ct_start, ct_stop  ! elapsed cpu time relative to an arbitrary fixed time. Expressed in seconds with the granularity of 1 microsecond
-    integer(8) :: num_ops
-    real :: gflops
-
-    integer :: sc_start, sc_stop   ! system clock time of start and stop events, expressed in ticks
-    integer :: sc_count_rate  ! number of system clock ticks per second
-    integer :: sc_count_max   ! the max possible number of system clock ticks returned by system_clock
-    integer :: s
-    REAL :: a_diff, diff
-    REAL :: num_sc_ticks_per_second  ! the number of system clock ticks per second
-
-    real*8, allocatable :: amat(:,:)
-    real*8, allocatable :: bmat(:,:)
-    real*8, allocatable :: cmat(:,:)
-    real(dp) :: x
-    integer :: i, j
-
-#if defined(USE_MAGMA_DGEMM_GPU)
-    write(6,*) 'DEBUG: init magma'
-    call magmaf_init()
-#endif
-
-    ! First initialize the system_clock
-    CALL system_clock(count_rate=sc_count_rate)
-    CALL system_clock(count_max=sc_count_max)
-    num_sc_ticks_per_second = REAL(sc_count_rate)
-    WRITE(*,*) "system_clock rate : ", num_sc_ticks_per_second, " ticks per second"
-
-    diff = 0.0
-    a_diff = 0.0
-    s = 0
-
-    allocate(amat(ndim, ndim))
-    allocate(bmat(ndim, ndim))
-    allocate(cmat(ndim, ndim))
-
-    call set_random_seed(42)
-
-    !call random_number(amat)
-    !amat = 0.5_dp*(amat + transpose(amat))
-    do j = 1, ndim
-        do i = 1, ndim
-           call random_number(x)
-           amat(i,j) = x
-           call random_number(x)
-           bmat(i,j) = x
-        end do
-    end do
-
-    call cpu_time(ct_start)
-    call system_clock(sc_start)
-
-    do j = 1, num_loops
-        ! playmat = amat
-
-        call sqmatmul(amat, bmat, cmat, ndim)
-
-    end do
-
-    call cpu_time(ct_stop)
-    call system_clock(sc_stop)
-    if ( (sc_stop - sc_start)/num_sc_ticks_per_second < (ct_stop - ct_start) ) s = s + 1
-    diff = (sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start) + diff
-    a_diff = ABS((sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start)) + a_diff
-
-    ! check one of the elements of cmat (the last one here: cmat(ndim, ndim))
-    call check_cmat_element(cmat,    1,    1, amat, bmat, ndim)
-    call check_cmat_element(cmat,    1, ndim, amat, bmat, ndim)
-    call check_cmat_element(cmat, ndim,    1, amat, bmat, ndim)
-    call check_cmat_element(cmat, ndim, ndim, amat, bmat, ndim)
-
-    ! write(6, *) 'amat = '
-    ! call print_matrix(amat, ndim)
-
-    ! write(6, *) 'bmat = '
-    ! call print_matrix(bmat, ndim)
-
-    ! write(6, *) 'cmat = '
-    ! call print_matrix(cmat, ndim)
-
-    num_ops = real(ndim) * real(ndim) * real(ndim) * 2 * num_loops
-    gflops = num_ops / (ct_stop-ct_start) / 1.0e9
-
-
-    write(6, '("Time taken by dgemm for matrix size ",i8," was ",f10.2," seconds")') ndim, ct_stop-ct_start
-    WRITE(*,*) "gflops (including potential memory transfers)       : ", gflops
-    
-    WRITE(*,*) "system_clock         : ",(sc_stop - sc_start)/num_sc_ticks_per_second
-    WRITE(*,*) "cpu_time             : ",(ct_stop - ct_start)
-    WRITE(*,*) "sys_clock < cpu_time : ",s
-    WRITE(*,*) "mean diff            : ",diff
-    WRITE(*,*) "abs mean diff        : ",a_diff
-
-#if defined(USE_MAGMA_DGEMM_GPU)
-    write(6,*) 'DEBUG: deinit magma'
-    call magmaf_finalize()
-#endif
-
-
-    deallocate(amat, bmat, cmat)
-    end
diff --git a/test/test_starbench.py b/test/test_starbench.py
deleted file mode 100644
index bca5e34..0000000
--- a/test/test_starbench.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import unittest
-import logging
-from pathlib import Path
-# from cocluto import ClusterController
-from starbench.main import starbench_cmake_app, ExistingDir
-
-
-class StarbenchTestCase(unittest.TestCase):
-
-    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-    def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
-        return super().setUp()
-
-    def test_mamul1_benchmark(self):
-        logging.info('test_mamul1_benchmark')
-        source_code_provider = ExistingDir(Path('test/mamul1').absolute())
-        tmp_dir = Path('tmp').absolute()
-        benchmark_command = ['./mamul1', '3000', '10']
-        starbench_cmake_app(source_code_provider=source_code_provider, tmp_dir=tmp_dir, num_cores=2, benchmark_command=benchmark_command)
-        # self.assertIsInstance(job_state, JobsState)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/usecases/ipr/hibench/readme.md b/usecases/ipr/hibench/readme.md
deleted file mode 100644
index af7757a..0000000
--- a/usecases/ipr/hibench/readme.md
+++ /dev/null
@@ -1,27 +0,0 @@
-This example illustrates how `starbench` is used at IPR (Institut de Physique de Rennes) to measure the performance of [hibridon](https://github.com/hibridon/hibridon) on IPR's cluster (`physix`)
-
-
-
-
-usage:
-
-```sh
-20241007-15:08:10 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync --exclude .git --exclude starbench.venv --exclude tmp --exclude usecases/ipr/hibench/results  -va ./ graffy@alambix.ipr.univ-rennes.fr:/opt/ipr/cluster/work.global/graffy/starbench.git/
-sending incremental file list
-
-sent 1,416 bytes  received 25 bytes  960.67 bytes/sec
-total size is 140,225  speedup is 97.31
-last command status : [0]
-```
-
-```sh
-graffy@alambix-frontal:/opt/ipr/cluster/work.global/graffy/starbench.git/usecases/ipr/hibench$ ./hibenchonphysix.py --commit-id 53894da48505892bfa05693a52312bacb12c70c9 --results-dir $GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench/hibench/$(date --iso=seconds) --arch-regexp 'intel_xeon_x5650' --cmake-path /usr/bin/cmake
-```
-
-`hibenchonphysix.py` script launches two `sge` jobs for each machine type in `physix` cluster:
-- one job that performs a benchmark of hibridon with `gfortran` compiler
-- one job that performs a benchmark of hibridon with `ifort` compiler
-
-When the job successfully completes, it puts the results of the benchmark on `physix`'s global work directory (eg `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/53894da48505892bfa05693a52312bacb12c70c9/nh3h2_qma_long/intel_xeon_x5550/gfortran`)
-
-