From fe4a07a67ed0043da521c6ffec03586cc7d28598 Mon Sep 17 00:00:00 2001 From: Guillaume Raffy Date: Thu, 10 Oct 2024 18:06:09 +0200 Subject: [PATCH] refactored all iprbench code found in `usecases/ipr/hibench` into a `iprbench` python package The main motivation for this is to allow the code executed by jobs to benefit from multiple packages (eg iprbench, [stargemm](https://github.com/g-raffy/starbench), cocluto) to perform common missing tasks such as registering the results output in the iprbench database. work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958] and [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372] --- .gitignore | 9 +- README.md | 158 +++++--- {test => iprbench}/__init__.py | 0 iprbench/hibench/__init__.py | 0 .../hibench/hibenchonphysix.py | 64 ++-- .../ipr => iprbench}/hibench/showresults.py | 2 - iprbench/main.py | 1 + .../resources}/starbench-template.job | 35 +- pyproject.toml | 19 +- src/starbench/__init__.py | 2 - src/starbench/core.py | 284 --------------- src/starbench/main.py | 166 --------- test/mamul1/CMakeLists.txt | 43 --- test/mamul1/mamul1.F90 | 339 ------------------ test/test_starbench.py | 25 -- tests/__init__.py | 0 usecases/ipr/hibench/readme.md | 27 -- 17 files changed, 198 insertions(+), 976 deletions(-) rename {test => iprbench}/__init__.py (100%) create mode 100644 iprbench/hibench/__init__.py rename {usecases/ipr => iprbench}/hibench/hibenchonphysix.py (85%) rename {usecases/ipr => iprbench}/hibench/showresults.py (99%) create mode 100644 iprbench/main.py rename {usecases/ipr/hibench => iprbench/resources}/starbench-template.job (72%) delete mode 100644 src/starbench/__init__.py delete mode 100755 src/starbench/core.py delete mode 100755 src/starbench/main.py delete mode 100644 test/mamul1/CMakeLists.txt delete mode 100644 test/mamul1/mamul1.F90 delete mode 100644 test/test_starbench.py create mode 100644 tests/__init__.py delete mode 100644 usecases/ipr/hibench/readme.md diff --git a/.gitignore b/.gitignore index c6a7dff..65fbaf7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -**/__pycache__/ -dist/ -src/starbench.egg-info/ -tmp/ -starbench.venv/ -usecases/ipr/hibench/results/ +iprbench.venv/ +results/ +iprbench/__pycache__/__init__.cpython-38.pyc diff --git a/README.md b/README.md index ba03043..48aabe7 100644 --- a/README.md +++ b/README.md @@ -1,53 +1,129 @@ -# starbench -a tool to benchmark a git cmake application using embarassingly parallel runs +# iprbenchmark -`starbench` is a tool designed to build and test the performance of an application versioned in a `git` repository and using the `cmake` build system. +This example illustrates how `starbench` is used at IPR (Institut de Physique de Rennes) to measure the performance of [hibridon](https://github.com/hibridon/hibridon) on IPR's cluster (`alambix`) -In order to measure the performance of the code in *hpc* (high performance computing) environment, `starbench` is designed to make all the cores busy. For this, it uses the same technique as in `hpl`'s `stardgemm` test (that's where the 'star' prefix comes from): the same code is run on each `CPU` core. This way, we performances measures are expected to be more realistic, as the cores won't benefit from the unrealistic boost provided by the memory cache of unused cores. -If the user provides: -- the `url` of the repository -- the commit number of the version to test -- the number of cores the benchmark should use (usually the number of cores of the machine that executes the benchmark) -- the benchmark command to use - -then `starbench` will do the rest: -1. clone the repository to a temporary location -2. checkout the requested version -3. configure the build -4. build the code -5. run the benchmark command for each core -6. output the average duration of the benchmark - -## example +usage: ```sh -bob@bob-ws2:~/work/starbench$ python3 -m venv ./starbench.venv -bob@bob-ws2:~/work/starbench$ source ./starbench.venv/bin/activate -bob@bob-ws2:~/work/starbench$ pip install wheel -Collecting wheel - Using cached wheel-0.43.0-py3-none-any.whl (65 kB) -Installing collected packages: wheel -Successfully installed wheel-0.43.0 -bob@bob-ws2:~/work/starbench$ pip install ./starbench.git -Processing ./starbench.git +20241007-15:08:10 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync --exclude .git --exclude starbench.venv --exclude tmp --exclude usecases/ipr/hibench/results -va ./ graffy@alambix.ipr.univ-rennes.fr:/opt/ipr/cluster/work.global/graffy/starbench.git/ +sending incremental file list + +sent 1,416 bytes received 25 bytes 960.67 bytes/sec +total size is 140,225 speedup is 97.31 +last command status : [0] +``` +## install iprbench + +```sh +graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ python3 -m venv iprbench.venv +graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ source ./iprbench.venv/bin/activate +(iprbench.venv) graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ pip install ./iprbench.git +Processing ./iprbench.git Installing build dependencies ... done - WARNING: Missing build requirements in pyproject.toml for file:///home/bob/work/starbench/starbench.git. - WARNING: The project does not specify a build backend, and pip cannot fall back to setuptools without 'wheel'. Getting requirements to build wheel ... done - Preparing wheel metadata ... done -Building wheels for collected packages: starbench - Building wheel for starbench (PEP 517) ... done - Created wheel for starbench: filename=starbench-1.0.0-py3-none-any.whl size=8011 sha256=a98c590fbc481722aed3512ae6345cce741615a17c24e67dc88070f85b616c4c - Stored in directory: /tmp/pip-ephem-wheel-cache-m_0xpm10/wheels/67/41/37/debf4c9251b719f84456398e144dffaa34d18ab336b529dc53 -Successfully built starbench -Installing collected packages: starbench -Successfully installed starbench-1.0.0 -bob@bob-ws2:~/work/starbench$ starbench --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-path=/opt/cmake/cmake-3.23.0/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$' + Preparing metadata (pyproject.toml) ... done +Collecting starbench@ git+https://github.com/g-raffy/starbench + Cloning https://github.com/g-raffy/starbench to /tmp/user/59825/pip-install-uw5i22k1/starbench_890d53070dec47738060b57fdd29b001 + Running command git clone --filter=blob:none --quiet https://github.com/g-raffy/starbench /tmp/user/59825/pip-install-uw5i22k1/starbench_890d53070dec47738060b57fdd29b001 + Resolved https://github.com/g-raffy/starbench to commit 3ca66d00636ad055506f6b4e2781b498cc7487ac + Installing build dependencies ... done + Getting requirements to build wheel ... done + Preparing metadata (pyproject.toml) ... done +Collecting pandas + Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.1/13.1 MB 23.6 MB/s eta 0:00:00 +Collecting matplotlib + Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.3/8.3 MB 20.1 MB/s eta 0:00:00 +Collecting sqlalchemy + Downloading SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 17.6 MB/s eta 0:00:00 +Collecting contourpy>=1.0.1 + Downloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 323.2/323.2 kB 3.4 MB/s eta 0:00:00 +Collecting cycler>=0.10 + Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB) +Collecting fonttools>=4.22.0 + Downloading fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.9/4.9 MB 20.9 MB/s eta 0:00:00 +Collecting kiwisolver>=1.3.1 + Downloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 8.9 MB/s eta 0:00:00 +Collecting numpy>=1.23 + Downloading numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.3/16.3 MB 19.9 MB/s eta 0:00:00 +Collecting packaging>=20.0 + Downloading packaging-24.1-py3-none-any.whl (53 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.0/54.0 kB 1.0 MB/s eta 0:00:00 +Collecting pillow>=8 + Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 20.9 MB/s eta 0:00:00 +Collecting pyparsing>=2.3.1 + Downloading pyparsing-3.1.4-py3-none-any.whl (104 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 104.1/104.1 kB 1.3 MB/s eta 0:00:00 +Collecting python-dateutil>=2.7 + Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 229.9/229.9 kB 933.1 kB/s eta 0:00:00 +Collecting pytz>=2020.1 + Downloading pytz-2024.2-py2.py3-none-any.whl (508 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 508.0/508.0 kB 3.9 MB/s eta 0:00:00 +Collecting tzdata>=2022.7 + Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 346.6/346.6 kB 2.0 MB/s eta 0:00:00 +Collecting typing-extensions>=4.6.0 + Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB) +Collecting greenlet!=0.4.17 + Downloading greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (602 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 602.4/602.4 kB 4.9 MB/s eta 0:00:00 +Collecting six>=1.5 + Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) +Building wheels for collected packages: iprbench, starbench + Building wheel for iprbench (pyproject.toml) ... done + Created wheel for iprbench: filename=iprbench-0.0.1-py3-none-any.whl size=19188 sha256=0ece4a9b1b44434c0f033a253aae301a5d53039955f1f6b182c0b833d44c3e93 + Stored in directory: /tmp/user/59825/pip-ephem-wheel-cache-8yw7rwk6/wheels/84/c9/82/e72d13fb7df12a8004ca6383b185a00f9ab3ddd8695e9e6cd8 + Building wheel for starbench (pyproject.toml) ... done + Created wheel for starbench: filename=starbench-1.0.0-py3-none-any.whl size=9612 sha256=18968f356bb3d6f6c2337b4bbaf709510c50a6a9902c3363f4b568c409846ac0 + Stored in directory: /tmp/user/59825/pip-ephem-wheel-cache-8yw7rwk6/wheels/cf/73/d3/14e4830d3e06c2c3ab71fdf68e0f14b50132ec23eaa6b2aa65 +Successfully built iprbench starbench +Installing collected packages: pytz, tzdata, typing-extensions, starbench, six, pyparsing, pillow, packaging, numpy, kiwisolver, greenlet, fonttools, cycler, sqlalchemy, python-dateutil, contourpy, pandas, matplotlib, iprbench +Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2 ``` -## how to test +## launch benchmark jobs on alambix cluster ```sh -(starbench.venv) graffy@graffy-ws2:~/work/starbench/starbench.git$ python3 -m unittest test.test_starbench +(iprbench.venv) graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ hibenchonphysix --commit-id 53894da48505892bfa05693a52312bacb12c70c9 --results-dir $GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench/hibench/$(date --iso=seconds) --arch-regexp 'intel_xeon_x5650' --cmake-path /usr/bin/cmake +INFO:root:available host groups: dict_keys(['intel_xeon_x5550', 'intel_xeon_x5650', 'intel_xeon_e5-2660', 'intel_xeon_e5-2660v2', 'intel_xeon_e5-2660v4', 'intel_xeon_gold_6140', 'intel_xeon_gold_6154', 'intel_xeon_gold_5220', 'intel_xeon_gold_6226r', 'intel_xeon_gold_6248r', 'intel_xeon_gold_6348', 'amd_epyc_7282', 'amd_epyc_7452']) +INFO:root:requested host groups: ['intel_xeon_x5650'] +INFO:root:using test arch4_quick for benchmarking +INFO:root:creating /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/iprbench.venv.tgz (the virtual environment that will be used in this bench by all its jobs at some point) +Collecting virtualenv-clone + Using cached virtualenv_clone-0.5.7-py3-none-any.whl (6.6 kB) +Installing collected packages: virtualenv-clone +Successfully installed virtualenv-clone-0.5.7 +DEBUG:root:command = /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=gfortran" "ctest --output-on-failure -L ^arch4_quick$" "" "/usr/bin/cmake" +DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1G -j y -N hibench_intel_xeon_x5650_gfortran_53894da48505892bfa05693a52312bacb12c70c9 /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=gfortran" "ctest --output-on-failure -L ^arch4_quick$" "" "/usr/bin/cmake" +Your job 17357 ("hibench_intel_xeon_x5650_gfortran_53894da48505892bfa05693a52312bacb12c70c9") has been submitted +INFO:root:using test arch4_quick for benchmarking +INFO:root:skipping the creation of /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/iprbench.venv.tgz because it already exists (probably created for other jobs of the same bench) +DEBUG:root:command = /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/ifort/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=ifort -DBLA_VENDOR=Intel10_64lp" "ctest --output-on-failure -L ^arch4_quick$" "module load compilers/ifort/latest" "/usr/bin/cmake" +DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1G -j y -N hibench_intel_xeon_x5650_ifort_53894da48505892bfa05693a52312bacb12c70c9 /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/ifort/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=ifort -DBLA_VENDOR=Intel10_64lp" "ctest --output-on-failure -L ^arch4_quick$" "module load compilers/ifort/latest" "/usr/bin/cmake" +Your job 17358 ("hibench_intel_xeon_x5650_ifort_53894da48505892bfa05693a52312bacb12c70c9") has been submitted ``` + +`hibenchonphysix` script launches two `sge` jobs for each machine type in `alambix` cluster: +- one job that performs a benchmark of hibridon with `gfortran` compiler +- one job that performs a benchmark of hibridon with `ifort` compiler + +When the job successfully completes, it puts the results of the benchmark on `alambix`'s global work directory (eg `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran`) + + +## graph the results of benchmarks + +`showresults` is a command line tool that graphs the results after they've been downloaded from the results directory (for example `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-10T12:11:44+02:00`) to the hardcoded (at the moment) path `/home/graffy/work/starbench/starbench.git/usecases/ipr/hibench/results` + +```sh +20241010-16:30:54 graffy@graffy-ws2:~/work/starbench/iprbench.git$ showresults +``` + diff --git a/test/__init__.py b/iprbench/__init__.py similarity index 100% rename from test/__init__.py rename to iprbench/__init__.py diff --git a/iprbench/hibench/__init__.py b/iprbench/hibench/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/usecases/ipr/hibench/hibenchonphysix.py b/iprbench/hibench/hibenchonphysix.py similarity index 85% rename from usecases/ipr/hibench/hibenchonphysix.py rename to iprbench/hibench/hibenchonphysix.py index 3ff8850..eaea373 100755 --- a/usecases/ipr/hibench/hibenchonphysix.py +++ b/iprbench/hibench/hibenchonphysix.py @@ -2,13 +2,14 @@ # this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number) from typing import List, Tuple, Dict from argparse import ArgumentParser -import os from os import getenv, makedirs import shutil from pathlib import Path import subprocess import re import logging +import importlib.resources +import venv HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr' @@ -168,6 +169,25 @@ class ClusterNodeDb: return (hosts, num_cores) +def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path): + this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv + assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})' + + if duplicate_virtualenv_path.exists(): + shutil.rmtree(duplicate_virtualenv_path) + + cloner_virtualenv_path = Path('/tmp/venv_cloner.venv') + venv.create(cloner_virtualenv_path, with_pip=True) + subprocess.run(f'source {cloner_virtualenv_path}/bin/activate; pip install virtualenv-clone', shell=True, check=True, executable='/bin/bash') + subprocess.run(f'source {cloner_virtualenv_path}/bin/activate; virtualenv-clone {this_virtualenv_path} {duplicate_virtualenv_path}', shell=True, check=True, executable='/bin/bash') + shutil.rmtree(cloner_virtualenv_path) + + +def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Path): + duplicate_this_virtualenv_to(venv_hardcoded_path) + subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL) + + def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: HostGroupId, results_dir: Path, compiler_id: CompilerId, cmake_path: str): cluster_db = ClusterNodeDb() @@ -178,8 +198,8 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos return quick_test = 'arch4_quick' # about 2s on a core i5 8th generation - representative_test = 'nh3h2_qma_long' # about 10min on a core i5 8th generation - use_test_mode = False + representative_test = 'nh3h2_qma_long' # about 10min on a core i5 8th generation + use_test_mode = True if use_test_mode: benchmark_test = quick_test else: @@ -220,24 +240,30 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos makedirs(this_bench_dir, exist_ok=True) starbench_job_path = this_bench_dir / 'starbench.job' - this_file_path = Path(os.path.realpath(__file__)) - scripts_dir = this_file_path.parent - starbench_root_path = scripts_dir.parent.parent.parent # TODO: beurk - # create a copy of stargemm for use by the jobs (so that starbench_root_path can be modified without affecting the jobs) - jobs_starbench_dir = results_dir / 'starbench' # the location of starbench source code for use by the jobs run by this command - shutil.copytree(starbench_root_path, jobs_starbench_dir, dirs_exist_ok=True) + job_venv_archive_path = results_dir / 'iprbench.venv.tgz' + iprbench_venv_hardcoded_path = Path('/tmp') / 'iprbench.venv' + if job_venv_archive_path.exists(): + logging.info('skipping the creation of %s because it already exists (probably created for other jobs of the same bench)', job_venv_archive_path) + else: + # freeze this virtualenv so that all jobs related to this benchmark will use the same version of iprbench + logging.info('creating %s (the virtual environment that will be used in this bench by all its jobs at some point)', job_venv_archive_path) + archive_this_virtualenv_to(job_venv_archive_path, iprbench_venv_hardcoded_path) # create the job file (which embeds starbench.py) tags_dict = { # '': scripts_dir / 'starbench.py', - '': str(starbench_job_path) + '': str(starbench_job_path), + '': str(iprbench_venv_hardcoded_path), + '': str(job_venv_archive_path) } - substitute_tags(input_file_path=scripts_dir / 'starbench-template.job', tags_dict=tags_dict, output_file_path=starbench_job_path) + with importlib.resources.path('iprbench.resources', 'starbench-template.job') as job_template_path: + # job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job' + substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path) subprocess.run(['chmod', 'a+x', starbench_job_path], check=True) - command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{starbench_root_path}" "{cmake_path}"' - print(f'command = {command}') + command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{cmake_path}"' + logging.debug('command = %s', command) qsub_command = 'qsub' qsub_command += f' -pe smp {num_cores}' @@ -249,7 +275,7 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos qsub_command += ' -j y' # merge stderr file into stdout file for easier reading of history of events qsub_command += f' -N hibench_{host_group_id}_{compiler_id}_{hibridon_version}' qsub_command += f' {command}' - print(f'qsub_command = {qsub_command}') + logging.debug('qsub_command = %s', qsub_command) subprocess.run(qsub_command, cwd=this_bench_dir, check=True, shell=True) @@ -267,10 +293,10 @@ def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_reg cluster_db = ClusterNodeDb() all_host_groups = cluster_db.cpu_defs.keys() - - print(f'available host groups: {all_host_groups}') + + logging.info('available host groups: %s', all_host_groups) host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None] - print(f'requested host groups: {host_groups}') + logging.info('requested host groups: %s', host_groups) for compiler in compilers: for host_group in host_groups: @@ -290,6 +316,7 @@ def path_is_reachable_by_compute_nodes(path: Path): def main(): + logging.basicConfig(level=logging.DEBUG) arg_parser = ArgumentParser(description='launches hibridon benchmark jobs on IPR\'s physix cluster', epilog='example:\n --commit-id a3bed1c3ccfbca572003020d3e3d3b1ff3934fad') arg_parser.add_argument('--commit-id', type=str, required=True, help='the commit id of the version of code to benchmark') arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)') @@ -311,6 +338,3 @@ def main(): raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') launch_perf_jobs(hibridon_version, results_dir, arch_regexp, cmake_path) - - -main() diff --git a/usecases/ipr/hibench/showresults.py b/iprbench/hibench/showresults.py similarity index 99% rename from usecases/ipr/hibench/showresults.py rename to iprbench/hibench/showresults.py index 4af1d9a..5880662 100755 --- a/usecases/ipr/hibench/showresults.py +++ b/iprbench/hibench/showresults.py @@ -397,5 +397,3 @@ def main(): create_graphs(engine) - -main() diff --git a/iprbench/main.py b/iprbench/main.py new file mode 100644 index 0000000..99c4176 --- /dev/null +++ b/iprbench/main.py @@ -0,0 +1 @@ +__version__ = '0.0.1' \ No newline at end of file diff --git a/usecases/ipr/hibench/starbench-template.job b/iprbench/resources/starbench-template.job similarity index 72% rename from usecases/ipr/hibench/starbench-template.job rename to iprbench/resources/starbench-template.job index bfb949d..739cb15 100644 --- a/usecases/ipr/hibench/starbench-template.job +++ b/iprbench/resources/starbench-template.job @@ -7,8 +7,7 @@ code_version="$4" # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e cmake_options="$5" # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON' benchmark_command="$6" # eg 'ctest -L ^arch4_quick$' env_vars_bash_commands="$7" # defines extra environment variables prior to launch starbench. eg "export MKLROOT=/opt/intel/compilers_and_libraries_2020.1.217/linux/mkl" -starbench_src_url="$8" # location of starbench source (eg /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibench/2024-10-08T16:39:52+02:00/starbench) -cmake_path="$9" # eg '/opt/cmake/cmake-3.23.0/bin/cmake' +cmake_path="$8" # eg '/opt/cmake/cmake-3.23.0/bin/cmake' executed_by_sge='' if [ "${JOB_ID}" = '' ] @@ -33,27 +32,29 @@ then fi mkdir -p "${temp_dir}" -# create a virtual environment to install starbench -venv_path="${temp_dir}/starbench.venv" -python3 -m virtualenv "$venv_path" -if [ $? != 0 ] +iprbench_venv_path='' +iprbench_venv_parent=$(dirname "$iprbench_venv_path") +iprbench_venv_archive_path='' +echo "unarchiving virtual environment ${iprbench_venv_archive_path} to ${iprbench_venv_parent}" +pushd "${iprbench_venv_parent}" + tar xzvf "${iprbench_venv_archive_path}" +popd +if [ ! -d "${iprbench_venv_path}" ] then - echo "failed to create the virtual environment $venv_path" - exit 1 -fi -source "$venv_path/bin/activate" -if [ $? != 0 ] -then - echo "failed to activate the virtual environment $venv_path" + echo "failed to find expected directory ${iprbench_venv_path}" exit 1 fi -pip install $starbench_src_url +echo "using the iprbench virtual environment that has been created for this bench: ${iprbench_venv_path}" +source "$iprbench_venv_path/bin/activate" if [ $? != 0 ] then - echo "failed to install starbench ($starbench_src_url) in the virtual environment $venv_path" + echo "failed to activate the virtual environment $iprbench_venv_path" exit 1 fi +echo "VIRTUAL_ENV = $VIRTUAL_ENV" +# show the list of packages installed in the virtual environment +pip list output_dir="${temp_dir}" num_cores=${NSLOTS} @@ -94,14 +95,14 @@ eval ${command} if [ "$?" = '0' ] then echo "the command ${command} succeeded" - rsync -va --exclude 'build' --exclude 'source.git' "${output_dir}/" ${launch_dir}/ # exclude the source.git and build directories (one for each worker) because they are big and not that precious + rsync -va --exclude 'build' --exclude 'source.git' --exclude "${iprbench_venv_path}" "${output_dir}/" ${launch_dir}/ # exclude the source.git and build directories (one for each worker) because they are big and not that precious # TMPDIR will be deleted by sge at the end of the job else if [ ${executed_by_sge} = 'true' ] then # TMPDIR will be deleted by sge at the end of the job. Backup data for investigation backup_dir="/opt/ipr/cluster/work.local/$(whoami)/${JOB_ID}" - echo "moving ${output_dir} to ${backup_dir} to that it doesn't get deleted by sge at the end of the job. This way, data gets a chance to be investigated then manually deleted." + echo "moving ${output_dir} to ${backup_dir} so that it doesn't get deleted by sge at the end of the job. This way, data gets a chance to be investigated then manually deleted." mv "${output_dir}" "${backup_dir}" fi echo "the command ${command} failed... the output data dir (${output_dir}) is expected to be cleaned up manually after investigation" diff --git a/pyproject.toml b/pyproject.toml index 2bff8c2..cb97dff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,15 +3,19 @@ requires = ["setuptools"] build-backup = "setuptools.build_meta" [project] -name = "starbench" +name = "iprbench" dynamic = ["version"] # the list of fields whose values are dicovered by the backend (eg __version__) -description = "a tool to benchmark a git cmake application using embarassingly parallel runs" +description = "benchmarks for IPR (Institut de Physique de Rennes) cluster" readme = "README.md" keywords = ["benchmark", "hpc", "parallel", 'openmp'] license = {text = "MIT License"} dependencies = [ + "pandas", + "matplotlib", + "sqlalchemy", # "cocluto >= 1.2" # "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto" + "starbench@git+https://github.com/g-raffy/starbench" ] requires-python = ">= 3.8" authors = [ @@ -19,10 +23,17 @@ authors = [ ] [project.scripts] -starbench = "starbench.main:main" +hibenchonphysix = "iprbench.hibench.hibenchonphysix:main" +showresults = "iprbench.hibench.showresults:main" [project.urls] Repository = "https://github.com/g-raffy/starbench" +[tool.setuptools] +packages = ["iprbench", "iprbench.hibench"] + [tool.setuptools.dynamic] -version = {attr = "starbench.main.__version__"} +version = {attr = "iprbench.main.__version__"} + +[tool.setuptools.package-data] +iprbench = ["resources/**/*"] diff --git a/src/starbench/__init__.py b/src/starbench/__init__.py deleted file mode 100644 index 5b7a097..0000000 --- a/src/starbench/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# from .main import starbench_cmake_app -# __all__ = [starbench_cmake_app] diff --git a/src/starbench/core.py b/src/starbench/core.py deleted file mode 100755 index f8cca14..0000000 --- a/src/starbench/core.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python3 -'''starbench is an application that is able to measure the execution time of a user software suite in various conditions (different build modes and different execution modes) - -''' -__version__ = '1.0.0' -import threading -import subprocess -import os -import sys -from typing import List, Dict, Optional, Tuple, Callable -from datetime import datetime -from pathlib import Path -from abc import ABC, abstractmethod -# from typing import ForwardRef -try: - from typing import ForwardRef # type: ignore pylint: disable=ungrouped-imports -except ImportError: - # python 3.6 - from typing import _ForwardRef as ForwardRef - -assert sys.version_info >= (3, 5, 0), 'this code requires at least python 3.5' # type hints in arguments - - -class StarBenchException(Exception): - '''base exception for user errors detected by starbench''' - - -RunId = int # identifier of a run -WorkerId = int # identifier of a worker (a run is performed on a worker) -DurationInSeconds = float -ProcessId = int -ReturnCode = int -Url = str -GitCommitId = str - - -class Run(): - """represents a run of a run of the benchmarked command within its CommandPerfEstimator - """ - id: RunId # uniquely identifies a run within its CommandPerfEstimator instance - worker_id: WorkerId # the worker used for this run (number of workers = number of parallel runs) - pid: Optional[ProcessId] # the process identifier of the process used by the command - start_time: datetime # the time at which the command process has started - return_code: ReturnCode # the exit code of the command process - end_time: Optional[datetime] # the time at which the command process has ended. None if the process is still running - - def __init__(self, run_id: RunId, worker_id: WorkerId): - self.id = run_id - self.worker_id = worker_id - self.pid = None - self.return_code = 0 - self.start_time = datetime.now() - self.end_time = None - - def has_finished(self) -> bool: - """indicates if this run has finished""" - return self.end_time is not None - - def get_duration(self) -> DurationInSeconds: - """returns the duration of this run, provided it has finished - """ - assert self.has_finished() - return (self.end_time - self.start_time).total_seconds() - - -CommandPerfEstimator = ForwardRef('CommandPerfEstimator') - - -class IStarBencherStopCondition(ABC): - """abstract handler that decides if the given CommandPerfEstimator has enough runs to estimate the performance or should trigger new runs - - """ - @abstractmethod - def should_stop(self, star_bencher: CommandPerfEstimator) -> bool: - """decides if the given CommandPerfEstimator instance should trigger new runs - - This method is called at the end of each run, to decide if another run should be triggered or not. - """ - - -class StopAfterSingleRun(IStarBencherStopCondition): - """a stop condition that causes the given CommandPerfEstimator to never start new runs - - as a result, this causes the given CommandPerfEstimator to just use one single run of the command to estimate its performance. - """ - def __init__(self): - pass - - def should_stop(self, star_bencher: CommandPerfEstimator): - # never start a new run - return True - - -class StopWhenConverged(IStarBencherStopCondition): - """a stop condition that triggers when the just completed run doesn't have much effect on the average run's duration - """ - def __init__(self, max_error: float = 0.01): - self.max_error = max_error - self._last_mean_duration = None - - def should_stop(self, star_bencher: CommandPerfEstimator) -> bool: - do_stop = False - mean_duration, _num_runs = star_bencher.get_run_mean_duration() - print(f'mean_duration = {mean_duration}') - if self._last_mean_duration is not None: - diff = abs(mean_duration - self._last_mean_duration) - print(f'diff = {diff}') - if diff < self.max_error: - do_stop = True - self._last_mean_duration = mean_duration - return do_stop - - -class CommandPerfEstimator(): # (false positive) pylint: disable=function-redefined - '''a command runner that runs a given command multiple times and measures the average execution duration - - the 'star' term comes from hpl's stadgemm benchmark, where we launch `n` independent programs on `n` cores - ''' - run_command: List[str] # the command that this instance of CommandPerfEstimator is expected to run (eg: ['ctest', '--output-on-failure', '-L', '^arch4_quick$']). The command supports the following tags: - run_command_cwd: Path # the current directory to use when executing run_command - stdout_filepath: Path # the path of the file that records the standard output of run_command - stderr_filepath: Path # the path of the file that records the standard error of run_command - num_cores_per_run: int # the max number of threads used by each run - num_parallel_runs: int # how many times run_command is run simultaneously - max_num_cores: int # the maximum allowed number of cores for this CommandPerfEstimator - stop_condition: IStarBencherStopCondition # the condition that is used so that this CommandPerfEstimator can decide to stop launching commands - stop_on_error: bool - _next_run_id: int - _runs: Dict[int, Run] - _last_mean_duration: Optional[DurationInSeconds] - _num_runs: int - _runs_lock: threading.Lock - _finished_event: threading.Event - - def __init__(self, run_command: List[str], num_cores_per_run: int, num_parallel_runs: int, max_num_cores: int, stop_condition: IStarBencherStopCondition, stop_on_error=True, run_command_cwd: Path = None, stdout_filepath: Path = None, stderr_filepath: Path = None): - assert num_cores_per_run * num_parallel_runs <= max_num_cores - self.run_command = run_command - self.run_command_cwd = run_command_cwd - self.stdout_filepath = stdout_filepath - self.stderr_filepath = stderr_filepath - self.num_cores_per_run = num_cores_per_run - self.num_parallel_runs = num_parallel_runs - self.max_num_cores = max_num_cores - self.stop_condition = stop_condition - self.stop_on_error = stop_on_error - self._next_run_id = 0 - self._runs = {} - self._last_mean_duration = None - self._num_runs = 0 - self._runs_lock = threading.Lock() - self._finished_event = threading.Event() - - def popen_and_call(self, popen_args: List[str], on_exit: Callable[[ProcessId, ReturnCode, RunId], None], run_id: RunId, cwd: Path, stdout_filepath: Path = None, stderr_filepath: Path = None): - """ - Runs the given args in a subprocess.Popen, and then calls the function - on_exit when the subprocess completes. - on_exit is a callable object, and popen_args is a list/tuple of args that - would give to subprocess.Popen. - """ - def run_in_thread(popen_args: List[str], on_exit: Callable[[ProcessId, ReturnCode, RunId], None]): - stdout = None - stderr = None - returncode = -1 - pid = -1 - streams_are_ok = True - try: - # with open(stdout_filepath, 'w', encoding='utf8') as stdout, open(stderr_filepath, 'w', encoding='utf8') as stderr: - if stdout_filepath is not None: - stdout = open(stdout_filepath, 'w', encoding='utf8') - if stderr_filepath is not None: - stderr = open(stderr_filepath, 'w', encoding='utf8') - except: - print(f'failed to open {stdout_filepath} or {stderr_filepath} in write mode') - streams_are_ok = False - if streams_are_ok: - try: - env = os.environ.copy() - # restrict the number of threads used by openmp - env['OMP_NUM_THREADS'] = f'{self.num_cores_per_run}' - # restrict the nu,ber of threads used by intel math kernel library - env['MKL_NUM_THREADS'] = f'{self.num_cores_per_run}' - proc = subprocess.Popen(popen_args, cwd=cwd, stdout=stdout, stderr=stderr, env=env) - pid = proc.pid - proc.wait() - returncode = proc.returncode - except: - print(f'command failed: {popen_args}') - on_exit(pid, returncode, run_id) - return - thread = threading.Thread(target=run_in_thread, args=(popen_args, on_exit)) - thread.start() - # returns immediately after the thread starts - return thread - - def get_run_mean_duration(self) -> Tuple[DurationInSeconds, int]: - """returns the average duration of all completed runs of this CommandPerfEstimator instance - """ - duration_sums = 0.0 # in python3.6+, replace with duration_sums: float = 0.0 - num_finished_runs = 0 # in python3.6+, replace with num_finished_runs: int = 0 - with self._runs_lock: - for run in self._runs.values(): - if run.has_finished(): - num_finished_runs += 1 - duration_sums += run.get_duration() - assert num_finished_runs > 0 - return duration_sums / num_finished_runs, num_finished_runs - - def _all_runs_have_finished(self): - with self._runs_lock: - for run in self._runs.values(): - if not run.has_finished(): - return False - return True - - def on_exit(self, pid: ProcessId, return_code: ReturnCode, run_id: RunId): - """method called when the command executed by a run ends. Unless the stop condition is met, a new run is started. - - pid: the process identifier of the process of the run that just finished - return_code: the return code of the process of the run that just finished - run_id: the run that just completed - """ - end_time = datetime.now() - # print(self, pid, run_id) - run = self._runs[run_id] - run.pid = pid - run.end_time = end_time - run.return_code = return_code - - do_stop = False - if self.stop_on_error and run.return_code != 0: - do_stop = True - else: - do_stop = self.stop_condition.should_stop(self) - if not do_stop: - # print('adding a run') - self._start_run(run.worker_id) # reuse the same worker as the run that has just finished - if self._all_runs_have_finished(): - # tell the main thread that all the runs have finished - self._finished_event.set() - - @staticmethod - def _interpret_tags(tagged_string: str, tags_value: Dict[str, str]) -> str: - untagged_string = tagged_string - for tag_id, tag_value in tags_value.items(): - assert isinstance(untagged_string, str) - untagged_string = untagged_string.replace(tag_id, tag_value) - return untagged_string - - def _start_run(self, worker_id: WorkerId): - """starts a run using the given worker""" - tags_value = { - '': f'{worker_id:03d}' - } - run_command = [CommandPerfEstimator._interpret_tags(s, tags_value) for s in self.run_command] - run_command_cwd = CommandPerfEstimator._interpret_tags(str(self.run_command_cwd), tags_value) - stdout_filepath = None - if self.stdout_filepath is not None: - stdout_filepath = CommandPerfEstimator._interpret_tags(str(self.stdout_filepath), tags_value) - Path(stdout_filepath).parent.mkdir(exist_ok=True) - stderr_filepath = None - if self.stderr_filepath is not None: - stderr_filepath = CommandPerfEstimator._interpret_tags(str(self.stderr_filepath), tags_value) - Path(stderr_filepath).parent.mkdir(exist_ok=True) - - with self._runs_lock: - run = Run(self._next_run_id, worker_id) - self._next_run_id += 1 - self._runs[run.id] = run - _run_thread = self.popen_and_call(popen_args=run_command, on_exit=self.on_exit, run_id=run.id, cwd=run_command_cwd, stdout_filepath=stdout_filepath, stderr_filepath=stderr_filepath) # noqa:F841 - - def run(self) -> DurationInSeconds: - '''performs the runs of the command and returns the runs' average duration''' - print(f"executing the following command in parallel ({self.num_parallel_runs} parallel runs) : '{str(self.run_command)}'") - for worker_id in range(self.num_parallel_runs): - self._start_run(worker_id) - # wait until all runs have finished - self._finished_event.wait() - with self._runs_lock: - workers_success = [run.return_code == 0 for run in self._runs.values()] - if not all(workers_success): - raise StarBenchException(f'at least one run failed (workers_success = {workers_success})') - mean_duration, num_runs = self.get_run_mean_duration() - print(f'mean duration : {mean_duration:.3f} s ({num_runs} runs)') - return mean_duration diff --git a/src/starbench/main.py b/src/starbench/main.py deleted file mode 100755 index 7535ca1..0000000 --- a/src/starbench/main.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -'''starbench is an application that is able to measure the execution time of a user software suite in various conditions (different build modes and different execution modes) - -''' -__version__ = '1.0.0' -import argparse -from abc import ABC, abstractmethod -import subprocess -from typing import List, Optional -from pathlib import Path -from .core import GitCommitId, Url, CommandPerfEstimator, StopAfterSingleRun - - -class IFileTreeProvider(ABC): - - @abstractmethod - def get_source_tree_path(self) -> Path: - pass - - -class ExistingDir(IFileTreeProvider): - dir_path: Path - - def __init__(self, dir_path: Path): - self.dir_path = dir_path - - def get_source_tree_path(self) -> Path: - return self.dir_path - - -class GitRepos(IFileTreeProvider): - git_repos_url: Url - git_user: Optional[str] - git_password: Optional[str] - code_version: Optional[GitCommitId] - src_dir: Optional[Path] # the temporary directory used to populate the source code - - def __init__(self, git_repos_url: Url, git_user: Optional[str] = None, git_password: Optional[str] = None, code_version: Optional[GitCommitId] = None, src_dir: Optional[Path] = None): - self.git_repos_url = git_repos_url - self.git_user = git_user - self.git_password = git_password - self.code_version = code_version - self.src_dir = src_dir - - def get_source_tree_path(self) -> Path: - self.src_dir.mkdir(exist_ok=True) - git_credentials = [] - if self.git_user: - git_credentials.append(self.git_user) - if self.git_password: - git_credentials.append(self.git_password) - git_repos_url = self.git_repos_url - if len(git_credentials) != 0: - git_repos_url = git_repos_url.replace('https://', f"https://{':'.join(git_credentials)}@") - # src_dir.mkdir(exist_ok=True) - subprocess.run(['git', 'clone', f'{str(self.git_repos_url)}', str(self.src_dir)], cwd=str(self.src_dir), check=True) - if self.code_version: - subprocess.run(['git', 'checkout', f'{self.code_version}'], cwd=str(self.src_dir), check=True) - return self.src_dir - - -def starbench_cmake_app(source_code_provider: IFileTreeProvider, tmp_dir: Path, num_cores: int, benchmark_command: List[str], cmake_options: Optional[List[str]] = None, cmake_exe_location: Path = None): - """ - tests_to_run : regular expression as understood by ctest's -L option. eg '^arch4_quick$' - """ - src_dir = source_code_provider.get_source_tree_path() - # we need one build for each parallel run, otherwise running ctest on parallel would overwrite the same file, which causes the test to randomly fail depnding on race conditions - worker_dir = tmp_dir / 'worker' - build_dir = worker_dir / 'build' - if cmake_options is None: - cmake_options = [] - print(f'creating build directory {worker_dir}') - create_build_dir = CommandPerfEstimator( - run_command=['mkdir', '-p', str(build_dir)], - num_cores_per_run=1, - num_parallel_runs=num_cores, - max_num_cores=num_cores, - stop_condition=StopAfterSingleRun(), - run_command_cwd=Path('/tmp'), - stdout_filepath=worker_dir / 'createdir_stdout.txt', - stderr_filepath=worker_dir / 'createdir_stderr.txt') - _create_build_dir_duration = create_build_dir.run() # noqa: F841 - # build_dir.mkdir(exist_ok=True) - - print(f'configuring {src_dir} into {build_dir} ...') - cmake_prog = 'cmake' - if cmake_exe_location: - cmake_prog = str(cmake_exe_location) - configure = CommandPerfEstimator( - run_command=[cmake_prog] + cmake_options + [str(src_dir)], - num_cores_per_run=1, - num_parallel_runs=num_cores, - max_num_cores=num_cores, - stop_condition=StopAfterSingleRun(), - run_command_cwd=build_dir, - stdout_filepath=worker_dir / 'configure_stdout.txt', - stderr_filepath=worker_dir / 'configure_stderr.txt') - _configure_duration = configure.run() # noqa: F841 - - print(f'building {build_dir} ...') - build = CommandPerfEstimator( - run_command=['make'], - num_cores_per_run=1, - num_parallel_runs=num_cores, - max_num_cores=num_cores, - stop_condition=StopAfterSingleRun(), - run_command_cwd=build_dir, - stdout_filepath=worker_dir / 'build_stdout.txt', - stderr_filepath=worker_dir / 'build_stderr.txt') - _build_duration = build.run() # noqa: F841 - - print(f'benchmarking {build_dir} ...') - stop_condition = StopAfterSingleRun() - bench = CommandPerfEstimator( - run_command=benchmark_command, - num_cores_per_run=1, - num_parallel_runs=num_cores, - max_num_cores=num_cores, - stop_condition=stop_condition, - run_command_cwd=build_dir, - stdout_filepath=worker_dir / 'bench_stdout.txt', - stderr_filepath=worker_dir / 'bench_stderr.txt') - mean_duration = bench.run() - print(f'duration : {mean_duration:.3f} s' % ()) - - -def main(): - '''main program''' - - example_text = '''example: - - %(prog)s --git-repos-url https://github.com/hibridon/hibridon --code-version a3bed1c3ccfbca572003020d3e3d3b1ff3934fad --git-user g-raffy --git-pass-file "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" --num-cores 2 --output-dir=/tmp/hibench --cmake-path=/opt/cmake/cmake-3.23.0/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DBUILD_TESTING=ON --benchmark-command='ctest --output-on-failure -L ^arch4_quick$' - - ''' - - parser = argparse.ArgumentParser(description='performs a benchmark on a cmake buildable app hosted on a git repository', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('--git-repos-url', required=True, help='the url of the code to benchmark (eg https://github.com/hibridon/hibridon)') - parser.add_argument('--code-version', help='the version of the code to use; either a branch or a commit id (eg a3bed1c3ccfbca572003020d3e3d3b1ff3934fad)') - parser.add_argument('--git-user', help='the git user to use to clone the code repository') - password_group = parser.add_mutually_exclusive_group() - password_group.add_argument('--git-pass-file', help='the path to a file containing the password (or personal access token)') - password_group.add_argument('--git-pass', type=str, help='the password (or personal access token) to use (not recommended for security reasons)') - parser.add_argument('--num-cores', type=int, required=True, help='the number of cores that the benchmark will use') - parser.add_argument('--output-dir', type=Path, required=True, help='where the output files will be placed') - parser.add_argument('--cmake-path', type=Path, help='the path to the cmake executable to use in case a specific cmake is wanted') - parser.add_argument('--cmake-option', type=str, action='append', help='additional option passed to cmake in the configure step (use this flag multiple times if you need more than one cmake option)') - parser.add_argument('--benchmark-command', required=True, type=str, help='the command to benchmark') - args = parser.parse_args() - - git_user = args.git_user - git_repos_url = args.git_repos_url - - git_password = None - if args.git_pass: - git_password = args.git_pass - elif args.git_pass_file: - with open(args.git_pass_file, 'r', encoding='utf8') as f: - git_password = f.readline().replace('\n', '') # os.environ['HIBRIDON_REPOS_PAT'] - - source_tree_provider = GitRepos(git_repos_url=git_repos_url, code_version=args.code_version, git_user=git_user, git_password=git_password, src_dir=args.output_dir / 'source.git') - - starbench_cmake_app(source_tree_provider, tmp_dir=args.output_dir, num_cores=args.num_cores, cmake_options=args.cmake_option, benchmark_command=args.benchmark_command.split(' '), cmake_exe_location=args.cmake_path) - - -if __name__ == '__main__': - main() diff --git a/test/mamul1/CMakeLists.txt b/test/mamul1/CMakeLists.txt deleted file mode 100644 index 80095ea..0000000 --- a/test/mamul1/CMakeLists.txt +++ /dev/null @@ -1,43 +0,0 @@ - -enable_language (Fortran) - -set(MAMUL1_USE_MAGMA "OFF" CACHE BOOL "if set, mamul1 build uses magma (matrix algebra on gpu)") - -set(MAMUL1_MAGMA_API "CPU_MEM_API" CACHE STRING "which magma API to use when building mamul1: CPU_MEM_API for BLAS compatible API (uses matrices stored on CPU memory) or GPU_MEM_API (use matrices stored on GPU memory)") - -add_executable(mamul1 mamul1.F90) - -if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - # Allow arbitrary long lines. Needed as preprocessing could generate long line lengths. - target_compile_options(mamul1 PUBLIC -ffree-line-length-none) -elseif (Fortran_COMPILER_NAME STREQUAL "ifort") - # Intel (ifort) - target_compile_options(mamul1 PUBLIC -no-wrap-margin) -endif() - - -if (MAMUL1_USE_MAGMA) - find_package( MAGMA REQUIRED ) - if( MAMUL1_MAGMA_API STREQUAL "CPU_MEM_API" ) - target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM) - elseif( MAMUL1_MAGMA_API STREQUAL "GPU_MEM_API" ) - target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM_GPU) - else() - message(FATAL_ERROR "unexpected value for MAMUL1_MAGMA_API : ${MAMUL1_MAGMA_API}") - endif() - message(STATUS "MAGMA_INCLUDES=${MAGMA_INCLUDES}") - include_directories("${MAGMA_INCLUDES}") - target_link_libraries(mamul1 "${MAGMA_LIBRARIES}") -else() - find_package( BLAS REQUIRED ) - find_package( LAPACK REQUIRED ) - # message("BLAS_LIBRARIES=${BLAS_LIBRARIES}") - # message("LAPACK_LIBRARIES=${LAPACK_LIBRARIES}") - target_compile_definitions(mamul1 PUBLIC USE_DGEMM) - - # Link Blas and Lapack libraries - target_link_libraries(mamul1 "${LAPACK_LIBRARIES}") - target_link_libraries(mamul1 "${BLAS_LIBRARIES}") -endif() - -install(TARGETS mamul1) diff --git a/test/mamul1/mamul1.F90 b/test/mamul1/mamul1.F90 deleted file mode 100644 index c31447b..0000000 --- a/test/mamul1/mamul1.F90 +++ /dev/null @@ -1,339 +0,0 @@ -#define MAMUL1_VERSION "1.0.0" - -#define magma_devptr_t integer(kind=8) -subroutine print_usage(prog_path) - character(len=*), intent(in) :: prog_path - character(len=80) :: build_variant -#if defined(USE_MAGMA_DGEMM_GPU) - build_variant='gpu' -#elif defined(USE_DGEMM) - build_variant='cpu' -#else - build_variant='unknown' -#endif - write(6,'("mamul1 v",a," (variant:",a,"): benchmark performs a square matrix multiplication in double precision")') MAMUL1_VERSION, trim(build_variant); - write(6,'()'); - write(6,'("Usage: ",a," ")') trim(prog_path); - write(6,'(" positive integer representing the size of the square matrices to multiply ")'); - write(6,'(" positive integer representing the number of times the multiplication is performed")'); -end subroutine - -program mamul1 - -implicit none - - -integer :: argc, info, ndim, num_loops - -character(len=32) :: arg0, arg1, arg2 - - -call get_command_argument(0,arg0) - -argc = command_argument_count() -if (argc /= 2) then - call print_usage(trim(arg0)) - ! write(6,'("Usage: ",a," NDIM NUM_LOOPS, where NDIM is a positive integer")') trim(arg0); - stop -end if - -call get_command_argument(1,arg1,status=info) -if (info /= 0) then - write(6,'("Error reading argument: info = ",i2)') info - call print_usage(trim(arg0)) -stop -end if - -call get_command_argument(2,arg2,status=info) -if (info /= 0) then - write(6,'("Error reading argument: info = ",i2)') info - call print_usage(trim(arg0)) -stop -end if - -read(arg1,*,iostat=info) ndim -if (info /= 0) then - write(6,'("Error converting ndim argument to integer: info = ",i2)') info - call print_usage(trim(arg0)) -stop -end if - -read(arg2,*,iostat=info) num_loops -if (info /= 0) then - write(6,'("Error converting num_loops argument to integer: info = ",i2)') info - call print_usage(trim(arg0)) -stop -end if - - -if (ndim < 1) then - call print_usage(trim(arg0)) -stop -end if - - call test_dgemm(ndim, num_loops) - -stop -end program mamul1 - -subroutine set_random_seed(seed) - integer :: seed - integer :: seed_array_size - INTEGER, ALLOCATABLE :: seed_array (:) - CALL RANDOM_SEED (SIZE = seed_array_size) ! I is set to the size of - ! ! the seed array - ALLOCATE (seed_array(seed_array_size)) - seed_array = seed - CALL RANDOM_SEED (PUT=seed_array(1:seed_array_size)) -end subroutine - -subroutine print_matrix(mat, ndim) - implicit none - integer, parameter :: dp = kind(1.0d0) - real(dp), intent(in) :: mat(ndim, ndim) - integer, intent(in) :: ndim - integer :: irow - do irow = 1, ndim - write(6, *) mat(irow,:) - end do -end subroutine - -! square matrix multiplication -subroutine sqmatmul(amat, bmat, cmat, ndim) -#if defined(USE_MAGMA_DGEMM_GPU) - use magma, only: magmaf_init, magmaf_finalize - use magma, only: magmaf_queue_create, magmaf_queue_destroy - use magma, only: magmaf_dmalloc, magmaf_free - use magma, only: magmaf_dsetmatrix, magmaf_dgetmatrix - use magma, only: magmablasf_dgemm -#endif - real*8, intent(in) :: amat(ndim,ndim) - real*8, intent(in) :: bmat(ndim,ndim) - real*8, intent(out) :: cmat(ndim,ndim) - integer :: lda, ldb, ldc - integer :: info - - real :: time_before, time_after - integer(8) :: num_ops - real :: gflops - -#ifdef USE_MAGMA_DGEMM_GPU - magma_devptr_t :: d_amat - magma_devptr_t :: d_bmat - magma_devptr_t :: d_cmat - magma_devptr_t :: queue !! really a CPU pointer -#endif - lda = ceiling(real(ndim)/32)*32 - ldb = ceiling(real(ndim)/32)*32 - ldc = ceiling(real(ndim)/32)*32 - - -#if defined(USE_MAGMA_DGEMM_GPU) - !! allocate GPU memory - write(6,'("DEBUG: before matrix A gpu memory allocation (",i0," doubles)")') lda * ndim - info = magmaf_dmalloc( d_amat, lda*ndim ) - if (d_amat == 0) then - print "(a)", "failed to allocate d_amat" - return - endif - write(6,'("DEBUG: before matrix B gpu memory allocation (",i0," doubles)")') ldb * ndim - info = magmaf_dmalloc( d_bmat, ldb*ndim ) - if (d_bmat == 0) then - print "(a)", "failed to allocate d_bmat" - return - endif - write(6,'("DEBUG: before matrix C gpu memory allocation (",i0," doubles)")') ldc * ndim - info = magmaf_dmalloc( d_cmat, ldc*ndim ) - if (d_cmat == 0) then - print "(a)", "failed to allocate d_cmat" - return - endif - - ! copy A to dA and B to dB - call magmaf_queue_create( 0, queue ) - write(6,'("DEBUG: queue = ",i0)') queue - if (queue == 0) then - print "(a)", "failed to create a queue" - return - endif - - write(6,*) 'DEBUG: copying matrix A from CPU to GPU memory' - call magmaf_dsetmatrix( ndim, ndim, amat, ndim, d_amat, lda, queue ) - write(6,*) 'DEBUG: copying matrix B from CPU to GPU memory' - call magmaf_dsetmatrix( ndim, ndim, bmat, ndim, d_bmat, ldb, queue ) - - call cpu_time(time_before) - write (6,*) 'before magmablasf_dgemm, time=', time_before - - call magmablasf_dgemm ('N', 'N', ndim, ndim, ndim, 1.0d0, d_amat, lda, d_bmat, ldb, 0.0d0, d_cmat, ldc, queue) - call magmaf_queue_sync(queue) - - call cpu_time(time_after) - num_ops = real(ndim) * real(ndim) * real(ndim) * 2 - gflops = num_ops / (time_after - time_before) / 1.0e9 - write (6,*) 'after magmablasf_dgemm, time=', time_after - write (6,*) 'magmablasf_dgemm (from gpu memory to gpu memory) duration :', (time_after - time_before), '(', gflops, ' gflops)' - - write(6,*) 'DEBUG: copying matrix C from GPU to CPU memory' - call magmaf_dgetmatrix( ndim, ndim, d_cmat, ldc, cmat, ndim, queue ) - call magmaf_queue_destroy( queue ) - - info = magmaf_free(d_cmat) - info = magmaf_free(d_bmat) - info = magmaf_free(d_amat) - -#endif - -#ifdef USE_DGEMM - ! subroutine dgemm ( character TRANSA, - ! character TRANSB, - ! integer M, - ! integer N, - ! integer K, - ! double precision ALPHA, - ! double precision, dimension(lda,*) A, - ! integer LDA, - ! double precision, dimension(ldb,*) B, - ! integer LDB, - ! double precision BETA, - ! double precision, dimension(ldc,*) C, - ! integer LDC - ! ) - call dgemm('N', 'N', ndim, ndim, ndim, 1.0d0, amat, ndim, bmat, ndim, 0.0d0, cmat, ndim) -#endif - -end subroutine - -subroutine check_cmat_element(cmat, row, col, amat, bmat, ndim) - real(8), intent(in) :: cmat(ndim, ndim) - integer, intent(in) :: row - integer, intent(in) :: col - real(8), intent(in) :: amat(ndim, ndim) - real(8), intent(in) :: bmat(ndim, ndim) - integer, intent(in) :: ndim - - real(8) :: x - x = 0.0d0 - do i = 1, ndim - x = x + amat(row, i) * bmat(i, col) - end do - - write(6, '("expected cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, x - write(6, '("computed cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, cmat(row, col) - if (abs(cmat(row, col) - x) > 1.0e-8) then - stop 'a computed element has a wrong value' - end if -end subroutine - - -subroutine test_dgemm(ndim, num_loops) -#if defined(USE_MAGMA_DGEMM_GPU) - use magma, only: magmaf_init, magmaf_finalize - use magma, only: magmablasf_dgemm !, magmaf_dgemm_gpu -#endif - - implicit none - integer, intent(in) :: ndim - integer, intent(in) :: num_loops - integer, parameter :: dp = kind(1.0d0) - real :: ct_start, ct_stop ! elapsed cpu time relative to an arbitrary fixed time. Expressed in seconds with the granularity of 1 microsecond - integer(8) :: num_ops - real :: gflops - - integer :: sc_start, sc_stop ! system clock time of start and stop events, expressed in ticks - integer :: sc_count_rate ! number of system clock ticks per second - integer :: sc_count_max ! the max possible number of system clock ticks returned by system_clock - integer :: s - REAL :: a_diff, diff - REAL :: num_sc_ticks_per_second ! the number of system clock ticks per second - - real*8, allocatable :: amat(:,:) - real*8, allocatable :: bmat(:,:) - real*8, allocatable :: cmat(:,:) - real(dp) :: x - integer :: i, j - -#if defined(USE_MAGMA_DGEMM_GPU) - write(6,*) 'DEBUG: init magma' - call magmaf_init() -#endif - - ! First initialize the system_clock - CALL system_clock(count_rate=sc_count_rate) - CALL system_clock(count_max=sc_count_max) - num_sc_ticks_per_second = REAL(sc_count_rate) - WRITE(*,*) "system_clock rate : ", num_sc_ticks_per_second, " ticks per second" - - diff = 0.0 - a_diff = 0.0 - s = 0 - - allocate(amat(ndim, ndim)) - allocate(bmat(ndim, ndim)) - allocate(cmat(ndim, ndim)) - - call set_random_seed(42) - - !call random_number(amat) - !amat = 0.5_dp*(amat + transpose(amat)) - do j = 1, ndim - do i = 1, ndim - call random_number(x) - amat(i,j) = x - call random_number(x) - bmat(i,j) = x - end do - end do - - call cpu_time(ct_start) - call system_clock(sc_start) - - do j = 1, num_loops - ! playmat = amat - - call sqmatmul(amat, bmat, cmat, ndim) - - end do - - call cpu_time(ct_stop) - call system_clock(sc_stop) - if ( (sc_stop - sc_start)/num_sc_ticks_per_second < (ct_stop - ct_start) ) s = s + 1 - diff = (sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start) + diff - a_diff = ABS((sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start)) + a_diff - - ! check one of the elements of cmat (the last one here: cmat(ndim, ndim)) - call check_cmat_element(cmat, 1, 1, amat, bmat, ndim) - call check_cmat_element(cmat, 1, ndim, amat, bmat, ndim) - call check_cmat_element(cmat, ndim, 1, amat, bmat, ndim) - call check_cmat_element(cmat, ndim, ndim, amat, bmat, ndim) - - ! write(6, *) 'amat = ' - ! call print_matrix(amat, ndim) - - ! write(6, *) 'bmat = ' - ! call print_matrix(bmat, ndim) - - ! write(6, *) 'cmat = ' - ! call print_matrix(cmat, ndim) - - num_ops = real(ndim) * real(ndim) * real(ndim) * 2 * num_loops - gflops = num_ops / (ct_stop-ct_start) / 1.0e9 - - - write(6, '("Time taken by dgemm for matrix size ",i8," was ",f10.2," seconds")') ndim, ct_stop-ct_start - WRITE(*,*) "gflops (including potential memory transfers) : ", gflops - - WRITE(*,*) "system_clock : ",(sc_stop - sc_start)/num_sc_ticks_per_second - WRITE(*,*) "cpu_time : ",(ct_stop - ct_start) - WRITE(*,*) "sys_clock < cpu_time : ",s - WRITE(*,*) "mean diff : ",diff - WRITE(*,*) "abs mean diff : ",a_diff - -#if defined(USE_MAGMA_DGEMM_GPU) - write(6,*) 'DEBUG: deinit magma' - call magmaf_finalize() -#endif - - - deallocate(amat, bmat, cmat) - end diff --git a/test/test_starbench.py b/test/test_starbench.py deleted file mode 100644 index bca5e34..0000000 --- a/test/test_starbench.py +++ /dev/null @@ -1,25 +0,0 @@ -import unittest -import logging -from pathlib import Path -# from cocluto import ClusterController -from starbench.main import starbench_cmake_app, ExistingDir - - -class StarbenchTestCase(unittest.TestCase): - - logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - - def setUp(self) -> None: # pylint: disable=useless-parent-delegation - return super().setUp() - - def test_mamul1_benchmark(self): - logging.info('test_mamul1_benchmark') - source_code_provider = ExistingDir(Path('test/mamul1').absolute()) - tmp_dir = Path('tmp').absolute() - benchmark_command = ['./mamul1', '3000', '10'] - starbench_cmake_app(source_code_provider=source_code_provider, tmp_dir=tmp_dir, num_cores=2, benchmark_command=benchmark_command) - # self.assertIsInstance(job_state, JobsState) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/usecases/ipr/hibench/readme.md b/usecases/ipr/hibench/readme.md deleted file mode 100644 index af7757a..0000000 --- a/usecases/ipr/hibench/readme.md +++ /dev/null @@ -1,27 +0,0 @@ -This example illustrates how `starbench` is used at IPR (Institut de Physique de Rennes) to measure the performance of [hibridon](https://github.com/hibridon/hibridon) on IPR's cluster (`physix`) - - - - -usage: - -```sh -20241007-15:08:10 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync --exclude .git --exclude starbench.venv --exclude tmp --exclude usecases/ipr/hibench/results -va ./ graffy@alambix.ipr.univ-rennes.fr:/opt/ipr/cluster/work.global/graffy/starbench.git/ -sending incremental file list - -sent 1,416 bytes received 25 bytes 960.67 bytes/sec -total size is 140,225 speedup is 97.31 -last command status : [0] -``` - -```sh -graffy@alambix-frontal:/opt/ipr/cluster/work.global/graffy/starbench.git/usecases/ipr/hibench$ ./hibenchonphysix.py --commit-id 53894da48505892bfa05693a52312bacb12c70c9 --results-dir $GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench/hibench/$(date --iso=seconds) --arch-regexp 'intel_xeon_x5650' --cmake-path /usr/bin/cmake -``` - -`hibenchonphysix.py` script launches two `sge` jobs for each machine type in `physix` cluster: -- one job that performs a benchmark of hibridon with `gfortran` compiler -- one job that performs a benchmark of hibridon with `ifort` compiler - -When the job successfully completes, it puts the results of the benchmark on `physix`'s global work directory (eg `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/53894da48505892bfa05693a52312bacb12c70c9/nh3h2_qma_long/intel_xeon_x5550/gfortran`) - -