v0.0.14
- fixes to validate that `clusterbench-submit` works in a real situation (on alambix cluster): - fixed bug in hibridon benchmark where the requested ram was way too small - made sure the location of iprbench hardcoded virtual env path is writable - fixed bug in exception message - updated the `README.md` work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
This commit is contained in:
		
							parent
							
								
									e5fb47ba64
								
							
						
					
					
						commit
						263474eb5a
					
				
							
								
								
									
										224
									
								
								README.md
								
								
								
								
							
							
						
						
									
										224
									
								
								README.md
								
								
								
								
							|  | @ -1,161 +1,131 @@ | |||
| # iprbenchmark | ||||
| # iprbench | ||||
| 
 | ||||
| This example illustrates how `starbench` is used at IPR (Institut de Physique de Rennes) to measure the performance of [hibridon](https://github.com/hibridon/hibridon) on IPR's cluster (`alambix`) | ||||
| 
 | ||||
| 
 | ||||
| usage: | ||||
| 
 | ||||
| ```sh | ||||
| 20241007-15:08:10 graffy@graffy-ws2:~/work/starbench/starbench.git$ rsync --exclude .git --exclude starbench.venv --exclude tmp --exclude usecases/ipr/hibridon/results  -va ./ graffy@alambix.ipr.univ-rennes.fr:/opt/ipr/cluster/work.global/graffy/starbench.git/ | ||||
| sending incremental file list | ||||
| 
 | ||||
| sent 1,416 bytes  received 25 bytes  960.67 bytes/sec | ||||
| total size is 140,225  speedup is 97.31 | ||||
| last command status : [0] | ||||
| ``` | ||||
| ## install iprbench | ||||
| 
 | ||||
| ```sh | ||||
| graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ python3 -m venv iprbench.venv | ||||
| graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ source ./iprbench.venv/bin/activate | ||||
| (iprbench.venv) graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ pip install ./iprbench.git | ||||
| Processing ./iprbench.git | ||||
|   Installing build dependencies ... done | ||||
|   Getting requirements to build wheel ... done | ||||
|   Preparing metadata (pyproject.toml) ... done | ||||
| Collecting starbench@ git+https://github.com/g-raffy/starbench | ||||
|   Cloning https://github.com/g-raffy/starbench to /tmp/user/59825/pip-install-uw5i22k1/starbench_890d53070dec47738060b57fdd29b001 | ||||
|   Running command git clone --filter=blob:none --quiet https://github.com/g-raffy/starbench /tmp/user/59825/pip-install-uw5i22k1/starbench_890d53070dec47738060b57fdd29b001 | ||||
|   Resolved https://github.com/g-raffy/starbench to commit 3ca66d00636ad055506f6b4e2781b498cc7487ac | ||||
|   Installing build dependencies ... done | ||||
|   Getting requirements to build wheel ... done | ||||
|   Preparing metadata (pyproject.toml) ... done | ||||
| Collecting pandas | ||||
|   Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.1/13.1 MB 23.6 MB/s eta 0:00:00 | ||||
| Collecting matplotlib | ||||
|   Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.3/8.3 MB 20.1 MB/s eta 0:00:00 | ||||
| Collecting sqlalchemy | ||||
|   Downloading SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.2/3.2 MB 17.6 MB/s eta 0:00:00 | ||||
| Collecting contourpy>=1.0.1 | ||||
|   Downloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 323.2/323.2 kB 3.4 MB/s eta 0:00:00 | ||||
| Collecting cycler>=0.10 | ||||
|   Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB) | ||||
| Collecting fonttools>=4.22.0 | ||||
|   Downloading fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.9/4.9 MB 20.9 MB/s eta 0:00:00 | ||||
| Collecting kiwisolver>=1.3.1 | ||||
|   Downloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 8.9 MB/s eta 0:00:00 | ||||
| Collecting numpy>=1.23 | ||||
|   Downloading numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.3/16.3 MB 19.9 MB/s eta 0:00:00 | ||||
| Collecting packaging>=20.0 | ||||
|   Downloading packaging-24.1-py3-none-any.whl (53 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.0/54.0 kB 1.0 MB/s eta 0:00:00 | ||||
| Collecting pillow>=8 | ||||
|   Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 20.9 MB/s eta 0:00:00 | ||||
| Collecting pyparsing>=2.3.1 | ||||
|   Downloading pyparsing-3.1.4-py3-none-any.whl (104 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 104.1/104.1 kB 1.3 MB/s eta 0:00:00 | ||||
| Collecting python-dateutil>=2.7 | ||||
|   Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 229.9/229.9 kB 933.1 kB/s eta 0:00:00 | ||||
| Collecting pytz>=2020.1 | ||||
|   Downloading pytz-2024.2-py2.py3-none-any.whl (508 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 508.0/508.0 kB 3.9 MB/s eta 0:00:00 | ||||
| Collecting tzdata>=2022.7 | ||||
|   Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 346.6/346.6 kB 2.0 MB/s eta 0:00:00 | ||||
| Collecting typing-extensions>=4.6.0 | ||||
|   Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB) | ||||
| Collecting greenlet!=0.4.17 | ||||
|   Downloading greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (602 kB) | ||||
|      ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 602.4/602.4 kB 4.9 MB/s eta 0:00:00 | ||||
| Collecting six>=1.5 | ||||
|   Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) | ||||
| Building wheels for collected packages: iprbench, starbench | ||||
|   Building wheel for iprbench (pyproject.toml) ... done | ||||
|   Created wheel for iprbench: filename=iprbench-0.0.1-py3-none-any.whl size=19188 sha256=0ece4a9b1b44434c0f033a253aae301a5d53039955f1f6b182c0b833d44c3e93 | ||||
|   Stored in directory: /tmp/user/59825/pip-ephem-wheel-cache-8yw7rwk6/wheels/84/c9/82/e72d13fb7df12a8004ca6383b185a00f9ab3ddd8695e9e6cd8 | ||||
|   Building wheel for starbench (pyproject.toml) ... done | ||||
|   Created wheel for starbench: filename=starbench-1.0.0-py3-none-any.whl size=9612 sha256=18968f356bb3d6f6c2337b4bbaf709510c50a6a9902c3363f4b568c409846ac0 | ||||
|   Stored in directory: /tmp/user/59825/pip-ephem-wheel-cache-8yw7rwk6/wheels/cf/73/d3/14e4830d3e06c2c3ab71fdf68e0f14b50132ec23eaa6b2aa65 | ||||
| Successfully built iprbench starbench | ||||
| Installing collected packages: pytz, tzdata, typing-extensions, starbench, six, pyparsing, pillow, packaging, numpy, kiwisolver, greenlet, fonttools, cycler, sqlalchemy, python-dateutil, contourpy, pandas, matplotlib, iprbench | ||||
| Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2 | ||||
| ``` | ||||
| 
 | ||||
| ## run unit tests | ||||
| 
 | ||||
| ```sh | ||||
| 20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_iprbench | ||||
| 2024-10-18 16:57:42,589 - INFO - test_iprbench_run | ||||
| creating build directory /tmp/mamul1_out/output/worker<worker_id> | ||||
| executing the following command in parallel (2 parallel runs) : '['mkdir', '-p', '/tmp/mamul1_out/output/worker<worker_id>/build']' | ||||
| mean duration : 0.004 s (2 runs) | ||||
| configuring /home/graffy/work/starbench/iprbench.git/test/mamul1 into /tmp/mamul1_out/output/worker<worker_id>/build ... | ||||
| executing the following command in parallel (2 parallel runs) : '['/usr/bin/cmake', '-DCMAKE_BUILD_TYPE=Release', '-DCMAKE_Fortran_COMPILER=gfortran', '/home/graffy/work/starbench/iprbench.git/test/mamul1']' | ||||
| mean duration : 0.098 s (2 runs) | ||||
| building /tmp/mamul1_out/output/worker<worker_id>/build ... | ||||
| executing the following command in parallel (2 parallel runs) : '['make']' | ||||
| mean duration : 0.073 s (2 runs) | ||||
| benchmarking /tmp/mamul1_out/output/worker<worker_id>/build ... | ||||
| executing the following command in parallel (2 parallel runs) : '['./mamul1', '1024', '10']' | ||||
| mean duration : 0.660 s (2 runs) | ||||
| duration : 0.660 s | ||||
| . | ||||
| ---------------------------------------------------------------------- | ||||
| Ran 1 test in 1.035s | ||||
| 
 | ||||
| OK | ||||
| last command status : [0] | ||||
| 20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest | ||||
| ``` | ||||
| 
 | ||||
| ```sh | ||||
| 20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_clusterbench | ||||
| ``` | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ## launch a benchmark on the current system | ||||
| 
 | ||||
| Here's a simple example to run the benchmark `mamul1` on the current system: | ||||
| 
 | ||||
| ```sh | ||||
| iprbench-run --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ iprbench-run --benchmark-id 'mamul1' --config '{"fortran_compiler": "gfortran:<default>", "blas_library": "<default-libblas>:<default>", "matrix_size": 1024, "num_loops":10, "num_cores":2, "launcher": "manual"}' --results-dir /tmp/mamul1_out --target-system-type-id 'debian' --resultsdb-params '{"type": "tsv-files", "tsv_results_dir": "/tmp/mamul1_out/tsv"}' | ||||
| DEBUG:root:extracting package iprbench.resources.mamul1 resource CMakeLists.txt to /tmp/mamul1_out/mamul1 | ||||
| DEBUG:root:extracting package iprbench.resources.mamul1 resource mamul1.F90 to /tmp/mamul1_out/mamul1 | ||||
| DEBUG:root:shell_command = "starbench --source-tree-provider '{"type": "existing-dir", "dir-path": "/tmp/mamul1_out/mamul1"}' --num-cores 2 --output-dir=/tmp/mamul1_out/output --cmake-path=/usr/bin/cmake --cmake-option=-DCMAKE_BUILD_TYPE=Release --cmake-option=-DCMAKE_Fortran_COMPILER=gfortran --cmake-option=-DBLA_VENDOR=OpenBLAS --benchmark-command='./mamul1 1024 10' --output-measurements=/tmp/mamul1_out/output/measurements.tsv" | ||||
| creating build directory /tmp/mamul1_out/output/worker<worker_id> | ||||
| executing the following command in parallel (2 parallel runs) : '['mkdir', '-p', '/tmp/mamul1_out/output/worker<worker_id>/build']' | ||||
| mean duration : 0.002 s (2 runs) | ||||
| configuring /tmp/mamul1_out/mamul1 into /tmp/mamul1_out/output/worker<worker_id>/build ... | ||||
| executing the following command in parallel (2 parallel runs) : '['/usr/bin/cmake', '-DCMAKE_BUILD_TYPE=Release', '-DCMAKE_Fortran_COMPILER=gfortran', '-DBLA_VENDOR=OpenBLAS', '/tmp/mamul1_out/mamul1']' | ||||
| mean duration : 0.057 s (2 runs) | ||||
| building /tmp/mamul1_out/output/worker<worker_id>/build ... | ||||
| executing the following command in parallel (2 parallel runs) : '['make']' | ||||
| mean duration : 0.368 s (2 runs) | ||||
| benchmarking /tmp/mamul1_out/output/worker<worker_id>/build ... | ||||
| executing the following command in parallel (2 parallel runs) : '['./mamul1', '1024', '10']' | ||||
| mean duration : 1.933 s (2 runs) | ||||
| duration : 1.933 s | ||||
| DEBUG:root:output_measurements_file_path = /tmp/mamul1_out/output/measurements.tsv | ||||
| DEBUG:root:row = Unnamed: 0    0.000000 | ||||
| run_id        0.000000 | ||||
| duration      1.932536 | ||||
| Name: 0, dtype: float64 | ||||
| DEBUG:root:row = Unnamed: 0    1.000000 | ||||
| run_id        1.000000 | ||||
| duration      1.933324 | ||||
| Name: 1, dtype: float64 | ||||
| total number of cores (including virtual cores) on this host : 24 | ||||
| DEBUG:root:table_file_path=/tmp/mamul1_out/tsv/mamul1.tsv | ||||
|             measurement_time ipr_bench_version    host_id  ... duration_stddev duration_min  duration_max | ||||
| 0 2024-11-27 10:51:02.551947            0.0.13  <unknown>  ...        0.000557     1.932536      1.933324 | ||||
| 
 | ||||
| [1 rows x 18 columns] | ||||
| ``` | ||||
| 
 | ||||
| Now that the run has completed successfully, the results can be found in the folder `/tmp/mamul1_out/tsv` that we chose: | ||||
| 
 | ||||
| ```sh | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ cat /tmp/mamul1_out/tsv/mamul1.tsv  | ||||
| measurement_time	ipr_bench_version	host_id	host_fqdn	user	num_cpus	cpu_model	launcher	fortran_compiler	blas_library	num_cores	matrix_size	num_loops	duration_avg	duration_med	duration_stddev	duration_min	duration_max | ||||
| 2024-11-27 10:51:02.551947	0.0.13	<unknown>	alambix50.ipr.univ-rennes.fr	graffy	2	intel_xeon_x5650	manual	gfortran:12.2.0	libopenblas-pthread:0.3.21	2	1024	10	1.93293	1.93293	0.0005572001435750071	1.932536	1.933324 | ||||
| ``` | ||||
| 
 | ||||
| ## launch benchmark jobs on alambix cluster | ||||
| 
 | ||||
| The following example command submits jobs (one job per cpu architecture) that execute the benchmark `hibridon` on the cluster `alambix` (only for the architecture `intel_xeon_x5650` in this example). | ||||
| 
 | ||||
| ```sh | ||||
| (iprbench.venv) graffy@alambix-frontal:/opt/ipr/cluster/work.local/graffy/bug3372$ hibridononphysix --commit-id 53894da48505892bfa05693a52312bacb12c70c9 --results-dir $GLOBAL_WORK_DIR/graffy/hibridon/benchmarks/starbench/hibridon/$(date --iso=seconds) --arch-regexp 'intel_xeon_x5650' --cmake-path /usr/bin/cmake | ||||
| INFO:root:available host groups: dict_keys(['intel_xeon_x5550', 'intel_xeon_x5650', 'intel_xeon_e5-2660', 'intel_xeon_e5-2660v2', 'intel_xeon_e5-2660v4', 'intel_xeon_gold_6140', 'intel_xeon_gold_6154', 'intel_xeon_gold_5220', 'intel_xeon_gold_6226r', 'intel_xeon_gold_6248r', 'intel_xeon_gold_6348', 'amd_epyc_7282', 'amd_epyc_7452']) | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ results_dir="$GLOBAL_WORK_DIR/graffy/iprbenchs/test_results/clusterbench_submit/$(date --iso-8601=seconds)"; clusterbench-submit --cluster-id 'alambix' --arch-regexp "intel_xeon_x5650.*" --benchmark-id 'hibridon' --config '{"fortran_compiler": "ifort:<default>", "blas_library": "intelmkl:<default>", "test_id": "arch4_quick", "hibridon_version": "a3bed1c3ccfbca572003020d3e3d3b1ff3934fad", "cmake_path": "cmake", "num_cores": 1, "launcher": "graffy.manual"}' --results-dir "${results_dir}" --resultsdb-params '{"type": "tsv-files", "tsv_results_dir": "'"$results_dir"'"}' --target-system-type-id "fr.univ-rennes.ipr.cluster-node" | ||||
| INFO:root:available host groups: dict_keys(['intel_core_i5_8350u', 'intel_xeon_x5550', 'intel_xeon_x5650', 'intel_xeon_e5-2660', 'intel_xeon_e5-2660v2', 'intel_xeon_e5-2660v4', 'intel_xeon_gold_6140', 'intel_xeon_gold_6154', 'intel_xeon_gold_5220', 'intel_xeon_gold_6226r', 'intel_xeon_gold_6248r', 'intel_xeon_gold_6348', 'amd_epyc_7282', 'amd_epyc_7452']) | ||||
| INFO:root:requested host groups: ['intel_xeon_x5650'] | ||||
| INFO:root:using test arch4_quick for benchmarking | ||||
| INFO:root:creating /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/iprbench.venv.tgz (the virtual environment that will be used in this bench by all its jobs at some point) | ||||
| DEBUG:root:iprbench_venv_hardcoded_path = /tmp/user/59825/iprbench.venv | ||||
| INFO:root:creating /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/iprbench.venv.tgz (the virtual environment that will be used in this bench by all its jobs at some point) | ||||
| Collecting virtualenv-clone | ||||
|   Using cached virtualenv_clone-0.5.7-py3-none-any.whl (6.6 kB) | ||||
| Installing collected packages: virtualenv-clone | ||||
| Successfully installed virtualenv-clone-0.5.7 | ||||
| DEBUG:root:command = /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=gfortran" "ctest --output-on-failure -L ^arch4_quick$" "" "/usr/bin/cmake" | ||||
| DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1G -j y -N hibridon_intel_xeon_x5650_gfortran_53894da48505892bfa05693a52312bacb12c70c9 /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=gfortran" "ctest --output-on-failure -L ^arch4_quick$" "" "/usr/bin/cmake" | ||||
| Your job 17357 ("hibridon_intel_xeon_x5650_gfortran_53894da48505892bfa05693a52312bacb12c70c9") has been submitted | ||||
| INFO:root:using test arch4_quick for benchmarking | ||||
| INFO:root:skipping the creation of /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/iprbench.venv.tgz because it already exists (probably created for other jobs of the same bench) | ||||
| DEBUG:root:command = /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/ifort/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=ifort -DBLA_VENDOR=Intel10_64lp" "ctest --output-on-failure -L ^arch4_quick$" "module load compilers/ifort/latest" "/usr/bin/cmake" | ||||
| DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1G -j y -N hibridon_intel_xeon_x5650_ifort_53894da48505892bfa05693a52312bacb12c70c9 /opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/ifort/starbench.job "https://github.com/hibridon/hibridon" "g-raffy" "/mnt/home.ipr/graffy/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" "53894da48505892bfa05693a52312bacb12c70c9" "-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON -DCMAKE_Fortran_COMPILER=ifort -DBLA_VENDOR=Intel10_64lp" "ctest --output-on-failure -L ^arch4_quick$" "module load compilers/ifort/latest" "/usr/bin/cmake" | ||||
| Your job 17358 ("hibridon_intel_xeon_x5650_ifort_53894da48505892bfa05693a52312bacb12c70c9") has been submitted | ||||
| DEBUG:root:type of resultsdb_params = <class 'dict'> | ||||
| DEBUG:root:resultsdb_params = {'type': 'tsv-files', 'tsv_results_dir': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00'} | ||||
| DEBUG:root:resultsdb_params = {"type": "tsv-files", "tsv_results_dir": "/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00"} | ||||
| DEBUG:root:tags_dict = {'<benchmark_id>': 'hibridon', '<starbench_job_path>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/intel_xeon_x5650/starbench.job', '<iprbench_venv_hardcoded_path>': '/tmp/user/59825/iprbench.venv', '<iprbench_venv_archive_path>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/iprbench.venv.tgz', '<benchmark_config>': '{\\"fortran_compiler\\": \\"ifort:<default>\\", \\"blas_library\\": \\"intelmkl:<default>\\", \\"test_id\\": \\"arch4_quick\\", \\"hibridon_version\\": \\"a3bed1c3ccfbca572003020d3e3d3b1ff3934fad\\", \\"cmake_path\\": \\"cmake\\", \\"num_cores\\": 12, \\"launcher\\": \\"graffy.manual\\"}', '<results_dir>': '/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00', '<resultsdb_params>': '{\\"type\\": \\"tsv-files\\", \\"tsv_results_dir\\": \\"/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00\\"}', '<num_cores>': '12', '<target_system_type_id>': 'fr.univ-rennes.ipr.cluster-node'} | ||||
| DEBUG:root:ram_per_core = 1.073741824G | ||||
| DEBUG:root:qsub_args = ['-pe', 'smp', '12', '-l', '"hostname=alambix50.ipr.univ-rennes.fr"', '-S', '/bin/bash', '-cwd', '-m', 'ae', '-l', 'mem_available=1.073741824G', '-j', 'y', '-N', 'hibridon_intel_xeon_x5650'] | ||||
| DEBUG:root:qsub_command = qsub -pe smp 12 -l "hostname=alambix50.ipr.univ-rennes.fr" -S /bin/bash -cwd -m ae -l mem_available=1.073741824G -j y -N hibridon_intel_xeon_x5650 /opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/intel_xeon_x5650/starbench.job , working_dir=/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/intel_xeon_x5650 | ||||
| Your job 18879 ("hibridon_intel_xeon_x5650") has been submitted | ||||
| ``` | ||||
| 
 | ||||
| `hibridononphysix` script launches two `sge` jobs for each machine type in `alambix` cluster: | ||||
| - one job that performs a benchmark of hibridon with `gfortran` compiler | ||||
| - one job that performs a benchmark of hibridon with `ifort` compiler | ||||
| The following command shows that the job is running | ||||
| ```sh | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ qstat | ||||
| job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID  | ||||
| ----------------------------------------------------------------------------------------------------------------- | ||||
|   18879 0.65000 hibridon_i graffy       r     11/26/2024 18:15:32 short.q@alambix50.ipr.univ-ren    12         | ||||
| ``` | ||||
| 
 | ||||
| When the job successfully completes, it puts the results of the benchmark on `alambix`'s global work directory (eg `/opt/ipr/cluster/work.global/graffy/hibridon/benchmarks/starbench/hibridon/2024-10-10T12:11:44+02:00/53894da48505892bfa05693a52312bacb12c70c9/arch4_quick/intel_xeon_x5650/gfortran`) | ||||
| the configuration of the benchmark (`--config`) is defined to run the test `arch4_quick` using the latest versions of ifort and mkl: | ||||
| ```json | ||||
| { | ||||
|   "fortran_compiler": "ifort:<default>", | ||||
|   "blas_library": "intelmkl:<default>", | ||||
|   "test_id": "arch4_quick", | ||||
|   "hibridon_version": "a3bed1c3ccfbca572003020d3e3d3b1ff3934fad", | ||||
|   "cmake_path": "cmake", | ||||
|   "num_cores": 1, | ||||
|   "launcher": "graffy.manual" | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| This will cause the benchmark to use the latest versions of ifort and mkl available on the cluster node that runs the benchmark. | ||||
| note: the value given to `num_cores` is not important as `clusterbench-submit` overwrites it with the number of cores of the cluster node that runs the benchmark. | ||||
| 
 | ||||
| the results database backend used in the benchmark (`--resultsdb-params`) is: | ||||
| ```json | ||||
| { | ||||
|   "type": "tsv-files", | ||||
|   "tsv_results_dir": "/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00" | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| This means that we want to register the results of the benchmark in the tsv (tab separated values) file `/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00/hibridon.tsv`. Please note that this result database backend is not really appropriate for `clusterbench-submit`, as it suffers from race conditions (`sqlserver-viassh-database` would be a better alternative, but it requires a more complicated setup). | ||||
| 
 | ||||
| When the jobs successfully complete, they put the results of the benchmark in `$results_dir` (eg `/opt/ipr/cluster/work.global/graffy/iprbenchs/test_results/clusterbench_submit/2024-11-26T18:15:14+01:00`) | ||||
| 
 | ||||
| ```sh | ||||
| (iprbench.venv) graffy@alambix50:/opt/ipr/cluster/work.local/graffy/bug3958/iprbench.git$ cat $results_dir/hibridon.tsv  | ||||
| measurement_time	ipr_bench_version	host_id	host_fqdn	user	num_cpus	cpu_model	launcher	num_cores	hibridon_version	fortran_compilerblas_library	test_id	cmake_path	duration_avg	duration_med	duration_stddev	duration_min	duration_max	num_threads_per_run | ||||
| 2024-11-26 18:18:18.391137	0.0.13	<unknown>	alambix50.ipr.univ-rennes.fr	graffy	2	intel_xeon_x5650	graffy.manual	12	a3bed1c3ccfbca572003020d3e3d3b1ff3934fad	ifort:2021.13.1	intelmkl:2024.2.1	arch4_quick	cmake	3.7509884166666665	3.68795	0.1596150383672931	3.533953	4.036977	1 | ||||
| ``` | ||||
| 
 | ||||
| ## graph the results of benchmarks | ||||
| 
 | ||||
|  |  | |||
|  | @ -36,12 +36,12 @@ class HibridonBenchmark(IBenchmark): | |||
|         ram_per_core = 0  # in bytes | ||||
|         benchmark_test = config['test_id'] | ||||
|         if benchmark_test == 'arch4_quick': | ||||
|             ram_per_core = int(1 * GIBIBYTE_TO_BYTE) | ||||
|             ram_per_core = int(1.0 * GIBIBYTE_TO_BYTE) | ||||
|         elif benchmark_test == 'nh3h2_qma_long': | ||||
|             ram_per_core = int(2.8 * GIBIBYTE_TO_BYTE)  # this was enough on physix48, but maybe we can reduce more | ||||
|         else: | ||||
|             assert f'unhandled benchmark_test : {benchmark_test}' | ||||
|         return ram_per_core | ||||
|         return ram_per_core * config['num_cores'] | ||||
| 
 | ||||
|     def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements: | ||||
| 
 | ||||
|  |  | |||
|  | @ -292,7 +292,8 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark | |||
|     starbench_job_path = this_bench_dir / 'starbench.job' | ||||
| 
 | ||||
|     job_venv_archive_path = results_dir / 'iprbench.venv.tgz' | ||||
|     iprbench_venv_hardcoded_path = Path('/tmp') / 'iprbench.venv' | ||||
|     iprbench_venv_hardcoded_path = Path(getenv('TMPDIR', default='/tmp')) / 'iprbench.venv' | ||||
|     logging.debug("iprbench_venv_hardcoded_path = %s", iprbench_venv_hardcoded_path) | ||||
|     if job_venv_archive_path.exists(): | ||||
|         logging.info('skipping the creation of %s because it already exists (probably created for other jobs of the same bench)', job_venv_archive_path) | ||||
|     else: | ||||
|  | @ -325,6 +326,7 @@ def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: Benchmark | |||
| 
 | ||||
|     ram_requirements = benchmark.get_ram_requirements(benchmark_config) | ||||
|     ram_per_core = f'{ram_requirements / num_cores / 1.e9}G' | ||||
|     logging.debug('ram_per_core = %s', ram_per_core) | ||||
| 
 | ||||
|     qsub_args = [] | ||||
|     qsub_args += ['-pe', 'smp', f'{num_cores}'] | ||||
|  | @ -396,6 +398,6 @@ def main(): | |||
|     target_system_type_id = HostTypeId(args.target_system_type_id) | ||||
| 
 | ||||
|     if not cluster.path_is_reachable_by_compute_nodes(results_dir): | ||||
|         raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') | ||||
|         raise ValueError(f'the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') | ||||
| 
 | ||||
|     launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, resultsdb_params, target_system_type_id) | ||||
|  |  | |||
|  | @ -1 +1 @@ | |||
| __version__ = '0.0.13' | ||||
| __version__ = '0.0.14' | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue