refactored iprbench to separate ipr benchmark framework from the actual benchmarks
This decoupling allows writing benchmarks as modules that can be used in various situations (from a benchmark job or directly by a user), and this design will allow automatic registering of the benchmark results in a user-selectable form (sql database, stdout, etc.) - separated `hibenchonphysix.py` into `clusterbench.py` (tool to run a benchmark on a cluster) and `hibench.py` (hibridon benchmark module) so that `clusterbench.py` no longer has any knowledge about hibridon. - there are currently 2 ways to run a benchmark: 1. as a simple run through the `clusterbench-run` command (which will eventually be renamed iprbench-run since it might be completely independent from the concept of cluster) 2. as cluster jobs through the `clusterbench-submit` command - added a unit test - added another benchmark `mamul1` that is used as a unit test because it has 2 benefits over the `hibench` benchmark: 1. it's standalone (no external resources needed) 2. it's quicker to execute note: this refactoring work is not complete yet, but the proof of concept is complete (the 2 unit tests pass): - still need to provide the user a way to switch between IprCluster and DummyCluster (which is only intended to be used for testing clusterbench) - still need to run multiple configs of the same benchmark in one run (as hibenchonphysix did) work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958] and [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
This commit is contained in:
parent
fe4a07a67e
commit
011d4eddf9
|
@ -1,3 +1,7 @@
|
|||
iprbench.venv/
|
||||
results/
|
||||
iprbench/__pycache__/__init__.cpython-38.pyc
|
||||
iprbench.egg-info/
|
||||
iprbench/benchmarks/__pycache__/
|
||||
iprbench/__pycache__/
|
||||
test/__pycache__/
|
||||
iprbench/resources/__pycache__/
|
||||
|
|
38
README.md
38
README.md
|
@ -90,6 +90,44 @@ Installing collected packages: pytz, tzdata, typing-extensions, starbench, six,
|
|||
Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2
|
||||
```
|
||||
|
||||
## run unit tests
|
||||
|
||||
```sh
|
||||
20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_iprbench
|
||||
2024-10-18 16:57:42,589 - INFO - test_iprbench_run
|
||||
creating build directory /tmp/mamul1_out/output/worker<worker_id>
|
||||
executing the following command in parallel (2 parallel runs) : '['mkdir', '-p', '/tmp/mamul1_out/output/worker<worker_id>/build']'
|
||||
mean duration : 0.004 s (2 runs)
|
||||
configuring /home/graffy/work/starbench/iprbench.git/test/mamul1 into /tmp/mamul1_out/output/worker<worker_id>/build ...
|
||||
executing the following command in parallel (2 parallel runs) : '['/usr/bin/cmake', '-DCMAKE_BUILD_TYPE=Release', '-DCMAKE_Fortran_COMPILER=gfortran', '/home/graffy/work/starbench/iprbench.git/test/mamul1']'
|
||||
mean duration : 0.098 s (2 runs)
|
||||
building /tmp/mamul1_out/output/worker<worker_id>/build ...
|
||||
executing the following command in parallel (2 parallel runs) : '['make']'
|
||||
mean duration : 0.073 s (2 runs)
|
||||
benchmarking /tmp/mamul1_out/output/worker<worker_id>/build ...
|
||||
executing the following command in parallel (2 parallel runs) : '['./mamul1', '1024', '10']'
|
||||
mean duration : 0.660 s (2 runs)
|
||||
duration : 0.660 s
|
||||
.
|
||||
----------------------------------------------------------------------
|
||||
Ran 1 test in 1.035s
|
||||
|
||||
OK
|
||||
last command status : [0]
|
||||
```
|
||||
|
||||
```sh
|
||||
20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_clusterbench
|
||||
```
|
||||
|
||||
|
||||
|
||||
## launch a benchmark on the current system
|
||||
|
||||
```sh
|
||||
iprbench-run --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out
|
||||
```
|
||||
|
||||
## launch benchmark jobs on alambix cluster
|
||||
|
||||
```sh
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
from ..core import IBenchmark, BenchParam, BenchmarkConfig
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
|
||||
class HiBench(IBenchmark):
    """Hibridon benchmark.

    Clones the hibridon source tree and delegates configure/build/measure to the
    external `starbench` tool; the measured workload is one of hibridon's ctest
    test cases (selected by the `test_id` config parameter).
    """

    def __init__(self):
        bench_params = []
        bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id'))
        bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
        bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
        bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
        bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
        bench_params.append(BenchParam('test_id', BenchParam.Type.PARAM_TYPE_STRING, 'the name of the test to run (eg arch4_quick (about 2s on a core i5 8th generation) or nh3h2_qma_long (about 10min on a core i5 8th generation))'))

        super().__init__(bench_id='hibench', bench_params=bench_params)

    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """Return the RAM needed per core for the given config, in bytes.

        Raises:
            ValueError: if config['test_id'] is not a known test.
        """
        GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024
        benchmark_test = config['test_id']
        if benchmark_test == 'arch4_quick':
            ram_per_core = int(1 * GIBIBYTE_TO_BYTE)
        elif benchmark_test == 'nh3h2_qma_long':
            ram_per_core = int(2.8 * GIBIBYTE_TO_BYTE)  # this was enough on physix48, but maybe we can reduce more
        else:
            # bugfix: this used to be `assert f'unhandled ...'`, which asserts a
            # non-empty string and therefore never fails, silently returning a
            # 0-byte requirement for unknown tests. Fail loudly instead.
            raise ValueError(f'unhandled benchmark_test : {benchmark_test}')
        return ram_per_core

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
        """Run the hibridon benchmark described by `config`.

        Builds a `starbench` command line (source tree provider, cmake options,
        ctest benchmark command) and executes it through bash.

        Raises:
            ValueError: if config['compiler_id'] is not a known compiler.
            subprocess.CalledProcessError: if the starbench command fails.
        """
        git_repos_url = 'https://github.com/hibridon/hibridon'
        git_user = 'g-raffy'  # os.environ['HIBRIDON_REPOS_USER']
        git_pass_file = f'{os.getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat'
        hibridon_version = config['hibridon_version']
        test_id = config['test_id']  # eg arch4_quick or nh3h2_qma_long
        compiler_id = config['compiler_id']

        src_dir = benchmark_output_dir / 'hibridon.git'
        output_dir = benchmark_output_dir / 'output'

        password_provider = f'{{"type": "password-file", "password-file-path": "{git_pass_file}"}}'
        source_tree_provider = f'{{"type": "git-cloner", "repos-url": "{git_repos_url}", "src-dir": "{src_dir}", "code-version": "{hibridon_version}", "git-user": "{git_user}", "password-provider": {password_provider}}}'
        benchmark_command = f'ctest --output-on-failure -L ^{test_id}$'

        cmake_options = [
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
            '-DBUILD_TESTING=ON'  # enable hibridon tests
        ]

        env_vars_bash_commands = ''
        if compiler_id == 'ifort':
            env_vars_bash_commands = 'module load compilers/ifort/latest'
            cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort')  # use intel fortran compiler
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
        elif compiler_id == 'gfortran':
            env_vars_bash_commands = ''
            cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran')  # use gfortran compiler
        else:
            # bugfix: was a no-op `assert f'...'` that let an unhandled compiler
            # fall through and run with incomplete cmake options
            raise ValueError(f'unhandled compiler_id : {compiler_id}')

        ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128'
        proxy_env_vars = ''
        proxy_env_vars = f'{proxy_env_vars} HTTP_PROXY={ur1_proxy_url}'
        proxy_env_vars = f'{proxy_env_vars} HTTPS_PROXY={ur1_proxy_url}'
        proxy_env_vars = f'{proxy_env_vars} FTP_PROXY={ur1_proxy_url}'
        proxy_env_vars = f'{proxy_env_vars} http_proxy={ur1_proxy_url}'
        proxy_env_vars = f'{proxy_env_vars} https_proxy={ur1_proxy_url}'
        proxy_env_vars = f'{proxy_env_vars} ftp_proxy={ur1_proxy_url}'

        # bugfix: the `&& ` prefix was emitted unconditionally; with gfortran
        # env_vars_bash_commands is empty and `' && ...'` is a bash syntax
        # error. Guard the prefix, as MaMul1.execute already does.
        # NOTE(review): --num-cores is hardcoded to 2 here — confirm whether it
        # should come from the config instead.
        shell_command = ''
        if len(env_vars_bash_commands) > 0:
            shell_command += f'{env_vars_bash_commands} && '
        shell_command += f'{proxy_env_vars} starbench --source-tree-provider \'{source_tree_provider}\' --num-cores 2 --output-dir={output_dir} --cmake-path=/usr/bin/cmake {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{benchmark_command}\''
        # bugfix: subprocess.run's `encoding` parameter selects a text codec;
        # the intent was to run the command with bash, i.e. `executable`
        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')
|
|
@ -0,0 +1,62 @@
|
|||
from ..core import IBenchmark, BenchParam, BenchmarkConfig
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
# import importlib.resources
|
||||
|
||||
|
||||
class MaMul1(IBenchmark):
    """Matrix multiplication benchmark.

    Builds the standalone `mamul1` fortran test program with cmake (through the
    external `starbench` tool) and measures repeated n*n matrix products.
    """

    def __init__(self):
        bench_params = []
        bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
        bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
        # bugfix: description had a doubled word ("all the the")
        bench_params.append(BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the n * n matrices'))
        bench_params.append(BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical multiplications performed in sequence'))
        super().__init__(bench_id='mamul1', bench_params=bench_params)

    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """Return the estimated RAM needed by this benchmark, in bytes.

        1 GiB baseline plus the three n*n double-precision matrices involved in
        the multiplication.
        """
        GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024
        SIZE_OF_DOUBLE = 8  # in bytes
        matrix_size = config['matrix_size']
        matrix_ram_size = matrix_size * matrix_size * SIZE_OF_DOUBLE
        num_matrices = 3
        ram_requirements = int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size
        return ram_requirements

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
        """Run the mamul1 benchmark described by `config` through starbench.

        Raises:
            ValueError: if config['compiler_id'] is not a known compiler.
            subprocess.CalledProcessError: if the starbench command fails.
        """
        compiler_id = config['compiler_id']
        num_cores = config['num_cores']
        matrix_size = config['matrix_size']
        num_loops = config['num_loops']

        # TODO(review): the mamul1 source location is hardcoded to a developer
        # machine; it should be resolved from the installed package (see the
        # importlib.resources attempt below).
        # src_dir = Path('test/mamul1').absolute()
        src_dir = Path('/home/graffy/work/starbench/iprbench.git/test/mamul1')
        # with importlib.resources.path('iprbench.resources', 'mamul1') as src_dir:
        output_dir = benchmark_output_dir / 'output'

        source_tree_provider = f'{{"type": "existing-dir", "dir-path": "{src_dir}"}}'
        benchmark_command = ['./mamul1', f'{matrix_size}', f'{num_loops}']

        cmake_options = [
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
        ]

        env_vars_bash_commands = ''
        if compiler_id == 'ifort':
            env_vars_bash_commands = 'module load compilers/ifort/latest'
            cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort')  # use intel fortran compiler
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
        elif compiler_id == 'gfortran':
            env_vars_bash_commands = ''
            cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran')  # use gfortran compiler
        else:
            # bugfix: was a no-op `assert f'...'` (a non-empty string is always
            # truthy); an unknown compiler must fail loudly
            raise ValueError(f'unhandled compiler_id : {compiler_id}')

        shell_command = ''
        if len(env_vars_bash_commands) > 0:
            shell_command += f'{env_vars_bash_commands} && '
        shell_command += f'starbench --source-tree-provider \'{source_tree_provider}\' --num-cores {num_cores} --output-dir={output_dir} --cmake-path=/usr/bin/cmake {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{" ".join(benchmark_command)}\''
        # bugfix: subprocess.run's `encoding` parameter selects a text codec;
        # the intent was to run the command with bash, i.e. `executable`
        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')
|
|
@ -1,8 +1,10 @@
|
|||
#!/usr/bin/env python3
|
||||
# this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number)
|
||||
from typing import List, Tuple, Dict
|
||||
from argparse import ArgumentParser
|
||||
import argparse
|
||||
from os import getenv, makedirs
|
||||
from .core import IBenchmark, BenchmarkConfig, BenchmarkId
|
||||
from .main import BenchmarkFactory
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
|
@ -10,6 +12,8 @@ import re
|
|||
import logging
|
||||
import importlib.resources
|
||||
import venv
|
||||
import json
|
||||
import abc
|
||||
|
||||
|
||||
HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr'
|
||||
|
@ -63,9 +67,8 @@ class ClusterNodeDb:
|
|||
cluster_nodes_defs: List[ClusterNodeDef]
|
||||
cpu_defs: Dict[str, int]
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, cluster_id='alambix'):
|
||||
self.cluster_nodes_defs = []
|
||||
cluster_id = 'alambix'
|
||||
include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue
|
||||
if cluster_id == 'alambix':
|
||||
self.add_cluster_node_def(ClusterNodeDef('alambix50.ipr.univ-rennes.fr', 'intel_xeon_x5650', 2))
|
||||
|
@ -135,8 +138,13 @@ class ClusterNodeDb:
|
|||
self.add_cluster_node_def(ClusterNodeDef('physix100.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2))
|
||||
self.add_cluster_node_def(ClusterNodeDef('physix101.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2))
|
||||
self.add_cluster_node_def(ClusterNodeDef('physix102.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2))
|
||||
elif cluster_id == 'dummy':
|
||||
self.add_cluster_node_def(ClusterNodeDef('graffy-ws2.ipr.univ-rennes.fr', 'intel_core_i5_8350u', 1))
|
||||
else:
|
||||
assert False
|
||||
|
||||
self.cpu_defs = {}
|
||||
self.add_cpu_def(CpuDef('intel_core_i5_8350u', 4))
|
||||
self.add_cpu_def(CpuDef('intel_xeon_x5550', 4))
|
||||
self.add_cpu_def(CpuDef('intel_xeon_x5650', 6))
|
||||
self.add_cpu_def(CpuDef('intel_xeon_e5-2660', 8))
|
||||
|
@ -169,6 +177,62 @@ class ClusterNodeDb:
|
|||
return (hosts, num_cores)
|
||||
|
||||
|
||||
class ICluster(abc.ABC):
    """Abstract interface to a compute cluster.

    Concrete implementations (real SGE cluster, local dummy cluster) wrap a
    ClusterNodeDb describing the nodes and provide job submission.
    """
    cluster_db: ClusterNodeDb  # description of the cluster's nodes and cpus

    def __init__(self, cluster_db: ClusterNodeDb):
        self.cluster_db = cluster_db

    @abc.abstractmethod
    def path_is_reachable_by_compute_nodes(self, path: Path):
        """Tell whether `path` is visible from the cluster's compute nodes."""

    @abc.abstractmethod
    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """Submit `exec_path` as a cluster job.

        qsub_args: the arguments sent to qsub, eg ['-pe', 'smp', '12', 'gaussian.job', 'h2o.gjf']
        """

    def get_cluster_db(self) -> ClusterNodeDb:
        """Return the node database describing this cluster."""
        return self.cluster_db
|
||||
|
||||
|
||||
class IprCluster(ICluster):
    """The real IPR 'alambix' cluster, driven through SGE's qsub."""

    def __init__(self):
        super().__init__(ClusterNodeDb('alambix'))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        """Return True if `path` is under one of the cluster's shared disks."""
        shared_disk_roots = [Path('/opt/ipr/cluster/work.global')]
        for disk_root in shared_disk_roots:
            try:
                _ = path.relative_to(disk_root)
            except ValueError:
                continue  # not under this shared disk; try the next one
            return True
        return False

    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """Submit `exec_path` to SGE with qsub, from `working_dir`."""
        qsub_command = f'qsub {" ".join(qsub_args)} {exec_path} {" ".join(exec_args)}'
        logging.debug('qsub_command = %s, working_dir=%s', qsub_command, working_dir)
        subprocess.run(qsub_command, cwd=working_dir, check=True, shell=True)
|
||||
|
||||
|
||||
class DummyCluster(ICluster):
    """Fake cluster used for testing: jobs run locally instead of via qsub."""

    def __init__(self):
        super().__init__(ClusterNodeDb('dummy'))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        """Every path is reachable since the 'cluster' is the local machine."""
        return True

    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """Execute `exec_path` locally, logging the qsub command it stands for."""
        qsub_command = f'qsub {" ".join(qsub_args)} {exec_path} {" ".join(exec_args)}'
        logging.info('executing %s as a replacement of qsub_command %s, working_dir=%s', exec_path, qsub_command, working_dir)
        subprocess.run(exec_path, check=True, cwd=working_dir)
|
||||
|
||||
|
||||
def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
|
||||
this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
|
||||
assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})'
|
||||
|
@ -188,55 +252,22 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
|
|||
subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
|
||||
|
||||
|
||||
def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: HostGroupId, results_dir: Path, compiler_id: CompilerId, cmake_path: str):
|
||||
def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, cmake_path: str):
|
||||
|
||||
cluster_db = ClusterNodeDb()
|
||||
compiler_id: CompilerId = benchmark_config['compiler_id']
|
||||
|
||||
cluster_db = cluster.get_cluster_db()
|
||||
|
||||
(hosts, num_cores) = cluster_db.get_host_group_info(host_group_id)
|
||||
if len(hosts) == 0:
|
||||
logging.warning('skipping benchmarks with compiler %s on architecture %s because no hosts are available for it', compiler_id, host_group_id)
|
||||
return
|
||||
|
||||
quick_test = 'arch4_quick' # about 2s on a core i5 8th generation
|
||||
representative_test = 'nh3h2_qma_long' # about 10min on a core i5 8th generation
|
||||
use_test_mode = True
|
||||
if use_test_mode:
|
||||
benchmark_test = quick_test
|
||||
else:
|
||||
benchmark_test = representative_test
|
||||
|
||||
logging.info('using test %s for benchmarking', benchmark_test)
|
||||
if benchmark_test == 'arch4_quick':
|
||||
ram_per_core = '1G'
|
||||
elif benchmark_test == 'nh3h2_qma_long':
|
||||
ram_per_core = '2.8G' # this was enough on physix48, but maybe we can reduce more
|
||||
else:
|
||||
assert f'unhandled benchmark_test : {benchmark_test}'
|
||||
|
||||
git_repos_url = 'https://github.com/hibridon/hibridon'
|
||||
git_user = 'g-raffy' # os.environ['HIBRIDON_REPOS_USER']
|
||||
git_pass_file = f'{getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat'
|
||||
cmake_options = [
|
||||
'-DCMAKE_BUILD_TYPE=Release', # build in release mode for highest performance
|
||||
'-DBUILD_TESTING=ON' # enable hibridon tests
|
||||
]
|
||||
|
||||
benchmark_command = f'ctest --output-on-failure -L ^{benchmark_test}$'
|
||||
|
||||
env_vars_bash_commands = ''
|
||||
if compiler_id == 'ifort':
|
||||
env_vars_bash_commands = 'module load compilers/ifort/latest'
|
||||
cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler
|
||||
cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading
|
||||
elif compiler_id == 'gfortran':
|
||||
env_vars_bash_commands = ''
|
||||
cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler
|
||||
else:
|
||||
assert f'unhandled compiler_id : {compiler_id}'
|
||||
benchmark_config['num_cores'] = num_cores # we expect the benchmark to have the parameter num_cores
|
||||
|
||||
makedirs(results_dir, exist_ok=True)
|
||||
|
||||
this_bench_dir = Path(f'{results_dir}/{hibridon_version}/{benchmark_test}/{host_group_id}/{compiler_id}')
|
||||
this_bench_dir = Path(f'{results_dir}/{host_group_id}')
|
||||
makedirs(this_bench_dir, exist_ok=True)
|
||||
|
||||
starbench_job_path = this_bench_dir / 'starbench.job'
|
||||
|
@ -253,44 +284,45 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos
|
|||
# create the job file (which embeds starbench.py)
|
||||
tags_dict = {
|
||||
# '<include:starbench.py>': scripts_dir / 'starbench.py',
|
||||
'<benchmark_id>': str(benchmark.bench_id),
|
||||
'<starbench_job_path>': str(starbench_job_path),
|
||||
'<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path),
|
||||
'<iprbench_venv_archive_path>': str(job_venv_archive_path)
|
||||
'<iprbench_venv_archive_path>': str(job_venv_archive_path),
|
||||
'<benchmark_config>': json.dumps(benchmark_config).replace('"', r'\"'),
|
||||
'<results_dir>': str(results_dir)
|
||||
}
|
||||
with importlib.resources.path('iprbench.resources', 'starbench-template.job') as job_template_path:
|
||||
logging.debug('tags_dict = %s', str(tags_dict))
|
||||
with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
|
||||
# job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job'
|
||||
substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path)
|
||||
subprocess.run(['chmod', 'a+x', starbench_job_path], check=True)
|
||||
|
||||
command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{cmake_path}"'
|
||||
logging.debug('command = %s', command)
|
||||
ram_requirements = benchmark.get_ram_requirements(benchmark_config)
|
||||
ram_per_core = f'{ram_requirements / num_cores / 1.e9}G'
|
||||
|
||||
qsub_command = 'qsub'
|
||||
qsub_command += f' -pe smp {num_cores}'
|
||||
qsub_command += f' -l "hostname={"|".join(hosts)}"'
|
||||
qsub_command += ' -S /bin/bash'
|
||||
qsub_command += ' -cwd'
|
||||
qsub_command += ' -m ae'
|
||||
qsub_command += f' -l mem_available={ram_per_core}'
|
||||
qsub_command += ' -j y' # merge stderr file into stdout file for easier reading of history of events
|
||||
qsub_command += f' -N hibench_{host_group_id}_{compiler_id}_{hibridon_version}'
|
||||
qsub_command += f' {command}'
|
||||
logging.debug('qsub_command = %s', qsub_command)
|
||||
qsub_args = []
|
||||
qsub_args += ['-pe', 'smp', f'{num_cores}']
|
||||
qsub_args += ['-l', f'"hostname={"|".join(hosts)}"']
|
||||
qsub_args += ['-S', '/bin/bash']
|
||||
qsub_args += ['-cwd']
|
||||
qsub_args += ['-m', 'ae']
|
||||
qsub_args += ['-l', f'mem_available={ram_per_core}']
|
||||
qsub_args += ['-j', 'y'] # merge stderr file into stdout file for easier reading of history of events
|
||||
qsub_args += ['-N', f'hibench_{host_group_id}']
|
||||
|
||||
subprocess.run(qsub_command, cwd=this_bench_dir, check=True, shell=True)
|
||||
logging.debug('qsub_args = %s', str(qsub_args))
|
||||
|
||||
exec_path = starbench_job_path
|
||||
exec_args = []
|
||||
|
||||
cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
|
||||
|
||||
|
||||
def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_regexp: str, cmake_path: str):
|
||||
def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, cmake_path: str):
|
||||
"""
|
||||
hibridon_version: the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
|
||||
results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
|
||||
"""
|
||||
|
||||
compilers = [
|
||||
'gfortran',
|
||||
'ifort'
|
||||
]
|
||||
|
||||
cluster_db = ClusterNodeDb()
|
||||
all_host_groups = cluster_db.cpu_defs.keys()
|
||||
|
||||
|
@ -298,43 +330,37 @@ def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_reg
|
|||
host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None]
|
||||
logging.info('requested host groups: %s', host_groups)
|
||||
|
||||
for compiler in compilers:
|
||||
for host_group in host_groups:
|
||||
launch_job_for_host_group(hibridon_version, host_group, results_dir, compiler, cmake_path)
|
||||
|
||||
|
||||
def path_is_reachable_by_compute_nodes(path: Path):
|
||||
path_is_reachable = False
|
||||
for shared_disk_path in [Path('/opt/ipr/cluster/work.global')]:
|
||||
try:
|
||||
_ = path.relative_to(shared_disk_path)
|
||||
except ValueError:
|
||||
continue
|
||||
path_is_reachable = True
|
||||
break
|
||||
return path_is_reachable
|
||||
for host_group in host_groups:
|
||||
launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, cmake_path)
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
arg_parser = ArgumentParser(description='launches hibridon benchmark jobs on IPR\'s physix cluster', epilog='example:\n --commit-id a3bed1c3ccfbca572003020d3e3d3b1ff3934fad')
|
||||
arg_parser.add_argument('--commit-id', type=str, required=True, help='the commit id of the version of code to benchmark')
|
||||
example_text = '''example:
|
||||
|
||||
%(prog)s --benchmark-id 'mamul1' --config '{"matrix_size": 1024, "num_loops":10}' --results-dir /tmp/mamul1_out
|
||||
|
||||
'''
|
||||
|
||||
arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||
arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
|
||||
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
|
||||
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
|
||||
arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.')
|
||||
arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
hibridon_version = args.commit_id
|
||||
|
||||
# the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
|
||||
# '53894da48505892bfa05693a52312bacb12c70c9' # latest from branch master as of 10/06/2022 00:30
|
||||
# code_version='dd0f413b85cf0f727a5a4e88b2b02d75a28b377f' # latest from branch graffy-issue51 as of 10/06/2022 00:30
|
||||
benchmark_id = BenchmarkId(args.benchmark_id)
|
||||
benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
|
||||
|
||||
results_dir = Path(args.results_dir)
|
||||
arch_regexp = args.arch_regexp
|
||||
cmake_path = args.cmake_path
|
||||
benchmark_config = json.loads(args.config)
|
||||
|
||||
if not path_is_reachable_by_compute_nodes(results_dir):
|
||||
cluster = DummyCluster()
|
||||
|
||||
if not cluster.path_is_reachable_by_compute_nodes(results_dir):
|
||||
raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
|
||||
|
||||
launch_perf_jobs(hibridon_version, results_dir, arch_regexp, cmake_path)
|
||||
launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, cmake_path)
|
|
@ -0,0 +1,74 @@
|
|||
from typing import List, Dict, Union
|
||||
from enum import Enum
|
||||
import abc
|
||||
from pathlib import Path
|
||||
|
||||
BenchmarkId = str # a unique name for a benchmark, eg 'matmul1'
|
||||
BenchParamId = str
|
||||
BenchParamType = Union[int, str]
|
||||
BenchmarkConfig = Dict[BenchParamId, BenchParamType]
|
||||
|
||||
|
||||
class Singleton(type):
    """Metaclass that gives each class using it a single shared instance.

    The first call to the class creates and caches the instance; every later
    call returns the cached one.
    """
    _instances = {}  # maps each class to its unique instance

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            # bugfix: the previous `super(type(cls), cls).__call__` resolved the
            # super relative to type(cls); for any metaclass derived from
            # Singleton that would re-enter Singleton.__call__ and recurse
            # forever. Zero-argument super() always binds to Singleton.
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
|
||||
|
||||
|
||||
class BenchParam():
    '''A single parameter of a benchmark.

    Examples: the id of the compiler, the cpu id, the size of the matrix, etc.
    '''
    class Type(Enum):
        # the supported value types for a benchmark parameter
        PARAM_TYPE_STRING = 0
        PARAM_TYPE_INT = 1

    name: BenchParamId  # eg 'matrix_size'
    param_type: Type  # eg PARAM_TYPE_INT
    description: str  # human-readable meaning, eg 'the size n of the n*n matrix'

    def __init__(self, name: str, param_type: Type, description: str):
        self.description = description
        self.param_type = param_type
        self.name = name
|
||||
|
||||
|
||||
class IBenchmark(abc.ABC):
    """Abstract base class of all benchmarks.

    A benchmark declares the parameters it expects (bench_params) and knows how
    to estimate its RAM needs and execute itself for a given configuration.
    """

    bench_id: BenchmarkId  # a unique name for this benchmark, eg 'matmul1'
    bench_params: List[BenchParam]  # the parameters this benchmark's config must provide

    def __init__(self, bench_id: str, bench_params: List[BenchParam]):
        self.bench_id = bench_id
        self.bench_params = bench_params

    @abc.abstractmethod
    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """returns the ram requirements for this benchmark, in bytes
        """

    @abc.abstractmethod
    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
        """execute the benchmark for the given config
        """

    def validate_config(self, config: BenchmarkConfig):
        """Check that `config` and this benchmark's declared parameters match exactly.

        Raises:
            AssertionError: if a declared parameter is missing from the config,
                or if the config contains a parameter unknown to this benchmark.
        """
        # check that all declared benchmark parameters are present in the config
        # (idiom fix: was a try/KeyError/assert-False dance per parameter)
        for bench_param in self.bench_params:
            assert bench_param.name in config, f'failed to find the benchmark parameter {bench_param.name} in the benchmark config'
        # check that all parameters in benchmark config exist as parameters for this benchmark
        # (perf/idiom fix: was an O(n*m) manual flag scan; a set lookup is O(1))
        declared_param_names = {bench_param.name for bench_param in self.bench_params}
        for param_name in config.keys():
            assert param_name in declared_param_names, f'parameter {param_name} doesn\'t exist for benchmark {self.bench_id}'
|
|
@ -1 +1,47 @@
|
|||
__version__ = '0.0.1'
|
||||
from .core import BenchmarkId, IBenchmark, Singleton
|
||||
from .benchmarks.hibench import HiBench
|
||||
from .benchmarks.mamul1 import MaMul1
|
||||
import logging
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
__version__ = '0.0.1'
|
||||
|
||||
|
||||
class BenchmarkFactory(metaclass=Singleton):
    """Creates benchmark instances from their id (shared singleton factory)."""

    def __init__(self):
        pass

    def create_benchmark(self, bench_id: BenchmarkId) -> IBenchmark:
        """Return a fresh instance of the benchmark identified by `bench_id`.

        Raises:
            KeyError: if `bench_id` is not a known benchmark id.
        """
        # perf fix: map ids to classes rather than instances, so that only the
        # requested benchmark is constructed (the previous version instantiated
        # every known benchmark on each call)
        benchmark_classes = {
            'hibench': HiBench,
            'mamul1': MaMul1,
        }
        return benchmark_classes[bench_id]()
|
||||
|
||||
|
||||
def main():
    """Entry point of the benchmark runner command.

    Parses the command line, instantiates the requested benchmark, validates
    the json configuration against it and executes it.
    """
    logging.basicConfig(level=logging.DEBUG)

    example_text = '''example:

    %(prog)s --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out

    '''

    arg_parser = argparse.ArgumentParser(description='executes a benchmark in a cluster job environment', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
    arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
    # bugfix: this option used to have default='cmake' (copy-pasted from a
    # --cmake-path option); 'cmake' is not valid json and would crash
    # json.loads below. The config is mandatory, so require it explicitly.
    arg_parser.add_argument('--config', type=str, required=True, help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')

    args = arg_parser.parse_args()
    benchmark_id = BenchmarkId(args.benchmark_id)
    benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
    benchmark_config = json.loads(args.config)
    benchmark.validate_config(benchmark_config)
    benchmark.execute(benchmark_config, args.results_dir)
|
||||
|
|
|
@ -1,13 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
# this job file is a template file for starbench jobs
|
||||
git_repos_url="$1" # eg "https://github.com/hibridon/hibridon"
|
||||
git_user="$2" # eg 'g-raffy'
|
||||
git_pass_file="$3" # eg "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat"
|
||||
code_version="$4" # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
|
||||
cmake_options="$5" # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON'
|
||||
benchmark_command="$6" # eg 'ctest -L ^arch4_quick$'
|
||||
env_vars_bash_commands="$7" # defines extra environment variables prior to launch starbench. eg "export MKLROOT=/opt/intel/compilers_and_libraries_2020.1.217/linux/mkl"
|
||||
cmake_path="$8" # eg '/opt/cmake/cmake-3.23.0/bin/cmake'
|
||||
# this job file is a template file for clusterbench jobs
|
||||
executed_by_sge=''
|
||||
|
||||
if [ "${JOB_ID}" = '' ]
|
||||
|
@ -37,7 +29,7 @@ iprbench_venv_parent=$(dirname "$iprbench_venv_path")
|
|||
iprbench_venv_archive_path='<iprbench_venv_archive_path>'
|
||||
echo "unarchiving virtual environment ${iprbench_venv_archive_path} to ${iprbench_venv_parent}"
|
||||
pushd "${iprbench_venv_parent}"
|
||||
tar xzvf "${iprbench_venv_archive_path}"
|
||||
tar xzvf "${iprbench_venv_archive_path}" > /dev/null
|
||||
popd
|
||||
if [ ! -d "${iprbench_venv_path}" ]
|
||||
then
|
||||
|
@ -61,34 +53,9 @@ num_cores=${NSLOTS}
|
|||
|
||||
# set environment variables
|
||||
|
||||
echo "env_vars_bash_commands=$env_vars_bash_commands"
|
||||
eval $env_vars_bash_commands
|
||||
|
||||
# launch starbench
|
||||
|
||||
strUr1ProxyUrl='http://proxy-nt.univ-rennes1.fr:3128'
|
||||
strProxyVars=''
|
||||
strProxyVars="$strProxyVars HTTP_PROXY=$strUr1ProxyUrl"
|
||||
strProxyVars="$strProxyVars HTTPS_PROXY=$strUr1ProxyUrl"
|
||||
strProxyVars="$strProxyVars FTP_PROXY=$strUr1ProxyUrl"
|
||||
strProxyVars="$strProxyVars http_proxy=$strUr1ProxyUrl"
|
||||
strProxyVars="$strProxyVars https_proxy=$strUr1ProxyUrl"
|
||||
strProxyVars="$strProxyVars ftp_proxy=$strUr1ProxyUrl"
|
||||
|
||||
command="$strProxyVars starbench"
|
||||
command="${command} --git-repos-url ${git_repos_url}"
|
||||
command="${command} --git-user ${git_user}"
|
||||
command="${command} --git-pass-file ${git_pass_file}"
|
||||
command="${command} --num-cores ${num_cores}"
|
||||
command="${command} --output-dir ${output_dir}"
|
||||
command="${command} --code-version ${code_version}"
|
||||
command="${command} --cmake-path ${cmake_path}"
|
||||
# echo "cmake_options: @$cmake_options@"
|
||||
for cmake_option in ${cmake_options}
|
||||
do
|
||||
command="${command} --cmake-option=${cmake_option}"
|
||||
done
|
||||
command="${command} --benchmark-command=\"${benchmark_command}\""
|
||||
# launch the benchmark
|
||||
command="iprbench-run --benchmark-id '<benchmark_id>' --config '<benchmark_config>' --results-dir '${output_dir}'"
|
||||
|
||||
echo "command: ${command}"
|
||||
eval ${command}
|
|
@ -15,7 +15,8 @@ dependencies = [
|
|||
"sqlalchemy",
|
||||
# "cocluto >= 1.2"
|
||||
# "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto"
|
||||
"starbench@git+https://github.com/g-raffy/starbench"
|
||||
"starbench >= 1.0.1"
|
||||
# "starbench@git+https://github.com/g-raffy/starbench"
|
||||
]
|
||||
requires-python = ">= 3.8"
|
||||
authors = [
|
||||
|
@ -23,14 +24,15 @@ authors = [
|
|||
]
|
||||
|
||||
[project.scripts]
|
||||
hibenchonphysix = "iprbench.hibench.hibenchonphysix:main"
|
||||
showresults = "iprbench.hibench.showresults:main"
|
||||
clusterbench-submit = "iprbench.clusterbench:main"
|
||||
iprbench-run = "iprbench.main:main"
|
||||
showresults = "iprbench.benchmarks.showresults:main"
|
||||
|
||||
[project.urls]
|
||||
Repository = "https://github.com/g-raffy/starbench"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ["iprbench", "iprbench.hibench"]
|
||||
packages = ["iprbench", "iprbench.benchmarks"]
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = {attr = "iprbench.main.__version__"}
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
enable_language (Fortran)

# build options controlling which matrix multiplication backend is compiled in
set(MAMUL1_USE_MAGMA "OFF" CACHE BOOL "if set, mamul1 build uses magma (matrix algebra on gpu)")

set(MAMUL1_MAGMA_API "CPU_MEM_API" CACHE STRING "which magma API to use when building mamul1: CPU_MEM_API for BLAS compatible API (uses matrices stored on CPU memory) or GPU_MEM_API (use matrices stored on GPU memory)")

add_executable(mamul1 mamul1.F90)

if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
	# Allow arbitrary long lines. Needed as preprocessing could generate long line lengths.
	target_compile_options(mamul1 PUBLIC -ffree-line-length-none)
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel")
	# Intel (ifort)
	# fixed: the previous condition tested the undefined variable
	# Fortran_COMPILER_NAME, so this branch could never trigger
	target_compile_options(mamul1 PUBLIC -no-wrap-margin)
endif()


if (MAMUL1_USE_MAGMA)
	find_package( MAGMA REQUIRED )
	# select which magma entry points mamul1.F90 compiles against
	if( MAMUL1_MAGMA_API STREQUAL "CPU_MEM_API" )
		target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM)
	elseif( MAMUL1_MAGMA_API STREQUAL "GPU_MEM_API" )
		target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM_GPU)
	else()
		message(FATAL_ERROR "unexpected value for MAMUL1_MAGMA_API : ${MAMUL1_MAGMA_API}")
	endif()
	message(STATUS "MAGMA_INCLUDES=${MAGMA_INCLUDES}")
	include_directories("${MAGMA_INCLUDES}")
	target_link_libraries(mamul1 "${MAGMA_LIBRARIES}")
else()
	# cpu build: plain blas dgemm
	find_package( BLAS REQUIRED )
	find_package( LAPACK REQUIRED )
	# message("BLAS_LIBRARIES=${BLAS_LIBRARIES}")
	# message("LAPACK_LIBRARIES=${LAPACK_LIBRARIES}")
	target_compile_definitions(mamul1 PUBLIC USE_DGEMM)

	# Link Blas and Lapack libraries
	target_link_libraries(mamul1 "${LAPACK_LIBRARIES}")
	target_link_libraries(mamul1 "${BLAS_LIBRARIES}")
endif()

install(TARGETS mamul1)
|
|
@ -0,0 +1,339 @@
|
|||
#define MAMUL1_VERSION "1.0.0"
|
||||
|
||||
#define magma_devptr_t integer(kind=8)
|
||||
subroutine print_usage(prog_path)
   ! prints the program version, its build variant and the command line usage to stdout
   character(len=*), intent(in) :: prog_path  ! path of this executable, as typed on the command line
   character(len=80) :: build_variant  ! human readable build flavour, derived from compile-time defines
#if defined(USE_MAGMA_DGEMM_GPU)
   build_variant='gpu'
#elif defined(USE_DGEMM)
   build_variant='cpu'
#else
   build_variant='unknown'
#endif
   write(6,'("mamul1 v",a," (variant:",a,"): benchmark performs a square matrix multiplication in double precision")') MAMUL1_VERSION, trim(build_variant);
   write(6,'()');
   write(6,'("Usage: ",a," <NDIM> <NUM_LOOPS>")') trim(prog_path);
   write(6,'(" <NDIM> positive integer representing the size of the square matrices to multiply ")');
   write(6,'(" <NUM_LOOPS> positive integer representing the number of times the multiplication is performed")');
end subroutine
|
||||
|
||||
program mamul1
   ! command line benchmark: multiplies two random NDIM x NDIM matrices in
   ! double precision, NUM_LOOPS times (see test_dgemm), and prints timings

   implicit none


   integer :: argc, info, ndim, num_loops  ! argc: cli argument count; info: io/conversion status

   character(len=32) :: arg0, arg1, arg2  ! program path, <NDIM>, <NUM_LOOPS>


   call get_command_argument(0,arg0)

   ! exactly two arguments are expected: <NDIM> and <NUM_LOOPS>
   argc = command_argument_count()
   if (argc /= 2) then
      call print_usage(trim(arg0))
      ! write(6,'("Usage: ",a," NDIM NUM_LOOPS, where NDIM is a positive integer")') trim(arg0);
      stop
   end if

   call get_command_argument(1,arg1,status=info)
   if (info /= 0) then
      write(6,'("Error reading argument: info = ",i2)') info
      call print_usage(trim(arg0))
      stop
   end if

   call get_command_argument(2,arg2,status=info)
   if (info /= 0) then
      write(6,'("Error reading argument: info = ",i2)') info
      call print_usage(trim(arg0))
      stop
   end if

   ! convert the textual arguments to integers
   read(arg1,*,iostat=info) ndim
   if (info /= 0) then
      write(6,'("Error converting ndim argument to integer: info = ",i2)') info
      call print_usage(trim(arg0))
      stop
   end if

   read(arg2,*,iostat=info) num_loops
   if (info /= 0) then
      write(6,'("Error converting num_loops argument to integer: info = ",i2)') info
      call print_usage(trim(arg0))
      stop
   end if


   ! reject non-positive matrix sizes
   if (ndim < 1) then
      call print_usage(trim(arg0))
      stop
   end if

   call test_dgemm(ndim, num_loops)

   stop
end program mamul1
|
||||
|
||||
subroutine set_random_seed(seed)
   ! seeds the pseudo random number generator with a single integer so that
   ! successive runs generate the same sequence (reproducible benchmarks)
   integer :: seed  ! value replicated into every element of the generator's seed array
   integer :: num_seeds  ! how many integers the generator uses as its seed
   integer, allocatable :: seeds(:)

   ! ask the runtime for the size of its seed array
   call random_seed(size=num_seeds)
   allocate(seeds(num_seeds))
   seeds = seed
   call random_seed(put=seeds(1:num_seeds))
end subroutine
|
||||
|
||||
subroutine print_matrix(mat, ndim)
   ! writes the square matrix mat (ndim x ndim) to stdout, one row per line
   implicit none
   integer, parameter :: dp = kind(1.0d0)  ! double precision kind
   real(dp), intent(in) :: mat(ndim, ndim)
   integer, intent(in) :: ndim
   integer :: row_index

   do row_index = 1, ndim
      write(6, *) mat(row_index, :)
   end do
end subroutine
|
||||
|
||||
! square matrix multiplication
! computes cmat = amat * bmat (ndim x ndim, double precision) and prints timings.
! the backend is selected at compile time:
!  - USE_MAGMA_DGEMM_GPU: magma dgemm on matrices copied to/from gpu memory
!  - USE_DGEMM: plain blas dgemm on cpu memory
! note: there is no implicit none here, so ndim (the matrix order) is implicitly typed -- TODO confirm intended
subroutine sqmatmul(amat, bmat, cmat, ndim)
#if defined(USE_MAGMA_DGEMM_GPU)
   use magma, only: magmaf_init, magmaf_finalize
   use magma, only: magmaf_queue_create, magmaf_queue_destroy
   use magma, only: magmaf_dmalloc, magmaf_free
   use magma, only: magmaf_dsetmatrix, magmaf_dgetmatrix
   use magma, only: magmablasf_dgemm
#endif
   real*8, intent(in) :: amat(ndim,ndim)   ! left operand
   real*8, intent(in) :: bmat(ndim,ndim)   ! right operand
   real*8, intent(out) :: cmat(ndim,ndim)  ! result matrix
   integer :: lda, ldb, ldc  ! leading dimensions of the gpu copies of the matrices
   integer :: info

   real :: time_before, time_after
   integer(8) :: num_ops
   real :: gflops

#ifdef USE_MAGMA_DGEMM_GPU
   magma_devptr_t :: d_amat
   magma_devptr_t :: d_bmat
   magma_devptr_t :: d_cmat
   magma_devptr_t :: queue !! really a CPU pointer
#endif
   ! round the leading dimensions up to a multiple of 32 (gpu friendly alignment)
   lda = ceiling(real(ndim)/32)*32
   ldb = ceiling(real(ndim)/32)*32
   ldc = ceiling(real(ndim)/32)*32


#if defined(USE_MAGMA_DGEMM_GPU)
   !! allocate GPU memory
   write(6,'("DEBUG: before matrix A gpu memory allocation (",i0," doubles)")') lda * ndim
   info = magmaf_dmalloc( d_amat, lda*ndim )
   if (d_amat == 0) then
      print "(a)", "failed to allocate d_amat"
      return
   endif
   write(6,'("DEBUG: before matrix B gpu memory allocation (",i0," doubles)")') ldb * ndim
   info = magmaf_dmalloc( d_bmat, ldb*ndim )
   if (d_bmat == 0) then
      print "(a)", "failed to allocate d_bmat"
      return
   endif
   write(6,'("DEBUG: before matrix C gpu memory allocation (",i0," doubles)")') ldc * ndim
   info = magmaf_dmalloc( d_cmat, ldc*ndim )
   if (d_cmat == 0) then
      print "(a)", "failed to allocate d_cmat"
      return
   endif

   ! copy A to dA and B to dB
   call magmaf_queue_create( 0, queue )
   write(6,'("DEBUG: queue = ",i0)') queue
   if (queue == 0) then
      print "(a)", "failed to create a queue"
      return
   endif

   write(6,*) 'DEBUG: copying matrix A from CPU to GPU memory'
   call magmaf_dsetmatrix( ndim, ndim, amat, ndim, d_amat, lda, queue )
   write(6,*) 'DEBUG: copying matrix B from CPU to GPU memory'
   call magmaf_dsetmatrix( ndim, ndim, bmat, ndim, d_bmat, ldb, queue )

   call cpu_time(time_before)
   write (6,*) 'before magmablasf_dgemm, time=', time_before

   ! cmat = 1.0 * amat * bmat + 0.0 * cmat, entirely on gpu memory
   call magmablasf_dgemm ('N', 'N', ndim, ndim, ndim, 1.0d0, d_amat, lda, d_bmat, ldb, 0.0d0, d_cmat, ldc, queue)
   call magmaf_queue_sync(queue)

   call cpu_time(time_after)
   ! a ndim^3 matrix product costs 2*ndim^3 floating point operations (mul + add)
   num_ops = real(ndim) * real(ndim) * real(ndim) * 2
   gflops = num_ops / (time_after - time_before) / 1.0e9
   write (6,*) 'after magmablasf_dgemm, time=', time_after
   write (6,*) 'magmablasf_dgemm (from gpu memory to gpu memory) duration :', (time_after - time_before), '(', gflops, ' gflops)'

   write(6,*) 'DEBUG: copying matrix C from GPU to CPU memory'
   call magmaf_dgetmatrix( ndim, ndim, d_cmat, ldc, cmat, ndim, queue )
   call magmaf_queue_destroy( queue )

   ! release gpu memory in reverse allocation order
   info = magmaf_free(d_cmat)
   info = magmaf_free(d_bmat)
   info = magmaf_free(d_amat)

#endif

#ifdef USE_DGEMM
   ! subroutine dgemm ( character TRANSA,
   ! character TRANSB,
   ! integer M,
   ! integer N,
   ! integer K,
   ! double precision ALPHA,
   ! double precision, dimension(lda,*) A,
   ! integer LDA,
   ! double precision, dimension(ldb,*) B,
   ! integer LDB,
   ! double precision BETA,
   ! double precision, dimension(ldc,*) C,
   ! integer LDC
   ! )
   call dgemm('N', 'N', ndim, ndim, ndim, 1.0d0, amat, ndim, bmat, ndim, 0.0d0, cmat, ndim)
#endif

end subroutine
|
||||
|
||||
subroutine check_cmat_element(cmat, row, col, amat, bmat, ndim)
   ! recomputes a single element of amat * bmat with a scalar loop and stops
   ! the program if the value produced by the benchmark differs from it
   real(8), intent(in) :: cmat(ndim, ndim)  ! product matrix to verify
   integer, intent(in) :: row  ! row index of the element to check
   integer, intent(in) :: col  ! column index of the element to check
   real(8), intent(in) :: amat(ndim, ndim)
   real(8), intent(in) :: bmat(ndim, ndim)
   integer, intent(in) :: ndim

   real(8) :: x  ! independently computed reference value of cmat(row, col)
   ! note: loop index i is implicitly typed (there is no implicit none here)
   x = 0.0d0
   do i = 1, ndim
      x = x + amat(row, i) * bmat(i, col)
   end do

   write(6, '("expected cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, x
   write(6, '("computed cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, cmat(row, col)
   ! NOTE(review): 1.0e-8 is an absolute tolerance; for very large ndim the
   ! accumulated rounding error could exceed it -- a relative tolerance would be safer
   if (abs(cmat(row, col) - x) > 1.0e-8) then
      stop 'a computed element has a wrong value'
   end if
end subroutine
|
||||
|
||||
|
||||
subroutine test_dgemm(ndim, num_loops)
   ! benchmark driver: builds two random ndim x ndim double precision matrices,
   ! multiplies them num_loops times via sqmatmul, measures elapsed cpu and wall
   ! clock time, checks 4 elements of the result and prints gflops figures
#if defined(USE_MAGMA_DGEMM_GPU)
   use magma, only: magmaf_init, magmaf_finalize
   use magma, only: magmablasf_dgemm !, magmaf_dgemm_gpu
#endif

   implicit none
   integer, intent(in) :: ndim       ! order of the square matrices
   integer, intent(in) :: num_loops  ! number of multiplications to perform
   integer, parameter :: dp = kind(1.0d0)
   real :: ct_start, ct_stop ! elapsed cpu time relative to an arbitrary fixed time. Expressed in seconds with the granularity of 1 microsecond
   integer(8) :: num_ops
   real :: gflops

   integer :: sc_start, sc_stop ! system clock time of start and stop events, expressed in ticks
   integer :: sc_count_rate ! number of system clock ticks per second
   integer :: sc_count_max ! the max possible number of system clock ticks returned by system_clock
   integer :: s
   REAL :: a_diff, diff
   REAL :: num_sc_ticks_per_second ! the number of system clock ticks per second

   real*8, allocatable :: amat(:,:)
   real*8, allocatable :: bmat(:,:)
   real*8, allocatable :: cmat(:,:)
   real(dp) :: x
   integer :: i, j

#if defined(USE_MAGMA_DGEMM_GPU)
   write(6,*) 'DEBUG: init magma'
   call magmaf_init()
#endif

   ! First initialize the system_clock
   CALL system_clock(count_rate=sc_count_rate)
   CALL system_clock(count_max=sc_count_max)
   num_sc_ticks_per_second = REAL(sc_count_rate)
   WRITE(*,*) "system_clock rate : ", num_sc_ticks_per_second, " ticks per second"

   diff = 0.0
   a_diff = 0.0
   s = 0

   allocate(amat(ndim, ndim))
   allocate(bmat(ndim, ndim))
   allocate(cmat(ndim, ndim))

   ! fixed seed so that every run multiplies the same matrices
   call set_random_seed(42)

   !call random_number(amat)
   !amat = 0.5_dp*(amat + transpose(amat))
   ! fill both operands with uniform random values in [0,1)
   do j = 1, ndim
      do i = 1, ndim
         call random_number(x)
         amat(i,j) = x
         call random_number(x)
         bmat(i,j) = x
      end do
   end do

   call cpu_time(ct_start)
   call system_clock(sc_start)

   ! timed section: num_loops products of the same two matrices
   do j = 1, num_loops
      ! playmat = amat

      call sqmatmul(amat, bmat, cmat, ndim)

   end do

   call cpu_time(ct_stop)
   call system_clock(sc_stop)
   ! bookkeeping on the discrepancy between wall clock and cpu clock
   if ( (sc_stop - sc_start)/num_sc_ticks_per_second < (ct_stop - ct_start) ) s = s + 1
   diff = (sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start) + diff
   a_diff = ABS((sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start)) + a_diff

   ! check one of the elements of cmat (the last one here: cmat(ndim, ndim))
   call check_cmat_element(cmat, 1, 1, amat, bmat, ndim)
   call check_cmat_element(cmat, 1, ndim, amat, bmat, ndim)
   call check_cmat_element(cmat, ndim, 1, amat, bmat, ndim)
   call check_cmat_element(cmat, ndim, ndim, amat, bmat, ndim)

   ! write(6, *) 'amat = '
   ! call print_matrix(amat, ndim)

   ! write(6, *) 'bmat = '
   ! call print_matrix(bmat, ndim)

   ! write(6, *) 'cmat = '
   ! call print_matrix(cmat, ndim)

   ! 2*ndim^3 floating point operations per product, num_loops products
   num_ops = real(ndim) * real(ndim) * real(ndim) * 2 * num_loops
   gflops = num_ops / (ct_stop-ct_start) / 1.0e9


   write(6, '("Time taken by dgemm for matrix size ",i8," was ",f10.2," seconds")') ndim, ct_stop-ct_start
   WRITE(*,*) "gflops (including potential memory transfers) : ", gflops

   WRITE(*,*) "system_clock : ",(sc_stop - sc_start)/num_sc_ticks_per_second
   WRITE(*,*) "cpu_time : ",(ct_stop - ct_start)
   WRITE(*,*) "sys_clock < cpu_time : ",s
   WRITE(*,*) "mean diff : ",diff
   WRITE(*,*) "abs mean diff : ",a_diff

#if defined(USE_MAGMA_DGEMM_GPU)
   write(6,*) 'DEBUG: deinit magma'
   call magmaf_finalize()
#endif


   deallocate(amat, bmat, cmat)
end
|
|
@ -0,0 +1,30 @@
|
|||
import unittest
|
||||
import logging
|
||||
import subprocess
|
||||
# import importlib.resources
|
||||
|
||||
|
||||
class ClusterBenchTestCase(unittest.TestCase):
    """Integration tests for the ``clusterbench-submit`` command line tool."""

    # configures logging once, at class-definition (import) time
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
        return super().setUp()

    def test_clusterbench_submit(self):
        """Submits the mamul1 benchmark through clusterbench-submit and expects success.

        check=True makes the test fail if the command exits with a non-zero status.
        """
        logging.info('test_clusterbench_submit')
        # with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
        # print(job_template_path)
        # assert False
        # subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
        command = 'clusterbench-submit --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
        subprocess.run(command, shell=True, check=True, executable='/bin/bash')

    # def test_clusterbench_hibench(self):
    #     logging.info('test_clusterbench_hibench')
    #     command = 'clusterbench-submit --benchmark-id \'hibench\' --config \'{"compiler_id": "gfortran", "test_id": "arch4_quick"}\' --results-dir /tmp/mamul1_out'
    #     subprocess.run(command, shell=True, check=True, executable='/bin/bash')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -0,0 +1,27 @@
|
|||
import unittest
|
||||
import logging
|
||||
import subprocess
|
||||
# import importlib.resources
|
||||
|
||||
|
||||
class IprBenchTestCase(unittest.TestCase):
    """Integration tests for the ``iprbench-run`` command line tool."""

    # configures logging once, at class-definition (import) time
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
        return super().setUp()

    def test_iprbench_run(self):
        """Runs the mamul1 benchmark directly (without a cluster) and expects success.

        check=True makes the test fail if the command exits with a non-zero status.
        """
        logging.info('test_iprbench_run')
        # with importlib.resources.path('iprbench.resources', 'mamul1') as src_dir:
        # with open(src_dir / 'mamul1.F90', encoding='utf8') as f:
        # print(f.readlines())
        # with open(src_dir / 'CMakeLists.txt', encoding='utf8') as f:
        # print(f.readlines())
        # subprocess.run(f'cat {src_dir / "CMakeLists.txt"}', check=True)
        command = 'iprbench-run --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}\' --results-dir /tmp/mamul1_out'
        subprocess.run(command, shell=True, check=True, executable='/bin/bash')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
Reference in New Issue