refactored iprbench to separate ipr benchmark framework from the actual benchmarks

This decoupling allows benchmarks to be written as modules that can be used in various situations (from a benchmark job or directly by a user). This design will also allow automatic registration of the benchmark results in a user-selectable form (sql database, stdout, etc.)

- separated `hibenchonphysix.py` into `clusterbench.py` (tool to run a benchmark on a cluster) and `hibench.py` (hibridon benchmark module) so that `clusterbench.py` no longer has a knowledge about hibridon.
- there are currently 2 ways to run a benchmark:
  1. as a simple run through `clusterbench-run` command (which will eventually be renamed as iprbench-run since it might be completely independent from the concept of cluster)
  2. as cluster jobs  through `clusterbench-submit` command
- added unit test
- added another benchmark `mamul1` that is used as a unittest because it has 2 benefits over `hibench` benchmark:
   1. it's standalone (no external resources needed)
   2. it's quicker to execute

note: this refactoring work is not complete yet, but the proof of concept is complete (the 2 unittests pass):
- still need to provide the user a way to switch between IprCluster and DummyCluster (which is only intended to be used for testing clusterbench)
- still need to run multiple configs of the same benchmark in one run (as hibenchonphysix did)

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958] and [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
This commit is contained in:
Guillaume Raffy 2024-10-22 09:16:41 +02:00
parent fe4a07a67e
commit 011d4eddf9
17 changed files with 865 additions and 132 deletions

6
.gitignore vendored
View File

@ -1,3 +1,7 @@
iprbench.venv/ iprbench.venv/
results/ results/
iprbench/__pycache__/__init__.cpython-38.pyc iprbench.egg-info/
iprbench/benchmarks/__pycache__/
iprbench/__pycache__/
test/__pycache__/
iprbench/resources/__pycache__/

View File

@ -90,6 +90,44 @@ Installing collected packages: pytz, tzdata, typing-extensions, starbench, six,
Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2 Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2
``` ```
## run unit tests
```sh
20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_iprbench
2024-10-18 16:57:42,589 - INFO - test_iprbench_run
creating build directory /tmp/mamul1_out/output/worker<worker_id>
executing the following command in parallel (2 parallel runs) : '['mkdir', '-p', '/tmp/mamul1_out/output/worker<worker_id>/build']'
mean duration : 0.004 s (2 runs)
configuring /home/graffy/work/starbench/iprbench.git/test/mamul1 into /tmp/mamul1_out/output/worker<worker_id>/build ...
executing the following command in parallel (2 parallel runs) : '['/usr/bin/cmake', '-DCMAKE_BUILD_TYPE=Release', '-DCMAKE_Fortran_COMPILER=gfortran', '/home/graffy/work/starbench/iprbench.git/test/mamul1']'
mean duration : 0.098 s (2 runs)
building /tmp/mamul1_out/output/worker<worker_id>/build ...
executing the following command in parallel (2 parallel runs) : '['make']'
mean duration : 0.073 s (2 runs)
benchmarking /tmp/mamul1_out/output/worker<worker_id>/build ...
executing the following command in parallel (2 parallel runs) : '['./mamul1', '1024', '10']'
mean duration : 0.660 s (2 runs)
duration : 0.660 s
.
----------------------------------------------------------------------
Ran 1 test in 1.035s
OK
last command status : [0]
```
```sh
20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_clusterbench
```
## launch a benchmark on the current system
```sh
iprbench-run --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out
```
## launch benchmark jobs on alambix cluster ## launch benchmark jobs on alambix cluster
```sh ```sh

View File

@ -0,0 +1,75 @@
from ..core import IBenchmark, BenchParam, BenchmarkConfig
from pathlib import Path
import subprocess
import os
class HiBench(IBenchmark):
    """Hibridon benchmark.

    Uses the ``starbench`` command to clone a given version of hibridon from
    github, build it with cmake and run one of its ctest-labelled tests.
    """

    def __init__(self):
        bench_params = [
            BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id'),
            BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'),
            BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'),
            BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'),
            BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'),
            BenchParam('test_id', BenchParam.Type.PARAM_TYPE_STRING, 'the name of the test to run (eg arch4_quick (about 2s on a core i5 8th generation) or nh3h2_qma_long (about 10min on a core i5 8th generation))'),
        ]
        super().__init__(bench_id='hibench', bench_params=bench_params)

    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """Return the ram requirement (in bytes) of the test selected by config['test_id'].

        NOTE(review): the value is computed as a per-core amount; confirm
        that callers expecting a total amount handle this correctly.

        Raises:
            ValueError: if config['test_id'] is not a known test.
        """
        GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024
        benchmark_test = config['test_id']
        if benchmark_test == 'arch4_quick':
            ram_per_core = int(1 * GIBIBYTE_TO_BYTE)
        elif benchmark_test == 'nh3h2_qma_long':
            ram_per_core = int(2.8 * GIBIBYTE_TO_BYTE)  # this was enough on physix48, but maybe we can reduce more
        else:
            # a bare `assert '<message>'` never fails (non-empty strings are truthy), so raise explicitly
            raise ValueError(f'unhandled benchmark_test : {benchmark_test}')
        return ram_per_core

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
        """Run the hibridon benchmark described by config through starbench.

        Args:
            config: the benchmark configuration; the keys 'hibridon_version',
                'test_id' and 'compiler_id' are read here.
            benchmark_output_dir: directory that receives the hibridon clone
                ('hibridon.git') and starbench's output ('output').

        Raises:
            ValueError: if config['compiler_id'] is not 'ifort' or 'gfortran'.
            subprocess.CalledProcessError: if the starbench command fails.
        """
        git_repos_url = 'https://github.com/hibridon/hibridon'
        git_user = 'g-raffy'  # os.environ['HIBRIDON_REPOS_USER']
        git_pass_file = f'{os.getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat'
        hibridon_version = config['hibridon_version']
        test_id = config['test_id']  # eg arch4_quick or nh3h2_qma_long
        compiler_id = config['compiler_id']

        src_dir = benchmark_output_dir / 'hibridon.git'
        output_dir = benchmark_output_dir / 'output'

        # starbench expects these providers as json strings
        password_provider = f'{{"type": "password-file", "password-file-path": "{git_pass_file}"}}'
        source_tree_provider = f'{{"type": "git-cloner", "repos-url": "{git_repos_url}", "src-dir": "{src_dir}", "code-version": "{hibridon_version}", "git-user": "{git_user}", "password-provider": {password_provider}}}'
        benchmark_command = f'ctest --output-on-failure -L ^{test_id}$'

        cmake_options = [
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
            '-DBUILD_TESTING=ON'  # enable hibridon tests
        ]

        env_vars_bash_commands = ''
        if compiler_id == 'ifort':
            env_vars_bash_commands = 'module load compilers/ifort/latest'
            cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort')  # use intel fortran compiler
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
        elif compiler_id == 'gfortran':
            cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran')  # use gfortran compiler
        else:
            raise ValueError(f'unhandled compiler_id : {compiler_id}')

        # the proxy settings are passed as per-command environment variable assignments
        ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128'
        proxy_var_names = ['HTTP_PROXY', 'HTTPS_PROXY', 'FTP_PROXY', 'http_proxy', 'https_proxy', 'ftp_proxy']
        proxy_env_vars = ' '.join(f'{var_name}={ur1_proxy_url}' for var_name in proxy_var_names)

        cmake_option_args = ' '.join(f'--cmake-option={option}' for option in cmake_options)

        shell_command = ''
        if env_vars_bash_commands:
            # only chain with && when there is something to chain: a command line starting with '&&' is a shell syntax error
            shell_command += f'{env_vars_bash_commands} && '
        shell_command += f"{proxy_env_vars} starbench --source-tree-provider '{source_tree_provider}' --num-cores 2 --output-dir={output_dir} --cmake-path=/usr/bin/cmake {cmake_option_args} --benchmark-command='{benchmark_command}'"
        # `module load` needs bash, so select it explicitly as the shell executable
        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')

View File

@ -0,0 +1,62 @@
from ..core import IBenchmark, BenchParam, BenchmarkConfig
from pathlib import Path
import subprocess
# import importlib.resources
class MaMul1(IBenchmark):
    """Matrix multiplication benchmark.

    Uses the ``starbench`` command to build the mamul1 fortran program with
    cmake and time its execution.
    """

    def __init__(self):
        bench_params = [
            BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'),
            BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'),
            BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the the n * n matrices'),
            BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical multiplications performed in sequence'),
        ]
        super().__init__(bench_id='mamul1', bench_params=bench_params)

    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """Return the ram requirement in bytes: 1 GiB plus room for 3 matrices of doubles.

        Each matrix has config['matrix_size'] rows and columns.
        """
        GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024
        SIZE_OF_DOUBLE = 8  # in bytes
        matrix_size = config['matrix_size']
        matrix_ram_size = matrix_size * matrix_size * SIZE_OF_DOUBLE
        num_matrices = 3
        ram_requirements = int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size
        return ram_requirements

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
        """Build and run mamul1 through starbench for the given config.

        Args:
            config: the benchmark configuration; the keys 'compiler_id',
                'num_cores', 'matrix_size' and 'num_loops' are read here.
            benchmark_output_dir: directory in which starbench's 'output'
                directory is created.

        Raises:
            ValueError: if config['compiler_id'] is not 'ifort' or 'gfortran'.
            subprocess.CalledProcessError: if the starbench command fails.
        """
        compiler_id = config['compiler_id']
        num_cores = config['num_cores']
        matrix_size = config['matrix_size']
        num_loops = config['num_loops']

        # NOTE(review): this path only exists on the original developer's workstation;
        # it should eventually be located relative to the package (eg through importlib.resources)
        src_dir = Path('/home/graffy/work/starbench/iprbench.git/test/mamul1')

        output_dir = benchmark_output_dir / 'output'

        source_tree_provider = f'{{"type": "existing-dir", "dir-path": "{src_dir}"}}'  # starbench expects a json string
        benchmark_command = ['./mamul1', f'{matrix_size}', f'{num_loops}']

        cmake_options = [
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
        ]

        env_vars_bash_commands = ''
        if compiler_id == 'ifort':
            env_vars_bash_commands = 'module load compilers/ifort/latest'
            cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort')  # use intel fortran compiler
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
        elif compiler_id == 'gfortran':
            cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran')  # use gfortran compiler
        else:
            # a bare `assert '<message>'` never fails (non-empty strings are truthy), so raise explicitly
            raise ValueError(f'unhandled compiler_id : {compiler_id}')

        cmake_option_args = ' '.join(f'--cmake-option={option}' for option in cmake_options)

        shell_command = ''
        if env_vars_bash_commands:
            shell_command += f'{env_vars_bash_commands} && '
        shell_command += f"starbench --source-tree-provider '{source_tree_provider}' --num-cores {num_cores} --output-dir={output_dir} --cmake-path=/usr/bin/cmake {cmake_option_args} --benchmark-command='{' '.join(benchmark_command)}'"
        # `module load` needs bash, so select it explicitly as the shell executable
        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number) # this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number)
from typing import List, Tuple, Dict from typing import List, Tuple, Dict
from argparse import ArgumentParser import argparse
from os import getenv, makedirs from os import getenv, makedirs
from .core import IBenchmark, BenchmarkConfig, BenchmarkId
from .main import BenchmarkFactory
import shutil import shutil
from pathlib import Path from pathlib import Path
import subprocess import subprocess
@ -10,6 +12,8 @@ import re
import logging import logging
import importlib.resources import importlib.resources
import venv import venv
import json
import abc
HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr' HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr'
@ -63,9 +67,8 @@ class ClusterNodeDb:
cluster_nodes_defs: List[ClusterNodeDef] cluster_nodes_defs: List[ClusterNodeDef]
cpu_defs: Dict[str, int] cpu_defs: Dict[str, int]
def __init__(self): def __init__(self, cluster_id='alambix'):
self.cluster_nodes_defs = [] self.cluster_nodes_defs = []
cluster_id = 'alambix'
include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue
if cluster_id == 'alambix': if cluster_id == 'alambix':
self.add_cluster_node_def(ClusterNodeDef('alambix50.ipr.univ-rennes.fr', 'intel_xeon_x5650', 2)) self.add_cluster_node_def(ClusterNodeDef('alambix50.ipr.univ-rennes.fr', 'intel_xeon_x5650', 2))
@ -135,8 +138,13 @@ class ClusterNodeDb:
self.add_cluster_node_def(ClusterNodeDef('physix100.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2)) self.add_cluster_node_def(ClusterNodeDef('physix100.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2))
self.add_cluster_node_def(ClusterNodeDef('physix101.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2)) self.add_cluster_node_def(ClusterNodeDef('physix101.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2))
self.add_cluster_node_def(ClusterNodeDef('physix102.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2)) self.add_cluster_node_def(ClusterNodeDef('physix102.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2))
elif cluster_id == 'dummy':
self.add_cluster_node_def(ClusterNodeDef('graffy-ws2.ipr.univ-rennes.fr', 'intel_core_i5_8350u', 1))
else:
assert False
self.cpu_defs = {} self.cpu_defs = {}
self.add_cpu_def(CpuDef('intel_core_i5_8350u', 4))
self.add_cpu_def(CpuDef('intel_xeon_x5550', 4)) self.add_cpu_def(CpuDef('intel_xeon_x5550', 4))
self.add_cpu_def(CpuDef('intel_xeon_x5650', 6)) self.add_cpu_def(CpuDef('intel_xeon_x5650', 6))
self.add_cpu_def(CpuDef('intel_xeon_e5-2660', 8)) self.add_cpu_def(CpuDef('intel_xeon_e5-2660', 8))
@ -169,6 +177,62 @@ class ClusterNodeDb:
return (hosts, num_cores) return (hosts, num_cores)
class ICluster(abc.ABC):
    """Abstract interface of a cluster on which benchmark jobs can be submitted."""

    # the database describing the nodes of this cluster
    cluster_db: ClusterNodeDb

    def __init__(self, cluster_db: ClusterNodeDb):
        self.cluster_db = cluster_db

    @abc.abstractmethod
    def path_is_reachable_by_compute_nodes(self, path: Path):
        """Tell whether the given path is reachable by the compute nodes of this cluster."""

    @abc.abstractmethod
    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """Submit a job to this cluster.

        qsub_args: the arguments sent to qsub, eg ['-pe', 'smp', '12', 'gaussian.job', 'h2o.gjf']
        exec_path: the path of the executable that the job runs
        exec_args: the arguments passed to the executable
        working_dir: the directory from which the job is submitted
        """

    def get_cluster_db(self) -> ClusterNodeDb:
        """Return the node database given at construction."""
        return self.cluster_db
class IprCluster(ICluster):
    """The cluster described by the 'alambix' node database, where jobs are submitted with qsub."""

    def __init__(self):
        super().__init__(ClusterNodeDb('alambix'))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        """Return True if path is located under one of the shared disks, False otherwise."""
        shared_disk_paths = [Path('/opt/ipr/cluster/work.global')]
        for shared_disk_path in shared_disk_paths:
            try:
                # relative_to raises ValueError when path is not under shared_disk_path
                path.relative_to(shared_disk_path)
            except ValueError:
                continue
            return True
        return False

    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """Run qsub from working_dir, with the given qsub arguments, executable and executable arguments."""
        joined_qsub_args = " ".join(qsub_args)
        joined_exec_args = " ".join(exec_args)
        qsub_command = f'qsub {joined_qsub_args} {exec_path} {joined_exec_args}'
        logging.debug('qsub_command = %s, working_dir=%s', qsub_command, working_dir)
        subprocess.run(qsub_command, cwd=working_dir, check=True, shell=True)
class DummyCluster(ICluster):
    """A stand-in cluster that runs the job executable directly instead of calling qsub."""

    def __init__(self):
        super().__init__(ClusterNodeDb('dummy'))

    def path_is_reachable_by_compute_nodes(self, path: Path):
        """Always return True: every path is considered reachable."""
        return True

    def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path):
        """Run exec_path from working_dir; the qsub command is only built for the log message."""
        joined_qsub_args = " ".join(qsub_args)
        joined_exec_args = " ".join(exec_args)
        qsub_command = f'qsub {joined_qsub_args} {exec_path} {joined_exec_args}'
        logging.info('executing %s as a replacement of qsub_command %s, working_dir=%s', exec_path, qsub_command, working_dir)
        subprocess.run(exec_path, check=True, cwd=working_dir)
def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path): def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path):
this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv
assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})' assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})'
@ -188,55 +252,22 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL) subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL)
def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: HostGroupId, results_dir: Path, compiler_id: CompilerId, cmake_path: str): def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, cmake_path: str):
cluster_db = ClusterNodeDb() compiler_id: CompilerId = benchmark_config['compiler_id']
cluster_db = cluster.get_cluster_db()
(hosts, num_cores) = cluster_db.get_host_group_info(host_group_id) (hosts, num_cores) = cluster_db.get_host_group_info(host_group_id)
if len(hosts) == 0: if len(hosts) == 0:
logging.warning('skipping benchmarks with compiler %s on architecture %s because no hosts are available for it', compiler_id, host_group_id) logging.warning('skipping benchmarks with compiler %s on architecture %s because no hosts are available for it', compiler_id, host_group_id)
return return
quick_test = 'arch4_quick' # about 2s on a core i5 8th generation benchmark_config['num_cores'] = num_cores # we expect the benchmark to have the parameter num_cores
representative_test = 'nh3h2_qma_long' # about 10min on a core i5 8th generation
use_test_mode = True
if use_test_mode:
benchmark_test = quick_test
else:
benchmark_test = representative_test
logging.info('using test %s for benchmarking', benchmark_test)
if benchmark_test == 'arch4_quick':
ram_per_core = '1G'
elif benchmark_test == 'nh3h2_qma_long':
ram_per_core = '2.8G' # this was enough on physix48, but maybe we can reduce more
else:
assert f'unhandled benchmark_test : {benchmark_test}'
git_repos_url = 'https://github.com/hibridon/hibridon'
git_user = 'g-raffy' # os.environ['HIBRIDON_REPOS_USER']
git_pass_file = f'{getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat'
cmake_options = [
'-DCMAKE_BUILD_TYPE=Release', # build in release mode for highest performance
'-DBUILD_TESTING=ON' # enable hibridon tests
]
benchmark_command = f'ctest --output-on-failure -L ^{benchmark_test}$'
env_vars_bash_commands = ''
if compiler_id == 'ifort':
env_vars_bash_commands = 'module load compilers/ifort/latest'
cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler
cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading
elif compiler_id == 'gfortran':
env_vars_bash_commands = ''
cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler
else:
assert f'unhandled compiler_id : {compiler_id}'
makedirs(results_dir, exist_ok=True) makedirs(results_dir, exist_ok=True)
this_bench_dir = Path(f'{results_dir}/{hibridon_version}/{benchmark_test}/{host_group_id}/{compiler_id}') this_bench_dir = Path(f'{results_dir}/{host_group_id}')
makedirs(this_bench_dir, exist_ok=True) makedirs(this_bench_dir, exist_ok=True)
starbench_job_path = this_bench_dir / 'starbench.job' starbench_job_path = this_bench_dir / 'starbench.job'
@ -253,44 +284,45 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos
# create the job file (which embeds starbench.py) # create the job file (which embeds starbench.py)
tags_dict = { tags_dict = {
# '<include:starbench.py>': scripts_dir / 'starbench.py', # '<include:starbench.py>': scripts_dir / 'starbench.py',
'<benchmark_id>': str(benchmark.bench_id),
'<starbench_job_path>': str(starbench_job_path), '<starbench_job_path>': str(starbench_job_path),
'<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path), '<iprbench_venv_hardcoded_path>': str(iprbench_venv_hardcoded_path),
'<iprbench_venv_archive_path>': str(job_venv_archive_path) '<iprbench_venv_archive_path>': str(job_venv_archive_path),
'<benchmark_config>': json.dumps(benchmark_config).replace('"', r'\"'),
'<results_dir>': str(results_dir)
} }
with importlib.resources.path('iprbench.resources', 'starbench-template.job') as job_template_path: logging.debug('tags_dict = %s', str(tags_dict))
with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
# job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job' # job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job'
substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path) substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path)
subprocess.run(['chmod', 'a+x', starbench_job_path], check=True) subprocess.run(['chmod', 'a+x', starbench_job_path], check=True)
command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{cmake_path}"' ram_requirements = benchmark.get_ram_requirements(benchmark_config)
logging.debug('command = %s', command) ram_per_core = f'{ram_requirements / num_cores / 1.e9}G'
qsub_command = 'qsub' qsub_args = []
qsub_command += f' -pe smp {num_cores}' qsub_args += ['-pe', 'smp', f'{num_cores}']
qsub_command += f' -l "hostname={"|".join(hosts)}"' qsub_args += ['-l', f'"hostname={"|".join(hosts)}"']
qsub_command += ' -S /bin/bash' qsub_args += ['-S', '/bin/bash']
qsub_command += ' -cwd' qsub_args += ['-cwd']
qsub_command += ' -m ae' qsub_args += ['-m', 'ae']
qsub_command += f' -l mem_available={ram_per_core}' qsub_args += ['-l', f'mem_available={ram_per_core}']
qsub_command += ' -j y' # merge stderr file into stdout file for easier reading of history of events qsub_args += ['-j', 'y'] # merge stderr file into stdout file for easier reading of history of events
qsub_command += f' -N hibench_{host_group_id}_{compiler_id}_{hibridon_version}' qsub_args += ['-N', f'hibench_{host_group_id}']
qsub_command += f' {command}'
logging.debug('qsub_command = %s', qsub_command)
subprocess.run(qsub_command, cwd=this_bench_dir, check=True, shell=True) logging.debug('qsub_args = %s', str(qsub_args))
exec_path = starbench_job_path
exec_args = []
cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir)
def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_regexp: str, cmake_path: str): def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, cmake_path: str):
""" """
hibridon_version: the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench) results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)
""" """
compilers = [
'gfortran',
'ifort'
]
cluster_db = ClusterNodeDb() cluster_db = ClusterNodeDb()
all_host_groups = cluster_db.cpu_defs.keys() all_host_groups = cluster_db.cpu_defs.keys()
@ -298,43 +330,37 @@ def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_reg
host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None] host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None]
logging.info('requested host groups: %s', host_groups) logging.info('requested host groups: %s', host_groups)
for compiler in compilers:
for host_group in host_groups: for host_group in host_groups:
launch_job_for_host_group(hibridon_version, host_group, results_dir, compiler, cmake_path) launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, cmake_path)
def path_is_reachable_by_compute_nodes(path: Path):
path_is_reachable = False
for shared_disk_path in [Path('/opt/ipr/cluster/work.global')]:
try:
_ = path.relative_to(shared_disk_path)
except ValueError:
continue
path_is_reachable = True
break
return path_is_reachable
def main(): def main():
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
arg_parser = ArgumentParser(description='launches hibridon benchmark jobs on IPR\'s physix cluster', epilog='example:\n --commit-id a3bed1c3ccfbca572003020d3e3d3b1ff3934fad') example_text = '''example:
arg_parser.add_argument('--commit-id', type=str, required=True, help='the commit id of the version of code to benchmark')
%(prog)s --benchmark-id 'mamul1' --config '{"matrix_size": 1024, "num_loops":10}' --results-dir /tmp/mamul1_out
'''
arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)') arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.') arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). By defauls, all available architectures are allowed.')
arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)') arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)')
args = arg_parser.parse_args() args = arg_parser.parse_args()
hibridon_version = args.commit_id benchmark_id = BenchmarkId(args.benchmark_id)
benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
# the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
# '53894da48505892bfa05693a52312bacb12c70c9' # latest from branch master as of 10/06/2022 00:30
# code_version='dd0f413b85cf0f727a5a4e88b2b02d75a28b377f' # latest from branch graffy-issue51 as of 10/06/2022 00:30
results_dir = Path(args.results_dir) results_dir = Path(args.results_dir)
arch_regexp = args.arch_regexp arch_regexp = args.arch_regexp
cmake_path = args.cmake_path cmake_path = args.cmake_path
benchmark_config = json.loads(args.config)
if not path_is_reachable_by_compute_nodes(results_dir): cluster = DummyCluster()
if not cluster.path_is_reachable_by_compute_nodes(results_dir):
raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}')
launch_perf_jobs(hibridon_version, results_dir, arch_regexp, cmake_path) launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, cmake_path)

74
iprbench/core.py Normal file
View File

@ -0,0 +1,74 @@
from typing import List, Dict, Union
from enum import Enum
import abc
from pathlib import Path
BenchmarkId = str # a unique name for a benchmark, eg 'matmul1'
BenchParamId = str
BenchParamType = Union[int, str]
BenchmarkConfig = Dict[BenchParamId, BenchParamType]
class Singleton(type):
    """Metaclass that makes each class using it a singleton.

    The first call to the class creates its instance; subsequent calls return
    that same instance (their arguments are ignored).
    """

    _instances = {}  # maps each singleton class to its unique instance

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            # zero-argument super() resolves relative to Singleton itself, whereas
            # super(type(cls), cls) recurses forever when this metaclass is subclassed
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
class BenchParam:
    """Describes one parameter of a benchmark.

    Examples of parameters: the id of the compiler, the cpu id, the size of
    the matrix, etc.
    """

    class Type(Enum):
        """The kinds of values a benchmark parameter can hold."""
        PARAM_TYPE_STRING = 0
        PARAM_TYPE_INT = 1

    # the name of the parameter, eg 'matrix_size'
    name: BenchParamId
    # the type of the parameter, eg 'PARAM_TYPE_INT'
    param_type: Type
    # the description of the parameter, eg 'the size n of the n*n matrix '
    description: str

    def __init__(self, name: str, param_type: Type, description: str):
        self.description = description
        self.param_type = param_type
        self.name = name
class IBenchmark(abc.ABC):
    """Abstract interface that every benchmark implements."""

    # a unique name for this benchmark, eg 'matmul1'
    bench_id: BenchmarkId
    # the parameters that a config for this benchmark is expected to provide
    bench_params: List[BenchParam]

    def __init__(self, bench_id: str, bench_params: List[BenchParam]):
        self.bench_id = bench_id
        self.bench_params = bench_params

    @abc.abstractmethod
    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """returns the ram requirements for this benchmark, in bytes
        """

    @abc.abstractmethod
    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path):
        """execute the benchmark for the given config
        """

    def validate_config(self, config: BenchmarkConfig):
        """Check that config holds exactly the parameters declared by this benchmark.

        An AssertionError is raised for the first declared parameter missing
        from config, or else for the first config entry that is not a declared
        parameter.
        """
        # every declared parameter must be present in the config
        for expected_param in self.bench_params:
            assert expected_param.name in config, f'failed to find the benchmark parameter {expected_param.name} in the benchmark config'

        # every entry of the config must be a declared parameter
        declared_names = [declared_param.name for declared_param in self.bench_params]
        for param_name in config:
            assert param_name in declared_names, f'parameter {param_name} doesn\'t exist for benchmark {self.bench_id}'

View File

@ -1 +1,47 @@
from .core import BenchmarkId, IBenchmark, Singleton
from .benchmarks.hibench import HiBench
from .benchmarks.mamul1 import MaMul1
import logging
import argparse
from pathlib import Path
import json
__version__ = '0.0.1' __version__ = '0.0.1'
class BenchmarkFactory(metaclass=Singleton):
    """Creates benchmark objects from their benchmark id."""

    def __init__(self):
        pass

    def create_benchmark(self, bench_id: BenchmarkId) -> IBenchmark:
        """Return a new instance of the benchmark identified by bench_id.

        Raises:
            KeyError: if bench_id is not the id of a known benchmark.
        """
        # map ids to classes (not instances) so that only the requested benchmark gets constructed
        benchmark_classes = {
            'hibench': HiBench,
            'mamul1': MaMul1
        }
        return benchmark_classes[bench_id]()
def main():
    """Command line entry point: executes one benchmark with one configuration.

    Parses the command line, creates the benchmark matching --benchmark-id,
    validates the json configuration given in --config against the parameters
    declared by this benchmark, then executes the benchmark, passing it the
    directory given in --results-dir.

    An invalid command line (missing option, --config that is not a json
    object) is reported through argparse, which exits the process.
    """
    logging.basicConfig(level=logging.DEBUG)
    example_text = '''example:
    %(prog)s --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out
    '''
    arg_parser = argparse.ArgumentParser(description='executes a benchmark in a cluster job environment', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)')
    arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)')
    # --config is mandatory: there is no default value that would be a valid configuration for every benchmark
    arg_parser.add_argument('--config', type=str, required=True, help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}')
    args = arg_parser.parse_args()

    try:
        benchmark_config = json.loads(args.config)
    except json.JSONDecodeError as error:
        arg_parser.error(f'the value of --config is not valid json: {error}')
    if not isinstance(benchmark_config, dict):
        arg_parser.error('the value of --config is expected to be a json object, eg {"compiler_id": "gfortran", "matrix_size": 1024}')

    benchmark_id = BenchmarkId(args.benchmark_id)
    benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
    benchmark.validate_config(benchmark_config)
    benchmark.execute(benchmark_config, args.results_dir)

View File

@ -1,13 +1,5 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# this job file is a template file for starbench jobs # this job file is a template file for clusterbench jobs
git_repos_url="$1" # eg "https://github.com/hibridon/hibridon"
git_user="$2" # eg 'g-raffy'
git_pass_file="$3" # eg "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat"
code_version="$4" # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad'
cmake_options="$5" # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON'
benchmark_command="$6" # eg 'ctest -L ^arch4_quick$'
env_vars_bash_commands="$7" # defines extra environment variables prior to launch starbench. eg "export MKLROOT=/opt/intel/compilers_and_libraries_2020.1.217/linux/mkl"
cmake_path="$8" # eg '/opt/cmake/cmake-3.23.0/bin/cmake'
executed_by_sge='' executed_by_sge=''
if [ "${JOB_ID}" = '' ] if [ "${JOB_ID}" = '' ]
@ -37,7 +29,7 @@ iprbench_venv_parent=$(dirname "$iprbench_venv_path")
iprbench_venv_archive_path='<iprbench_venv_archive_path>' iprbench_venv_archive_path='<iprbench_venv_archive_path>'
echo "unarchiving virtual environment ${iprbench_venv_archive_path} to ${iprbench_venv_parent}" echo "unarchiving virtual environment ${iprbench_venv_archive_path} to ${iprbench_venv_parent}"
pushd "${iprbench_venv_parent}" pushd "${iprbench_venv_parent}"
tar xzvf "${iprbench_venv_archive_path}" tar xzvf "${iprbench_venv_archive_path}" > /dev/null
popd popd
if [ ! -d "${iprbench_venv_path}" ] if [ ! -d "${iprbench_venv_path}" ]
then then
@ -61,34 +53,9 @@ num_cores=${NSLOTS}
# set environment variables # set environment variables
echo "env_vars_bash_commands=$env_vars_bash_commands"
eval $env_vars_bash_commands
# launch starbench # launch the benchmark
command="iprbench-run --benchmark-id '<benchmark_id>' --config '<benchmark_config>' --results-dir '${output_dir}'"
strUr1ProxyUrl='http://proxy-nt.univ-rennes1.fr:3128'
strProxyVars=''
strProxyVars="$strProxyVars HTTP_PROXY=$strUr1ProxyUrl"
strProxyVars="$strProxyVars HTTPS_PROXY=$strUr1ProxyUrl"
strProxyVars="$strProxyVars FTP_PROXY=$strUr1ProxyUrl"
strProxyVars="$strProxyVars http_proxy=$strUr1ProxyUrl"
strProxyVars="$strProxyVars https_proxy=$strUr1ProxyUrl"
strProxyVars="$strProxyVars ftp_proxy=$strUr1ProxyUrl"
command="$strProxyVars starbench"
command="${command} --git-repos-url ${git_repos_url}"
command="${command} --git-user ${git_user}"
command="${command} --git-pass-file ${git_pass_file}"
command="${command} --num-cores ${num_cores}"
command="${command} --output-dir ${output_dir}"
command="${command} --code-version ${code_version}"
command="${command} --cmake-path ${cmake_path}"
# echo "cmake_options: @$cmake_options@"
for cmake_option in ${cmake_options}
do
command="${command} --cmake-option=${cmake_option}"
done
command="${command} --benchmark-command=\"${benchmark_command}\""
echo "command: ${command}" echo "command: ${command}"
eval ${command} eval ${command}

View File

@ -15,7 +15,8 @@ dependencies = [
"sqlalchemy", "sqlalchemy",
# "cocluto >= 1.2" # "cocluto >= 1.2"
# "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto" # "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto"
"starbench@git+https://github.com/g-raffy/starbench" "starbench >= 1.0.1"
# "starbench@git+https://github.com/g-raffy/starbench"
] ]
requires-python = ">= 3.8" requires-python = ">= 3.8"
authors = [ authors = [
@ -23,14 +24,15 @@ authors = [
] ]
[project.scripts] [project.scripts]
hibenchonphysix = "iprbench.hibench.hibenchonphysix:main" clusterbench-submit = "iprbench.clusterbench:main"
showresults = "iprbench.hibench.showresults:main" iprbench-run = "iprbench.main:main"
showresults = "iprbench.benchmarks.showresults:main"
[project.urls] [project.urls]
Repository = "https://github.com/g-raffy/starbench" Repository = "https://github.com/g-raffy/starbench"
[tool.setuptools] [tool.setuptools]
packages = ["iprbench", "iprbench.hibench"] packages = ["iprbench", "iprbench.benchmarks"]
[tool.setuptools.dynamic] [tool.setuptools.dynamic]
version = {attr = "iprbench.main.__version__"} version = {attr = "iprbench.main.__version__"}

0
test/__init__.py Normal file
View File

View File

@ -0,0 +1,43 @@
# Build script of mamul1, a benchmark that multiplies square matrices in double precision.
# The multiplication is performed either by magma (MAMUL1_USE_MAGMA=ON) or by blas/lapack.
enable_language (Fortran)

set(MAMUL1_USE_MAGMA "OFF" CACHE BOOL "if set, mamul1 build uses magma (matrix algebra on gpu)")
set(MAMUL1_MAGMA_API "CPU_MEM_API" CACHE STRING "which magma API to use when building mamul1: CPU_MEM_API for BLAS compatible API (uses matrices stored on CPU memory) or GPU_MEM_API (use matrices stored on GPU memory)")

add_executable(mamul1 mamul1.F90)

if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
    # Allow arbitrary long lines. Needed as preprocessing could generate long line lengths.
    target_compile_options(mamul1 PUBLIC -ffree-line-length-none)
elseif (CMAKE_Fortran_COMPILER_ID MATCHES "^Intel" OR Fortran_COMPILER_NAME STREQUAL "ifort")
    # Intel (ifort)
    # The compiler is detected with CMAKE_Fortran_COMPILER_ID, like in the GNU case, because
    # Fortran_COMPILER_NAME is not set by this file (its test is only kept for the projects
    # that define this variable before including this file).
    target_compile_options(mamul1 PUBLIC -no-wrap-margin)
endif()

if (MAMUL1_USE_MAGMA)
    find_package( MAGMA REQUIRED )
    if( MAMUL1_MAGMA_API STREQUAL "CPU_MEM_API" )
        # NOTE(review): mamul1.F90 only tests USE_MAGMA_DGEMM_GPU and USE_DGEMM; confirm that
        # USE_MAGMA_DGEMM is actually handled by the source code
        target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM)
    elseif( MAMUL1_MAGMA_API STREQUAL "GPU_MEM_API" )
        target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM_GPU)
    else()
        message(FATAL_ERROR "unexpected value for MAMUL1_MAGMA_API : ${MAMUL1_MAGMA_API}")
    endif()
    message(STATUS "MAGMA_INCLUDES=${MAGMA_INCLUDES}")
    include_directories("${MAGMA_INCLUDES}")
    target_link_libraries(mamul1 "${MAGMA_LIBRARIES}")
else()
    find_package( BLAS REQUIRED )
    find_package( LAPACK REQUIRED )
    # message("BLAS_LIBRARIES=${BLAS_LIBRARIES}")
    # message("LAPACK_LIBRARIES=${LAPACK_LIBRARIES}")
    target_compile_definitions(mamul1 PUBLIC USE_DGEMM)

    # Link Blas and Lapack libraries
    target_link_libraries(mamul1 "${LAPACK_LIBRARIES}")
    target_link_libraries(mamul1 "${BLAS_LIBRARIES}")
endif()

install(TARGETS mamul1)

339
test/mamul1/mamul1.F90 Normal file
View File

@ -0,0 +1,339 @@
#define MAMUL1_VERSION "1.0.0"
#define magma_devptr_t integer(kind=8)
subroutine print_usage(prog_path)
    ! Writes the version, the build variant and the command line syntax of
    ! mamul1 on unit 6.
    !
    ! prog_path: the path of the program, as displayed in the usage line
    implicit none
    character(len=*), intent(in) :: prog_path
    integer, parameter :: out_unit = 6
    character(len=80) :: variant_label

    ! the variant label reflects the preprocessor macros chosen at build time
#if defined(USE_MAGMA_DGEMM_GPU)
    variant_label = 'gpu'
#elif defined(USE_DGEMM)
    variant_label = 'cpu'
#else
    variant_label = 'unknown'
#endif

    write(out_unit, '("mamul1 v",a," (variant:",a,"): benchmark performs a square matrix multiplication in double precision")') &
        MAMUL1_VERSION, trim(variant_label)
    write(out_unit, '()')
    write(out_unit, '("Usage: ",a," <NDIM> <NUM_LOOPS>")') trim(prog_path)
    write(out_unit, '(" <NDIM> positive integer representing the size of the square matrices to multiply ")')
    write(out_unit, '(" <NUM_LOOPS> positive integer representing the number of times the multiplication is performed")')
end subroutine print_usage
program mamul1
    ! Entry point of the benchmark.
    !
    ! Expects 2 command line arguments:
    !   1. NDIM: the size of the square matrices to multiply (positive integer)
    !   2. NUM_LOOPS: the number of times the multiplication is performed (positive integer)
    !
    ! On an invalid command line, the usage is printed and the program stops
    ! with the stop code 1, so that the failure can be detected by the caller.
    implicit none
    integer :: argc, info, ndim, num_loops
    character(len=1024) :: arg0  ! long enough to hold the path of the program without truncating it
    character(len=32) :: arg1, arg2

    call get_command_argument(0, arg0)
    argc = command_argument_count()
    if (argc /= 2) then
        call print_usage(trim(arg0))
        stop 1
    end if

    call get_command_argument(1, arg1, status=info)
    if (info /= 0) then
        write(6,'("Error reading argument: info = ",i2)') info
        call print_usage(trim(arg0))
        stop 1
    end if

    call get_command_argument(2, arg2, status=info)
    if (info /= 0) then
        write(6,'("Error reading argument: info = ",i2)') info
        call print_usage(trim(arg0))
        stop 1
    end if

    read(arg1,*,iostat=info) ndim
    if (info /= 0) then
        write(6,'("Error converting ndim argument to integer: info = ",i2)') info
        call print_usage(trim(arg0))
        stop 1
    end if

    read(arg2,*,iostat=info) num_loops
    if (info /= 0) then
        write(6,'("Error converting num_loops argument to integer: info = ",i2)') info
        call print_usage(trim(arg0))
        stop 1
    end if

    ! both arguments are documented as positive integers; without any loop,
    ! no product would be computed before the result gets checked
    if (ndim < 1 .or. num_loops < 1) then
        call print_usage(trim(arg0))
        stop 1
    end if

    call test_dgemm(ndim, num_loops)

    stop
end program mamul1
subroutine set_random_seed(seed)
    ! Seeds the intrinsic random number generator: every element of the seed
    ! array of the generator is set to the given value.
    integer :: seed
    integer :: num_seed_values
    integer, allocatable :: seed_values(:)

    ! the size of the seed array is given by the generator itself
    call random_seed(size=num_seed_values)
    allocate(seed_values(num_seed_values))
    seed_values(:) = seed
    call random_seed(put=seed_values)
end subroutine set_random_seed
subroutine print_matrix(mat, ndim)
    ! Writes the ndim x ndim matrix mat on unit 6, one matrix row per record.
    !
    ! mat: the matrix to print
    ! ndim: the number of rows and columns of mat
    implicit none
    integer, parameter :: dp = kind(1.0d0)
    ! ndim is declared before mat because it appears in the bounds of mat:
    ! with implicit none, a variable has to be declared before being used
    ! in a specification expression
    integer, intent(in) :: ndim
    real(dp), intent(in) :: mat(ndim, ndim)
    integer :: irow

    do irow = 1, ndim
        write(6, *) mat(irow,:)
    end do
end subroutine print_matrix
! square matrix multiplication: computes cmat = amat * bmat, where amat, bmat
! and cmat are ndim x ndim matrices in double precision.
! The implementation is selected at build time by preprocessor macros:
!  - USE_MAGMA_DGEMM_GPU: amat and bmat are copied to gpu memory, multiplied
!    with magmablasf_dgemm, then the result is copied back into cmat
!  - USE_DGEMM: the multiplication is performed by dgemm
! NOTE(review): when none of these 2 macros is defined, no multiplication code
! is compiled in and cmat is left unset - confirm that such a build is not expected
subroutine sqmatmul(amat, bmat, cmat, ndim)
#if defined(USE_MAGMA_DGEMM_GPU)
use magma, only: magmaf_init, magmaf_finalize
use magma, only: magmaf_queue_create, magmaf_queue_destroy
use magma, only: magmaf_dmalloc, magmaf_free
use magma, only: magmaf_dsetmatrix, magmaf_dgetmatrix
use magma, only: magmablasf_dgemm
#endif
! no implicit none here: ndim is implicitly typed (integer)
real*8, intent(in) :: amat(ndim,ndim)
real*8, intent(in) :: bmat(ndim,ndim)
real*8, intent(out) :: cmat(ndim,ndim)
integer :: lda, ldb, ldc
integer :: info
real :: time_before, time_after
integer(8) :: num_ops
real :: gflops
#ifdef USE_MAGMA_DGEMM_GPU
magma_devptr_t :: d_amat
magma_devptr_t :: d_bmat
magma_devptr_t :: d_cmat
magma_devptr_t :: queue !! really a CPU pointer
#endif
! leading dimensions used for the gpu copies of the matrices: ndim rounded up
! to a multiple of 32 (they are only used in the USE_MAGMA_DGEMM_GPU code)
lda = ceiling(real(ndim)/32)*32
ldb = ceiling(real(ndim)/32)*32
ldc = ceiling(real(ndim)/32)*32
#if defined(USE_MAGMA_DGEMM_GPU)
!! allocate GPU memory
! NOTE(review): the early returns below do not release the gpu buffers that
! have been allocated before the failure
write(6,'("DEBUG: before matrix A gpu memory allocation (",i0," doubles)")') lda * ndim
info = magmaf_dmalloc( d_amat, lda*ndim )
if (d_amat == 0) then
print "(a)", "failed to allocate d_amat"
return
endif
write(6,'("DEBUG: before matrix B gpu memory allocation (",i0," doubles)")') ldb * ndim
info = magmaf_dmalloc( d_bmat, ldb*ndim )
if (d_bmat == 0) then
print "(a)", "failed to allocate d_bmat"
return
endif
write(6,'("DEBUG: before matrix C gpu memory allocation (",i0," doubles)")') ldc * ndim
info = magmaf_dmalloc( d_cmat, ldc*ndim )
if (d_cmat == 0) then
print "(a)", "failed to allocate d_cmat"
return
endif
! copy A to dA and B to dB
call magmaf_queue_create( 0, queue )
write(6,'("DEBUG: queue = ",i0)') queue
if (queue == 0) then
print "(a)", "failed to create a queue"
return
endif
write(6,*) 'DEBUG: copying matrix A from CPU to GPU memory'
call magmaf_dsetmatrix( ndim, ndim, amat, ndim, d_amat, lda, queue )
write(6,*) 'DEBUG: copying matrix B from CPU to GPU memory'
call magmaf_dsetmatrix( ndim, ndim, bmat, ndim, d_bmat, ldb, queue )
! only the multiplication itself is timed (not the transfers between cpu and gpu memory)
call cpu_time(time_before)
write (6,*) 'before magmablasf_dgemm, time=', time_before
call magmablasf_dgemm ('N', 'N', ndim, ndim, ndim, 1.0d0, d_amat, lda, d_bmat, ldb, 0.0d0, d_cmat, ldc, queue)
! NOTE(review): magmaf_queue_sync is not listed in the use statements above - confirm how it gets resolved
call magmaf_queue_sync(queue)
call cpu_time(time_after)
! 2 * ndim^3 operations; the product is evaluated in single precision before being stored as an integer
num_ops = real(ndim) * real(ndim) * real(ndim) * 2
gflops = num_ops / (time_after - time_before) / 1.0e9
write (6,*) 'after magmablasf_dgemm, time=', time_after
write (6,*) 'magmablasf_dgemm (from gpu memory to gpu memory) duration :', (time_after - time_before), '(', gflops, ' gflops)'
write(6,*) 'DEBUG: copying matrix C from GPU to CPU memory'
call magmaf_dgetmatrix( ndim, ndim, d_cmat, ldc, cmat, ndim, queue )
call magmaf_queue_destroy( queue )
info = magmaf_free(d_cmat)
info = magmaf_free(d_bmat)
info = magmaf_free(d_amat)
#endif
#ifdef USE_DGEMM
! subroutine dgemm ( character TRANSA,
! character TRANSB,
! integer M,
! integer N,
! integer K,
! double precision ALPHA,
! double precision, dimension(lda,*) A,
! integer LDA,
! double precision, dimension(ldb,*) B,
! integer LDB,
! double precision BETA,
! double precision, dimension(ldc,*) C,
! integer LDC
! )
! cmat = 1.0 * amat * bmat + 0.0 * cmat, all matrices having ndim as leading dimension
call dgemm('N', 'N', ndim, ndim, ndim, 1.0d0, amat, ndim, bmat, ndim, 0.0d0, cmat, ndim)
#endif
end subroutine
subroutine check_cmat_element(cmat, row, col, amat, bmat, ndim)
    ! Checks one element of the product cmat = amat * bmat.
    !
    ! The element (row, col) of the product is recomputed from amat and bmat,
    ! then the expected and the computed values are written on unit 6. If they
    ! differ by more than the tolerance, the program is stopped with an error
    ! condition, so that the failure can be detected by the caller of the program.
    !
    ! cmat: the matrix to check
    ! row, col: the indices of the element of cmat to check
    ! amat, bmat: the operands of the product
    ! ndim: the number of rows and columns of the matrices
    implicit none
    integer, intent(in) :: ndim
    integer, intent(in) :: row
    integer, intent(in) :: col
    real(8), intent(in) :: cmat(ndim, ndim)
    real(8), intent(in) :: amat(ndim, ndim)
    real(8), intent(in) :: bmat(ndim, ndim)
    real(8), parameter :: tolerance = 1.0e-8  ! max accepted absolute difference
    real(8) :: x
    integer :: i

    x = 0.0d0
    do i = 1, ndim
        x = x + amat(row, i) * bmat(i, col)
    end do

    write(6, '("expected cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, x
    write(6, '("computed cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, cmat(row, col)

    if (abs(cmat(row, col) - x) > tolerance) then
        ! error stop (instead of stop) signals the failure to the caller of the program
        error stop 'a computed element has a wrong value'
    end if
end subroutine check_cmat_element
! Runs the benchmark: fills 2 ndim x ndim matrices with random numbers (the
! random generator is seeded with a fixed value), multiplies them num_loops
! times with sqmatmul, checks 4 elements of the result, then writes the
! timings and the gflops on the standard output.
!
! ndim: the number of rows and columns of the matrices
! num_loops: the number of times the multiplication is performed
subroutine test_dgemm(ndim, num_loops)
#if defined(USE_MAGMA_DGEMM_GPU)
use magma, only: magmaf_init, magmaf_finalize
use magma, only: magmablasf_dgemm !, magmaf_dgemm_gpu
#endif
implicit none
integer, intent(in) :: ndim
integer, intent(in) :: num_loops
integer, parameter :: dp = kind(1.0d0)
real :: ct_start, ct_stop ! elapsed cpu time relative to an arbitrary fixed time. Expressed in seconds with the granularity of 1 microsecond
integer(8) :: num_ops
real :: gflops
integer :: sc_start, sc_stop ! system clock time of start and stop events, expressed in ticks
integer :: sc_count_rate ! number of system clock ticks per second
integer :: sc_count_max ! the max possible number of system clock ticks returned by system_clock
integer :: s
REAL :: a_diff, diff
REAL :: num_sc_ticks_per_second ! the number of system clock ticks per second
real*8, allocatable :: amat(:,:)
real*8, allocatable :: bmat(:,:)
real*8, allocatable :: cmat(:,:)
real(dp) :: x
integer :: i, j
#if defined(USE_MAGMA_DGEMM_GPU)
write(6,*) 'DEBUG: init magma'
call magmaf_init()
#endif
! First initialize the system_clock
CALL system_clock(count_rate=sc_count_rate)
CALL system_clock(count_max=sc_count_max)
num_sc_ticks_per_second = REAL(sc_count_rate)
WRITE(*,*) "system_clock rate : ", num_sc_ticks_per_second, " ticks per second"
! statistics comparing the 2 clocks; they are updated only once below, after the loop
diff = 0.0
a_diff = 0.0
s = 0
allocate(amat(ndim, ndim))
allocate(bmat(ndim, ndim))
allocate(cmat(ndim, ndim))
! fixed seed: the same matrices are generated on each run
call set_random_seed(42)
!call random_number(amat)
!amat = 0.5_dp*(amat + transpose(amat))
do j = 1, ndim
do i = 1, ndim
call random_number(x)
amat(i,j) = x
call random_number(x)
bmat(i,j) = x
end do
end do
! the whole loop is timed with both cpu_time and system_clock
call cpu_time(ct_start)
call system_clock(sc_start)
do j = 1, num_loops
! playmat = amat
call sqmatmul(amat, bmat, cmat, ndim)
end do
call cpu_time(ct_stop)
call system_clock(sc_stop)
! s counts the cases where the system clock duration is shorter than the cpu time (0 or 1 here, as there is only one measure)
if ( (sc_stop - sc_start)/num_sc_ticks_per_second < (ct_stop - ct_start) ) s = s + 1
diff = (sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start) + diff
a_diff = ABS((sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start)) + a_diff
! check the 4 corner elements of cmat
call check_cmat_element(cmat, 1, 1, amat, bmat, ndim)
call check_cmat_element(cmat, 1, ndim, amat, bmat, ndim)
call check_cmat_element(cmat, ndim, 1, amat, bmat, ndim)
call check_cmat_element(cmat, ndim, ndim, amat, bmat, ndim)
! write(6, *) 'amat = '
! call print_matrix(amat, ndim)
! write(6, *) 'bmat = '
! call print_matrix(bmat, ndim)
! write(6, *) 'cmat = '
! call print_matrix(cmat, ndim)
! 2 * ndim^3 operations per multiplication; the product is evaluated in single precision before being stored as an integer
num_ops = real(ndim) * real(ndim) * real(ndim) * 2 * num_loops
! the gflops are computed from the cpu time, not from the system clock duration
! NOTE(review): cpu_time possibly adds up the time of all threads when dgemm is multithreaded, which would lower this figure - confirm
gflops = num_ops / (ct_stop-ct_start) / 1.0e9
write(6, '("Time taken by dgemm for matrix size ",i8," was ",f10.2," seconds")') ndim, ct_stop-ct_start
WRITE(*,*) "gflops (including potential memory transfers) : ", gflops
WRITE(*,*) "system_clock : ",(sc_stop - sc_start)/num_sc_ticks_per_second
WRITE(*,*) "cpu_time : ",(ct_stop - ct_start)
WRITE(*,*) "sys_clock < cpu_time : ",s
WRITE(*,*) "mean diff : ",diff
WRITE(*,*) "abs mean diff : ",a_diff
#if defined(USE_MAGMA_DGEMM_GPU)
write(6,*) 'DEBUG: deinit magma'
call magmaf_finalize()
#endif
deallocate(amat, bmat, cmat)
end

30
test/test_clusterbench.py Normal file
View File

@ -0,0 +1,30 @@
import unittest
import logging
import subprocess
# import importlib.resources
class ClusterBenchTestCase(unittest.TestCase):
    """Checks that a benchmark can be submitted with the clusterbench-submit command."""

    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def test_clusterbench_submit(self):
        """submits the mamul1 benchmark; the test fails if the command returns a non zero exit status"""
        logging.info('test_clusterbench_submit')
        # the arguments are passed as a list (no shell involved), so that the
        # json config doesn't need to be quoted by hand
        command = [
            'clusterbench-submit',
            '--arch-regexp', 'intel_core.*',
            '--benchmark-id', 'mamul1',
            '--config', '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}',
            '--results-dir', '/tmp/mamul1_out',
        ]
        subprocess.run(command, check=True)

    # def test_clusterbench_hibench(self):
    #     logging.info('test_clusterbench_hibench')
    #     command = 'clusterbench-submit --benchmark-id \'hibench\' --config \'{"compiler_id": "gfortran", "test_id": "arch4_quick"}\' --results-dir /tmp/mamul1_out'
    #     subprocess.run(command, shell=True, check=True, executable='/bin/bash')


if __name__ == '__main__':
    unittest.main()

27
test/test_iprbench.py Normal file
View File

@ -0,0 +1,27 @@
import unittest
import logging
import subprocess
# import importlib.resources
class IprBenchTestCase(unittest.TestCase):
    """Checks that a benchmark can be executed with the iprbench-run command."""

    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def test_iprbench_run(self):
        """runs the mamul1 benchmark; the test fails if the command returns a non zero exit status"""
        logging.info('test_iprbench_run')
        # the arguments are passed as a list (no shell involved), so that the
        # json config doesn't need to be quoted by hand
        command = [
            'iprbench-run',
            '--benchmark-id', 'mamul1',
            '--config', '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}',
            '--results-dir', '/tmp/mamul1_out',
        ]
        subprocess.run(command, check=True)


if __name__ == '__main__':
    unittest.main()