From 011d4eddf98778311bf4aab1c303ee52d157f50f Mon Sep 17 00:00:00 2001 From: Guillaume Raffy Date: Tue, 22 Oct 2024 09:16:41 +0200 Subject: [PATCH] refactored iprbench to separate ipr benchmark framework from the actual benchmarks This decoupling allows writing benchmarks as modules that can be used in various situations (from a benchmark job or directly from a user), and this design will also allow automatic registering of the benchmark results in a user-selectable form (sql database, stdout, etc.) - separated `hibenchonphysix.py` into `clusterbench.py` (tool to run a benchmark on a cluster) and `hibench.py` (hibridon benchmark module) so that `clusterbench.py` no longer has any knowledge of hibridon. - there are currently 2 ways to run a benchmark: 1. as a simple run through `clusterbench-run` command (which will eventually be renamed as iprbench-run since it might be completely independent from the concept of cluster) 2. as cluster jobs through `clusterbench-submit` command - added unit test - added another benchmark `mamul1` that is used as a unittest because it has 2 benefits over `hibench` benchmark: 1. it's standalone (no external resources needed) 2. 
it's quicker to execute note: this refactoring work is not complete yet, but the proof of concept is complete (the 2 unittests pass): - still need to provide the user a way to switch between IprCluster and DummyCluster (which is only intended to be used for testing clusterbench) - still need to run multiple configs of the same benchmark in one run (as hibenchonphysix did) work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958] and [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372] --- .gitignore | 6 +- README.md | 38 ++ iprbench/{hibench => benchmarks}/__init__.py | 0 iprbench/benchmarks/hibench.py | 75 ++++ iprbench/benchmarks/mamul1.py | 62 ++++ .../{hibench => benchmarks}/showresults.py | 0 .../hibenchonphysix.py => clusterbench.py} | 204 ++++++----- iprbench/core.py | 74 ++++ iprbench/main.py | 48 ++- {tests => iprbench/resources}/__init__.py | 0 ...template.job => clusterbench-template.job} | 41 +-- pyproject.toml | 10 +- test/__init__.py | 0 test/mamul1/CMakeLists.txt | 43 +++ test/mamul1/mamul1.F90 | 339 ++++++++++++++++++ test/test_clusterbench.py | 30 ++ test/test_iprbench.py | 27 ++ 17 files changed, 865 insertions(+), 132 deletions(-) rename iprbench/{hibench => benchmarks}/__init__.py (100%) create mode 100644 iprbench/benchmarks/hibench.py create mode 100644 iprbench/benchmarks/mamul1.py rename iprbench/{hibench => benchmarks}/showresults.py (100%) rename iprbench/{hibench/hibenchonphysix.py => clusterbench.py} (75%) create mode 100644 iprbench/core.py rename {tests => iprbench/resources}/__init__.py (100%) rename iprbench/resources/{starbench-template.job => clusterbench-template.job} (55%) create mode 100644 test/__init__.py create mode 100644 test/mamul1/CMakeLists.txt create mode 100644 test/mamul1/mamul1.F90 create mode 100644 test/test_clusterbench.py create mode 100644 test/test_iprbench.py diff --git a/.gitignore b/.gitignore index 65fbaf7..c561508 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ 
iprbench.venv/ results/ -iprbench/__pycache__/__init__.cpython-38.pyc +iprbench.egg-info/ +iprbench/benchmarks/__pycache__/ +iprbench/__pycache__/ +test/__pycache__/ +iprbench/resources/__pycache__/ diff --git a/README.md b/README.md index 48aabe7..9f282ea 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,44 @@ Installing collected packages: pytz, tzdata, typing-extensions, starbench, six, Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.54.1 greenlet-3.1.1 iprbench-0.0.1 kiwisolver-1.4.7 matplotlib-3.9.2 numpy-2.1.2 packaging-24.1 pandas-2.2.3 pillow-10.4.0 pyparsing-3.1.4 python-dateutil-2.9.0.post0 pytz-2024.2 six-1.16.0 sqlalchemy-2.0.35 starbench-1.0.0 typing-extensions-4.12.2 tzdata-2024.2 ``` +## run unit tests + +```sh +20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_iprbench +2024-10-18 16:57:42,589 - INFO - test_iprbench_run +creating build directory /tmp/mamul1_out/output/worker +executing the following command in parallel (2 parallel runs) : '['mkdir', '-p', '/tmp/mamul1_out/output/worker/build']' +mean duration : 0.004 s (2 runs) +configuring /home/graffy/work/starbench/iprbench.git/test/mamul1 into /tmp/mamul1_out/output/worker/build ... +executing the following command in parallel (2 parallel runs) : '['/usr/bin/cmake', '-DCMAKE_BUILD_TYPE=Release', '-DCMAKE_Fortran_COMPILER=gfortran', '/home/graffy/work/starbench/iprbench.git/test/mamul1']' +mean duration : 0.098 s (2 runs) +building /tmp/mamul1_out/output/worker/build ... +executing the following command in parallel (2 parallel runs) : '['make']' +mean duration : 0.073 s (2 runs) +benchmarking /tmp/mamul1_out/output/worker/build ... +executing the following command in parallel (2 parallel runs) : '['./mamul1', '1024', '10']' +mean duration : 0.660 s (2 runs) +duration : 0.660 s +. 
+---------------------------------------------------------------------- +Ran 1 test in 1.035s + +OK +last command status : [0] +``` + +```sh +20241018-16:56:05 graffy@graffy-ws2:~/work/starbench/iprbench.git$ python3 -m unittest test.test_clusterbench +``` + + + +## launch a benchmark on the current system + +```sh +iprbench-run --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out +``` + ## launch benchmark jobs on alambix cluster ```sh diff --git a/iprbench/hibench/__init__.py b/iprbench/benchmarks/__init__.py similarity index 100% rename from iprbench/hibench/__init__.py rename to iprbench/benchmarks/__init__.py diff --git a/iprbench/benchmarks/hibench.py b/iprbench/benchmarks/hibench.py new file mode 100644 index 0000000..75b883a --- /dev/null +++ b/iprbench/benchmarks/hibench.py @@ -0,0 +1,75 @@ +from ..core import IBenchmark, BenchParam, BenchmarkConfig +from pathlib import Path +import subprocess +import os + + +class HiBench(IBenchmark): + """Hibridon benchmark + """ + + def __init__(self): + bench_params = [] + bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id')) + bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark')) + bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler')) + bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark')) + bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark')) + bench_params.append(BenchParam('test_id', BenchParam.Type.PARAM_TYPE_STRING, 'the name of the test to run (eg arch4_quick (about 2s on a core i5 8th generation) or nh3h2_qma_long (about 10min on a 
core i5 8th generation))')) + + super().__init__(bench_id='hibench', bench_params=bench_params) + + def get_ram_requirements(self, config: BenchmarkConfig) -> int: + GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024 + ram_per_core = 0 # in bytes + benchmark_test = config['test_id'] + if benchmark_test == 'arch4_quick': + ram_per_core = int(1 * GIBIBYTE_TO_BYTE) + elif benchmark_test == 'nh3h2_qma_long': + ram_per_core = int(2.8 * GIBIBYTE_TO_BYTE) # this was enough on physix48, but maybe we can reduce more + else: + assert f'unhandled benchmark_test : {benchmark_test}' + return ram_per_core + + def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path): + git_repos_url = 'https://github.com/hibridon/hibridon' + git_user = 'g-raffy' # os.environ['HIBRIDON_REPOS_USER'] + git_pass_file = f'{os.getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat' + hibridon_version = config['hibridon_version'] + test_id = config['test_id'] # eg arch4_quick or nh3h2_qma_long + compiler_id = config['compiler_id'] + + src_dir = benchmark_output_dir / 'hibridon.git' + output_dir = benchmark_output_dir / 'output' + + password_provider = f'{{"type": "password-file", "password-file-path": "{git_pass_file}"}}' + source_tree_provider = f'{{"type": "git-cloner", "repos-url": "{git_repos_url}", "src-dir": "{src_dir}", "code-version": "{hibridon_version}", "git-user": "{git_user}", "password-provider": {password_provider}}}' + benchmark_command = f'ctest --output-on-failure -L ^{test_id}$' + + cmake_options = [ + '-DCMAKE_BUILD_TYPE=Release', # build in release mode for highest performance + '-DBUILD_TESTING=ON' # enable hibridon tests + ] + + env_vars_bash_commands = '' + if compiler_id == 'ifort': + env_vars_bash_commands = 'module load compilers/ifort/latest' + cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler + cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading + elif 
compiler_id == 'gfortran': + env_vars_bash_commands = '' + cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler + else: + assert f'unhandled compiler_id : {compiler_id}' + + ur1_proxy_url = 'http://proxy-nt.univ-rennes1.fr:3128' + proxy_env_vars = '' + proxy_env_vars = f'{proxy_env_vars} HTTP_PROXY={ur1_proxy_url}' + proxy_env_vars = f'{proxy_env_vars} HTTPS_PROXY={ur1_proxy_url}' + proxy_env_vars = f'{proxy_env_vars} FTP_PROXY={ur1_proxy_url}' + proxy_env_vars = f'{proxy_env_vars} http_proxy={ur1_proxy_url}' + proxy_env_vars = f'{proxy_env_vars} https_proxy={ur1_proxy_url}' + proxy_env_vars = f'{proxy_env_vars} ftp_proxy={ur1_proxy_url}' + + shell_command = f'{env_vars_bash_commands} && {proxy_env_vars} starbench --source-tree-provider \'{source_tree_provider}\' --num-cores 2 --output-dir={output_dir} --cmake-path=/usr/bin/cmake {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{benchmark_command}\'' + subprocess.run(shell_command, shell=True, check=True, encoding='/bin/bash') diff --git a/iprbench/benchmarks/mamul1.py b/iprbench/benchmarks/mamul1.py new file mode 100644 index 0000000..b2b932c --- /dev/null +++ b/iprbench/benchmarks/mamul1.py @@ -0,0 +1,62 @@ +from ..core import IBenchmark, BenchParam, BenchmarkConfig +from pathlib import Path +import subprocess +# import importlib.resources + + +class MaMul1(IBenchmark): + """Matrix multiplication benchmark + """ + + def __init__(self): + bench_params = [] + bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark')) + bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark')) + bench_params.append(BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the the n * n matrices')) + bench_params.append(BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical 
multiplications performed in sequence')) + # bench_params.append(BenchParam('source_dir', BenchParam.Type.PARAM_TYPE_STRING, 'the path to the directory containing mamul1 test source files')) + super().__init__(bench_id='mamul1', bench_params=bench_params) + + def get_ram_requirements(self, config: BenchmarkConfig) -> int: + GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024 + SIZE_OF_DOUBLE = 8 # in bytes + matrix_size = config['matrix_size'] + matrix_ram_size = matrix_size * matrix_size * SIZE_OF_DOUBLE + num_matrices = 3 + ram_requirements = int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size + return ram_requirements + + def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path): + compiler_id = config['compiler_id'] + num_cores = config['num_cores'] + matrix_size = config['matrix_size'] + num_loops = config['num_loops'] + + # src_dir = Path('test/mamul1').absolute() + src_dir = Path('/home/graffy/work/starbench/iprbench.git/test/mamul1') + # with importlib.resources.path('iprbench.resources', 'mamul1') as src_dir: + output_dir = benchmark_output_dir / 'output' + + source_tree_provider = f'{{"type": "existing-dir", "dir-path": "{src_dir}"}}' + benchmark_command = ['./mamul1', f'{matrix_size}', f'{num_loops}'] + + cmake_options = [ + '-DCMAKE_BUILD_TYPE=Release', # build in release mode for highest performance + ] + + env_vars_bash_commands = '' + if compiler_id == 'ifort': + env_vars_bash_commands = 'module load compilers/ifort/latest' + cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler + cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading + elif compiler_id == 'gfortran': + env_vars_bash_commands = '' + cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler + else: + assert f'unhandled compiler_id : {compiler_id}' + + shell_command = '' + if len(env_vars_bash_commands) > 0: + shell_command += f'{env_vars_bash_commands} && ' + shell_command += f'starbench 
--source-tree-provider \'{source_tree_provider}\' --num-cores {num_cores} --output-dir={output_dir} --cmake-path=/usr/bin/cmake {" ".join([f"--cmake-option={option}" for option in cmake_options])} --benchmark-command=\'{" ".join(benchmark_command)}\'' + subprocess.run(shell_command, shell=True, check=True, encoding='/bin/bash') diff --git a/iprbench/hibench/showresults.py b/iprbench/benchmarks/showresults.py similarity index 100% rename from iprbench/hibench/showresults.py rename to iprbench/benchmarks/showresults.py diff --git a/iprbench/hibench/hibenchonphysix.py b/iprbench/clusterbench.py similarity index 75% rename from iprbench/hibench/hibenchonphysix.py rename to iprbench/clusterbench.py index eaea373..5d5e232 100755 --- a/iprbench/hibench/hibenchonphysix.py +++ b/iprbench/clusterbench.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 # this script launches jobs to run hibridon benchmarks on physix cluster for the given version of hibridon (commit number) from typing import List, Tuple, Dict -from argparse import ArgumentParser +import argparse from os import getenv, makedirs +from .core import IBenchmark, BenchmarkConfig, BenchmarkId +from .main import BenchmarkFactory import shutil from pathlib import Path import subprocess @@ -10,6 +12,8 @@ import re import logging import importlib.resources import venv +import json +import abc HostFqdn = str # eg 'physix90.ipr.univ-rennes1.fr' @@ -63,9 +67,8 @@ class ClusterNodeDb: cluster_nodes_defs: List[ClusterNodeDef] cpu_defs: Dict[str, int] - def __init__(self): + def __init__(self, cluster_id='alambix'): self.cluster_nodes_defs = [] - cluster_id = 'alambix' include_multiqueue_nodes = False # at the moment hibench only works on nodes that have all their cores in the same queue if cluster_id == 'alambix': self.add_cluster_node_def(ClusterNodeDef('alambix50.ipr.univ-rennes.fr', 'intel_xeon_x5650', 2)) @@ -135,8 +138,13 @@ class ClusterNodeDb: self.add_cluster_node_def(ClusterNodeDef('physix100.ipr.univ-rennes1.fr', 
'intel_xeon_gold_6248r', 2)) self.add_cluster_node_def(ClusterNodeDef('physix101.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2)) self.add_cluster_node_def(ClusterNodeDef('physix102.ipr.univ-rennes1.fr', 'intel_xeon_gold_6248r', 2)) + elif cluster_id == 'dummy': + self.add_cluster_node_def(ClusterNodeDef('graffy-ws2.ipr.univ-rennes.fr', 'intel_core_i5_8350u', 1)) + else: + assert False self.cpu_defs = {} + self.add_cpu_def(CpuDef('intel_core_i5_8350u', 4)) self.add_cpu_def(CpuDef('intel_xeon_x5550', 4)) self.add_cpu_def(CpuDef('intel_xeon_x5650', 6)) self.add_cpu_def(CpuDef('intel_xeon_e5-2660', 8)) @@ -169,6 +177,62 @@ class ClusterNodeDb: return (hosts, num_cores) +class ICluster(abc.ABC): + cluster_db: ClusterNodeDb + + def __init__(self, cluster_db: ClusterNodeDb): + self.cluster_db = cluster_db + + @abc.abstractmethod + def path_is_reachable_by_compute_nodes(self, path: Path): + pass + + @abc.abstractmethod + def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path): + """ + qsub_args: the arguments sent to qsub, eg ['-pe', 'smp', '12', 'gaussian.job', 'h2o.gjf'] + """ + + def get_cluster_db(self) -> ClusterNodeDb: + return self.cluster_db + + +class IprCluster(ICluster): + + def __init__(self): + super().__init__(ClusterNodeDb('alambix')) + + def path_is_reachable_by_compute_nodes(self, path: Path): + path_is_reachable = False + for shared_disk_path in [Path('/opt/ipr/cluster/work.global')]: + try: + _ = path.relative_to(shared_disk_path) + except ValueError: + continue + path_is_reachable = True + break + return path_is_reachable + + def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path): + qsub_command = f'qsub {" ".join(qsub_args)} {exec_path} {" ".join(exec_args)}' + logging.debug('qsub_command = %s, working_dir=%s', qsub_command, working_dir) + subprocess.run(qsub_command, cwd=working_dir, check=True, shell=True) + + +class DummyCluster(ICluster): + + def 
__init__(self): + super().__init__(ClusterNodeDb('dummy')) + + def path_is_reachable_by_compute_nodes(self, path: Path): + return True + + def submit_job(self, qsub_args: List[str], exec_path: Path, exec_args: List[str], working_dir: Path): + qsub_command = f'qsub {" ".join(qsub_args)} {exec_path} {" ".join(exec_args)}' + logging.info('executing %s as a replacement of qsub_command %s, working_dir=%s', exec_path, qsub_command, working_dir) + subprocess.run(exec_path, check=True, cwd=working_dir) + + def duplicate_this_virtualenv_to(duplicate_virtualenv_path: Path): this_virtualenv_path = Path(getenv('VIRTUAL_ENV')) # eg /home/graffy/work/starbench/iprbench.git/iprbench.venv assert this_virtualenv_path.exists(), f'failed to find the root the virtual environment in use (VIRTUAL_ENV environment variable has the value {this_virtualenv_path})' @@ -188,55 +252,22 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat subprocess.run(f'tar czvf {venv_archive_path} {venv_hardcoded_path.relative_to(venv_hardcoded_path.parent)}', shell=True, check=True, cwd=venv_hardcoded_path.parent, stdout=subprocess.DEVNULL) -def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: HostGroupId, results_dir: Path, compiler_id: CompilerId, cmake_path: str): +def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, cmake_path: str): - cluster_db = ClusterNodeDb() + compiler_id: CompilerId = benchmark_config['compiler_id'] + + cluster_db = cluster.get_cluster_db() (hosts, num_cores) = cluster_db.get_host_group_info(host_group_id) if len(hosts) == 0: logging.warning('skipping benchmarks with compiler %s on architecture %s because no hosts are available for it', compiler_id, host_group_id) return - quick_test = 'arch4_quick' # about 2s on a core i5 8th generation - representative_test = 'nh3h2_qma_long' # about 10min on a core i5 8th generation - 
use_test_mode = True - if use_test_mode: - benchmark_test = quick_test - else: - benchmark_test = representative_test - - logging.info('using test %s for benchmarking', benchmark_test) - if benchmark_test == 'arch4_quick': - ram_per_core = '1G' - elif benchmark_test == 'nh3h2_qma_long': - ram_per_core = '2.8G' # this was enough on physix48, but maybe we can reduce more - else: - assert f'unhandled benchmark_test : {benchmark_test}' - - git_repos_url = 'https://github.com/hibridon/hibridon' - git_user = 'g-raffy' # os.environ['HIBRIDON_REPOS_USER'] - git_pass_file = f'{getenv("HOME")}/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat' - cmake_options = [ - '-DCMAKE_BUILD_TYPE=Release', # build in release mode for highest performance - '-DBUILD_TESTING=ON' # enable hibridon tests - ] - - benchmark_command = f'ctest --output-on-failure -L ^{benchmark_test}$' - - env_vars_bash_commands = '' - if compiler_id == 'ifort': - env_vars_bash_commands = 'module load compilers/ifort/latest' - cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler - cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading - elif compiler_id == 'gfortran': - env_vars_bash_commands = '' - cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler - else: - assert f'unhandled compiler_id : {compiler_id}' + benchmark_config['num_cores'] = num_cores # we expect the benchmark to have the parameter num_cores makedirs(results_dir, exist_ok=True) - this_bench_dir = Path(f'{results_dir}/{hibridon_version}/{benchmark_test}/{host_group_id}/{compiler_id}') + this_bench_dir = Path(f'{results_dir}/{host_group_id}') makedirs(this_bench_dir, exist_ok=True) starbench_job_path = this_bench_dir / 'starbench.job' @@ -253,44 +284,45 @@ def launch_job_for_host_group(hibridon_version: GitCommitTag, host_group_id: Hos # create the job file (which embeds starbench.py) tags_dict = { # '': scripts_dir / 
'starbench.py', + '': str(benchmark.bench_id), '': str(starbench_job_path), '': str(iprbench_venv_hardcoded_path), - '': str(job_venv_archive_path) + '': str(job_venv_archive_path), + '': json.dumps(benchmark_config).replace('"', r'\"'), + '': str(results_dir) } - with importlib.resources.path('iprbench.resources', 'starbench-template.job') as job_template_path: + logging.debug('tags_dict = %s', str(tags_dict)) + with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path: # job_template_path = importlib.resources..files('iprbench.resources') / 'hibench' / 'starbench-template.job' substitute_tags(input_file_path=job_template_path, tags_dict=tags_dict, output_file_path=starbench_job_path) subprocess.run(['chmod', 'a+x', starbench_job_path], check=True) - command = f'{starbench_job_path} "{git_repos_url}" "{git_user}" "{git_pass_file}" "{hibridon_version}" "{" ".join(cmake_options)}" "{benchmark_command}" "{env_vars_bash_commands}" "{cmake_path}"' - logging.debug('command = %s', command) + ram_requirements = benchmark.get_ram_requirements(benchmark_config) + ram_per_core = f'{ram_requirements / num_cores / 1.e9}G' - qsub_command = 'qsub' - qsub_command += f' -pe smp {num_cores}' - qsub_command += f' -l "hostname={"|".join(hosts)}"' - qsub_command += ' -S /bin/bash' - qsub_command += ' -cwd' - qsub_command += ' -m ae' - qsub_command += f' -l mem_available={ram_per_core}' - qsub_command += ' -j y' # merge stderr file into stdout file for easier reading of history of events - qsub_command += f' -N hibench_{host_group_id}_{compiler_id}_{hibridon_version}' - qsub_command += f' {command}' - logging.debug('qsub_command = %s', qsub_command) + qsub_args = [] + qsub_args += ['-pe', 'smp', f'{num_cores}'] + qsub_args += ['-l', f'"hostname={"|".join(hosts)}"'] + qsub_args += ['-S', '/bin/bash'] + qsub_args += ['-cwd'] + qsub_args += ['-m', 'ae'] + qsub_args += ['-l', f'mem_available={ram_per_core}'] + qsub_args += ['-j', 'y'] # merge 
stderr file into stdout file for easier reading of history of events + qsub_args += ['-N', f'hibench_{host_group_id}'] - subprocess.run(qsub_command, cwd=this_bench_dir, check=True, shell=True) + logging.debug('qsub_args = %s', str(qsub_args)) + + exec_path = starbench_job_path + exec_args = [] + + cluster.submit_job(qsub_args, exec_path, exec_args, this_bench_dir) -def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_regexp: str, cmake_path: str): +def launch_perf_jobs(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, results_dir: Path, cluster: ICluster, arch_regexp: str, cmake_path: str): """ - hibridon_version: the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' results_dir: where the results of the benchmark are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench) """ - compilers = [ - 'gfortran', - 'ifort' - ] - cluster_db = ClusterNodeDb() all_host_groups = cluster_db.cpu_defs.keys() @@ -298,43 +330,37 @@ def launch_perf_jobs(hibridon_version: GitCommitTag, results_dir: Path, arch_reg host_groups = [host_group for host_group in all_host_groups if re.match(arch_regexp, host_group) is not None] logging.info('requested host groups: %s', host_groups) - for compiler in compilers: - for host_group in host_groups: - launch_job_for_host_group(hibridon_version, host_group, results_dir, compiler, cmake_path) - - -def path_is_reachable_by_compute_nodes(path: Path): - path_is_reachable = False - for shared_disk_path in [Path('/opt/ipr/cluster/work.global')]: - try: - _ = path.relative_to(shared_disk_path) - except ValueError: - continue - path_is_reachable = True - break - return path_is_reachable + for host_group in host_groups: + launch_job_for_host_group(benchmark, benchmark_config, host_group, results_dir, cluster, cmake_path) def main(): logging.basicConfig(level=logging.DEBUG) - arg_parser = ArgumentParser(description='launches hibridon benchmark jobs on IPR\'s physix 
cluster', epilog='example:\n --commit-id a3bed1c3ccfbca572003020d3e3d3b1ff3934fad') - arg_parser.add_argument('--commit-id', type=str, required=True, help='the commit id of the version of code to benchmark') + example_text = '''example: + + %(prog)s --benchmark-id 'mamul1' --config '{"matrix_size": 1024, "num_loops":10}' --results-dir /tmp/mamul1_out + + ''' + + arg_parser = argparse.ArgumentParser(description='submits a benchmark on the compute cluster (assuming this is running from a sge cluster machine where qsub command is available)', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter) + arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)') arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)') + arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}') arg_parser.add_argument('--arch-regexp', type=str, default='.*', help='the regular expression for the architectures the benchmark is allowed to run on (eg "intel_xeon_.*"). 
By defauls, all available architectures are allowed.') arg_parser.add_argument('--cmake-path', type=str, default='cmake', help='the location of the cmake command to use (eg /opt/cmake/cmake-3.23.0/bin/cmake)') args = arg_parser.parse_args() - hibridon_version = args.commit_id - - # the version of hibridon to test, in the form of a valid commit number eg 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' - # '53894da48505892bfa05693a52312bacb12c70c9' # latest from branch master as of 10/06/2022 00:30 - # code_version='dd0f413b85cf0f727a5a4e88b2b02d75a28b377f' # latest from branch graffy-issue51 as of 10/06/2022 00:30 + benchmark_id = BenchmarkId(args.benchmark_id) + benchmark = BenchmarkFactory().create_benchmark(benchmark_id) results_dir = Path(args.results_dir) arch_regexp = args.arch_regexp cmake_path = args.cmake_path + benchmark_config = json.loads(args.config) - if not path_is_reachable_by_compute_nodes(results_dir): + cluster = DummyCluster() + + if not cluster.path_is_reachable_by_compute_nodes(results_dir): raise ValueError('the results path is expected to be on a disk that is accessible to all cluster nodes, and it doesn\'t seem to be the case for {results_dir}') - launch_perf_jobs(hibridon_version, results_dir, arch_regexp, cmake_path) + launch_perf_jobs(benchmark, benchmark_config, results_dir, cluster, arch_regexp, cmake_path) diff --git a/iprbench/core.py b/iprbench/core.py new file mode 100644 index 0000000..df502db --- /dev/null +++ b/iprbench/core.py @@ -0,0 +1,74 @@ +from typing import List, Dict, Union +from enum import Enum +import abc +from pathlib import Path + +BenchmarkId = str # a unique name for a benchmark, eg 'matmul1' +BenchParamId = str +BenchParamType = Union[int, str] +BenchmarkConfig = Dict[BenchParamId, BenchParamType] + + +class Singleton(type): + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(type(cls), cls).__call__(*args, **kwargs) # pylint: disable=bad-super-call, 
no-member + return cls._instances[cls] + + +class BenchParam(): + '''a parameter of a benchmark + + for example the id of the compiler, the cpu id, the size of the matrix, etc. + + ''' + class Type(Enum): + PARAM_TYPE_STRING = 0 + PARAM_TYPE_INT = 1 + + name: BenchParamId # the name of the parameter, eg 'matrix_size' + param_type: Type # the type of the parameter, eg 'PARAM_TYPE_INT' + description: str # the description of the parameter, eg 'the size n of the n*n matrix ' + + def __init__(self, name: str, param_type: Type, description: str): + self.name = name + self.param_type = param_type + self.description = description + + +class IBenchmark(abc.ABC): + + bench_id: BenchmarkId # a unique name for this benchmark, eg 'matmul1' + bench_params: List[BenchParam] + + def __init__(self, bench_id: str, bench_params: List[BenchParam]): + self.bench_id = bench_id + self.bench_params = bench_params + + @abc.abstractmethod + def get_ram_requirements(self, config: BenchmarkConfig) -> int: + """returns the ram requirements for this benchmark, in bytes + """ + + @abc.abstractmethod + def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path): + """execute the benchmark for the given config + """ + + def validate_config(self, config: BenchmarkConfig): + """checks that all benchmark parameters have been set in the given config""" + for bench_param in self.bench_params: + try: + _ = config[bench_param.name] + except KeyError: + assert False, f'failed to find the benchmark parameter {bench_param.name} in the benchmark config' + # check that all parameters in benchmark config exist as parameters for this benchmark + for param_name in config.keys(): + param_exists = False + for bench_param in self.bench_params: + if bench_param.name == param_name: + param_exists = True + break + assert param_exists, f'parameter {param_name} doesn\'t exist for benchmark {self.bench_id}' diff --git a/iprbench/main.py b/iprbench/main.py index 99c4176..7e43fb0 100644 --- a/iprbench/main.py 
+++ b/iprbench/main.py @@ -1 +1,47 @@ -__version__ = '0.0.1' \ No newline at end of file +from .core import BenchmarkId, IBenchmark, Singleton +from .benchmarks.hibench import HiBench +from .benchmarks.mamul1 import MaMul1 +import logging +import argparse +from pathlib import Path +import json + +__version__ = '0.0.1' + + +class BenchmarkFactory(metaclass=Singleton): + + def __init__(self): + pass + + def create_benchmark(self, bench_id: BenchmarkId) -> IBenchmark: + benchmark = { + 'hibench': HiBench(), + 'mamul1': MaMul1() + }[bench_id] + return benchmark + + +def main(): + """ + + """ + logging.basicConfig(level=logging.DEBUG) + + example_text = '''example: + + %(prog)s --benchmark-id 'mamul1' --config '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}' --results-dir /tmp/mamul1_out + + ''' + + arg_parser = argparse.ArgumentParser(description='executes a benchmark in a cluster job environment', epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter) + arg_parser.add_argument('--benchmark-id', type=BenchmarkId, required=True, help='the benchmark id of the benchmark to perform (eg mamul1)') + arg_parser.add_argument('--results-dir', type=Path, required=True, help='the root directory of the tree where the results of the benchmarks are stored (eg $GLOBAL_WORK_DIR/graffy/benchmarks/hibench)') + arg_parser.add_argument('--config', type=str, default='cmake', help='the benchmark configuration in json format, eg {"compiler_id": "gfortran", "matrix_size": 1024}') + + args = arg_parser.parse_args() + benchmark_id = BenchmarkId(args.benchmark_id) + benchmark = BenchmarkFactory().create_benchmark(benchmark_id) + benchmark_config = json.loads(args.config) + benchmark.validate_config(benchmark_config) + benchmark.execute(benchmark_config, args.results_dir) diff --git a/tests/__init__.py b/iprbench/resources/__init__.py similarity index 100% rename from tests/__init__.py rename to iprbench/resources/__init__.py diff --git 
a/iprbench/resources/starbench-template.job b/iprbench/resources/clusterbench-template.job similarity index 55% rename from iprbench/resources/starbench-template.job rename to iprbench/resources/clusterbench-template.job index 739cb15..b29fea8 100644 --- a/iprbench/resources/starbench-template.job +++ b/iprbench/resources/clusterbench-template.job @@ -1,13 +1,5 @@ #!/usr/bin/env bash -# this job file is a template file for starbench jobs -git_repos_url="$1" # eg "https://github.com/hibridon/hibridon" -git_user="$2" # eg 'g-raffy' -git_pass_file="$3" # eg "$HOME/.github/personal_access_tokens/bench.hibridon.cluster.ipr.univ-rennes1.fr.pat" -code_version="$4" # git branch id or commit id eg : 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad' -cmake_options="$5" # eg '-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON' -benchmark_command="$6" # eg 'ctest -L ^arch4_quick$' -env_vars_bash_commands="$7" # defines extra environment variables prior to launch starbench. eg "export MKLROOT=/opt/intel/compilers_and_libraries_2020.1.217/linux/mkl" -cmake_path="$8" # eg '/opt/cmake/cmake-3.23.0/bin/cmake' +# this job file is a template file for clusterbench jobs executed_by_sge='' if [ "${JOB_ID}" = '' ] @@ -37,7 +29,7 @@ iprbench_venv_parent=$(dirname "$iprbench_venv_path") iprbench_venv_archive_path='' echo "unarchiving virtual environment ${iprbench_venv_archive_path} to ${iprbench_venv_parent}" pushd "${iprbench_venv_parent}" - tar xzvf "${iprbench_venv_archive_path}" + tar xzvf "${iprbench_venv_archive_path}" > /dev/null popd if [ ! 
-d "${iprbench_venv_path}" ] then @@ -61,34 +53,9 @@ num_cores=${NSLOTS} # set environment variables -echo "env_vars_bash_commands=$env_vars_bash_commands" -eval $env_vars_bash_commands -# launch starbench - -strUr1ProxyUrl='http://proxy-nt.univ-rennes1.fr:3128' -strProxyVars='' -strProxyVars="$strProxyVars HTTP_PROXY=$strUr1ProxyUrl" -strProxyVars="$strProxyVars HTTPS_PROXY=$strUr1ProxyUrl" -strProxyVars="$strProxyVars FTP_PROXY=$strUr1ProxyUrl" -strProxyVars="$strProxyVars http_proxy=$strUr1ProxyUrl" -strProxyVars="$strProxyVars https_proxy=$strUr1ProxyUrl" -strProxyVars="$strProxyVars ftp_proxy=$strUr1ProxyUrl" - -command="$strProxyVars starbench" -command="${command} --git-repos-url ${git_repos_url}" -command="${command} --git-user ${git_user}" -command="${command} --git-pass-file ${git_pass_file}" -command="${command} --num-cores ${num_cores}" -command="${command} --output-dir ${output_dir}" -command="${command} --code-version ${code_version}" -command="${command} --cmake-path ${cmake_path}" -# echo "cmake_options: @$cmake_options@" -for cmake_option in ${cmake_options} -do - command="${command} --cmake-option=${cmake_option}" -done -command="${command} --benchmark-command=\"${benchmark_command}\"" +# launch the benchmark +command="iprbench-run --benchmark-id '' --config '' --results-dir '${output_dir}'" echo "command: ${command}" eval ${command} diff --git a/pyproject.toml b/pyproject.toml index cb97dff..8729a23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,8 @@ dependencies = [ "sqlalchemy", # "cocluto >= 1.2" # "cocluto@git+https://git.ipr.univ-rennes.fr/cellinfo/cocluto" - "starbench@git+https://github.com/g-raffy/starbench" + "starbench >= 1.0.1" +# "starbench@git+https://github.com/g-raffy/starbench" ] requires-python = ">= 3.8" authors = [ @@ -23,14 +24,15 @@ authors = [ ] [project.scripts] -hibenchonphysix = "iprbench.hibench.hibenchonphysix:main" -showresults = "iprbench.hibench.showresults:main" +clusterbench-submit = 
"iprbench.clusterbench:main" +iprbench-run = "iprbench.main:main" +showresults = "iprbench.benchmarks.showresults:main" [project.urls] Repository = "https://github.com/g-raffy/starbench" [tool.setuptools] -packages = ["iprbench", "iprbench.hibench"] +packages = ["iprbench", "iprbench.benchmarks"] [tool.setuptools.dynamic] version = {attr = "iprbench.main.__version__"} diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/mamul1/CMakeLists.txt b/test/mamul1/CMakeLists.txt new file mode 100644 index 0000000..80095ea --- /dev/null +++ b/test/mamul1/CMakeLists.txt @@ -0,0 +1,43 @@ + +enable_language (Fortran) + +set(MAMUL1_USE_MAGMA "OFF" CACHE BOOL "if set, mamul1 build uses magma (matrix algebra on gpu)") + +set(MAMUL1_MAGMA_API "CPU_MEM_API" CACHE STRING "which magma API to use when building mamul1: CPU_MEM_API for BLAS compatible API (uses matrices stored on CPU memory) or GPU_MEM_API (use matrices stored on GPU memory)") + +add_executable(mamul1 mamul1.F90) + +if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + # Allow arbitrary long lines. Needed as preprocessing could generate long line lengths. 
+    target_compile_options(mamul1 PUBLIC -ffree-line-length-none)
+elseif (CMAKE_Fortran_COMPILER_ID MATCHES "^Intel")
+    # Intel compilers (the compiler id is "Intel" for ifort and "IntelLLVM" for ifx)
+    target_compile_options(mamul1 PUBLIC -no-wrap-margin)
+endif()
+
+
+if (MAMUL1_USE_MAGMA)
+    find_package( MAGMA REQUIRED )
+    if( MAMUL1_MAGMA_API STREQUAL "CPU_MEM_API" )
+        target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM)
+    elseif( MAMUL1_MAGMA_API STREQUAL "GPU_MEM_API" )
+        target_compile_definitions(mamul1 PUBLIC USE_MAGMA_DGEMM_GPU)
+    else()
+        message(FATAL_ERROR "unexpected value for MAMUL1_MAGMA_API : ${MAMUL1_MAGMA_API}")
+    endif()
+    message(STATUS "MAGMA_INCLUDES=${MAGMA_INCLUDES}")
+    include_directories("${MAGMA_INCLUDES}")
+    target_link_libraries(mamul1 "${MAGMA_LIBRARIES}")
+else()
+    find_package( BLAS REQUIRED )
+    find_package( LAPACK REQUIRED )
+    # message("BLAS_LIBRARIES=${BLAS_LIBRARIES}")
+    # message("LAPACK_LIBRARIES=${LAPACK_LIBRARIES}")
+    target_compile_definitions(mamul1 PUBLIC USE_DGEMM)
+
+    # Link Blas and Lapack libraries
+    target_link_libraries(mamul1 "${LAPACK_LIBRARIES}")
+    target_link_libraries(mamul1 "${BLAS_LIBRARIES}")
+endif()
+
+install(TARGETS mamul1)
diff --git a/test/mamul1/mamul1.F90 b/test/mamul1/mamul1.F90
new file mode 100644
index 0000000..c31447b
--- /dev/null
+++ b/test/mamul1/mamul1.F90
@@ -0,0 +1,339 @@
+#define MAMUL1_VERSION "1.0.0"
+
+#define magma_devptr_t integer(kind=8)
+subroutine print_usage(prog_path)  ! prints the version, the build variant and the command line syntax on unit 6
+    character(len=*), intent(in) :: prog_path
+    character(len=80) :: build_variant
+#if defined(USE_MAGMA_DGEMM_GPU)
+    build_variant='gpu'
+#elif defined(USE_DGEMM)
+    build_variant='cpu'
+#else
+    build_variant='unknown'
+#endif
+    write(6,'("mamul1 v",a," (variant:",a,"): benchmark performs a square matrix multiplication in double precision")') MAMUL1_VERSION, trim(build_variant);
+    write(6,'()');
+    write(6,'("Usage: ",a," <NDIM> <NUM_LOOPS>")') trim(prog_path);
+    write(6,'("   <NDIM> positive integer representing the size of the square matrices to multiply ")');
+    write(6,'("   <NUM_LOOPS> positive integer representing the number of times the multiplication is performed")');
+end subroutine
+
+program mamul1
+
+implicit none
+
+
+integer :: argc, info, ndim, num_loops
+
+character(len=32) :: arg0, arg1, arg2
+
+
+call get_command_argument(0,arg0)
+
+argc = command_argument_count()
+if (argc /= 2) then
+    call print_usage(trim(arg0))
+    ! write(6,'("Usage: ",a," NDIM NUM_LOOPS, where NDIM is a positive integer")') trim(arg0);
+    stop
+end if
+
+call get_command_argument(1,arg1,status=info)
+if (info /= 0) then
+    write(6,'("Error reading argument: info = ",i2)') info
+    call print_usage(trim(arg0))
+stop
+end if
+
+call get_command_argument(2,arg2,status=info)
+if (info /= 0) then
+    write(6,'("Error reading argument: info = ",i2)') info
+    call print_usage(trim(arg0))
+stop
+end if
+
+read(arg1,*,iostat=info) ndim
+if (info /= 0) then
+    write(6,'("Error converting ndim argument to integer: info = ",i2)') info
+    call print_usage(trim(arg0))
+stop
+end if
+
+read(arg2,*,iostat=info) num_loops
+if (info /= 0) then
+    write(6,'("Error converting num_loops argument to integer: info = ",i2)') info
+    call print_usage(trim(arg0))
+stop
+end if
+
+
+if (ndim < 1 .or. num_loops < 1) then  ! both arguments are documented as positive integers
+    call print_usage(trim(arg0))
+stop
+end if
+
+    call test_dgemm(ndim, num_loops)
+
+stop
+end program mamul1
+
+subroutine set_random_seed(seed)  ! seeds the intrinsic random number generator with a seed array filled with the given value
+    integer :: seed
+    integer :: seed_array_size
+    INTEGER, ALLOCATABLE :: seed_array (:)
+    CALL RANDOM_SEED (SIZE = seed_array_size) ! seed_array_size is set to the size of
+    ! ! the seed array
+    ALLOCATE (seed_array(seed_array_size))
+    seed_array = seed
+    CALL RANDOM_SEED (PUT=seed_array(1:seed_array_size))
+end subroutine
+
+subroutine print_matrix(mat, ndim)  ! writes the ndim x ndim matrix on unit 6, one row per record
+    implicit none
+    integer, parameter :: dp = kind(1.0d0)
+    integer, intent(in) :: ndim  ! declared before mat because the bounds of mat use it
+    real(dp), intent(in) :: mat(ndim, ndim)
+    integer :: irow
+    do irow = 1, ndim
+        write(6, *) mat(irow,:)
+    end do
+end subroutine
+
+!
square matrix multiplication
+subroutine sqmatmul(amat, bmat, cmat, ndim)  ! computes cmat = amat * bmat for ndim x ndim matrices, on the gpu (magma) or with blas dgemm depending on the build
+#if defined(USE_MAGMA_DGEMM_GPU)
+    use magma, only: magmaf_init, magmaf_finalize
+    use magma, only: magmaf_queue_create, magmaf_queue_destroy
+    use magma, only: magmaf_dmalloc, magmaf_free
+    use magma, only: magmaf_dsetmatrix, magmaf_dgetmatrix
+    use magma, only: magmablasf_dgemm
+#endif
+    real*8, intent(in) :: amat(ndim,ndim)
+    real*8, intent(in) :: bmat(ndim,ndim)
+    real*8, intent(out) :: cmat(ndim,ndim)
+    integer :: lda, ldb, ldc  ! leading dimensions of the gpu copies of the matrices
+    integer :: info
+
+    real :: time_before, time_after
+    integer(8) :: num_ops
+    real :: gflops
+
+#ifdef USE_MAGMA_DGEMM_GPU
+    magma_devptr_t :: d_amat
+    magma_devptr_t :: d_bmat
+    magma_devptr_t :: d_cmat
+    magma_devptr_t :: queue !! really a CPU pointer
+#endif
+    lda = ceiling(real(ndim)/32)*32  ! ndim rounded up to a multiple of 32
+    ldb = ceiling(real(ndim)/32)*32
+    ldc = ceiling(real(ndim)/32)*32
+
+
+#if defined(USE_MAGMA_DGEMM_GPU)
+    !! allocate GPU memory
+    write(6,'("DEBUG: before matrix A gpu memory allocation (",i0," doubles)")') lda * ndim
+    info = magmaf_dmalloc( d_amat, lda*ndim )
+    if (d_amat == 0) then
+        print "(a)", "failed to allocate d_amat"
+        return
+    endif
+    write(6,'("DEBUG: before matrix B gpu memory allocation (",i0," doubles)")') ldb * ndim
+    info = magmaf_dmalloc( d_bmat, ldb*ndim )
+    if (d_bmat == 0) then
+        print "(a)", "failed to allocate d_bmat"
+        return
+    endif
+    write(6,'("DEBUG: before matrix C gpu memory allocation (",i0," doubles)")') ldc * ndim
+    info = magmaf_dmalloc( d_cmat, ldc*ndim )
+    if (d_cmat == 0) then
+        print "(a)", "failed to allocate d_cmat"
+        return
+    endif
+
+    ! copy A to dA and B to dB
+    call magmaf_queue_create( 0, queue )
+    write(6,'("DEBUG: queue = ",i0)') queue
+    if (queue == 0) then
+        print "(a)", "failed to create a queue"
+        return
+    endif
+
+    write(6,*) 'DEBUG: copying matrix A from CPU to GPU memory'
+    call magmaf_dsetmatrix( ndim, ndim, amat, ndim, d_amat, lda, queue )
+    write(6,*) 'DEBUG: copying matrix B from CPU to GPU memory'
+    call magmaf_dsetmatrix( ndim, ndim, bmat, ndim, d_bmat, ldb, queue )
+
+    call cpu_time(time_before)
+    write (6,*) 'before magmablasf_dgemm, time=', time_before
+
+    call magmablasf_dgemm ('N', 'N', ndim, ndim, ndim, 1.0d0, d_amat, lda, d_bmat, ldb, 0.0d0, d_cmat, ldc, queue)
+    call magmaf_queue_sync(queue)  ! presumably waits for the end of the gpu computation before the timer is read - TODO confirm
+
+    call cpu_time(time_after)
+    num_ops = real(ndim) * real(ndim) * real(ndim) * 2  ! ndim**3 multiplications and ndim**3 additions
+    gflops = num_ops / (time_after - time_before) / 1.0e9
+    write (6,*) 'after magmablasf_dgemm, time=', time_after
+    write (6,*) 'magmablasf_dgemm (from gpu memory to gpu memory) duration :', (time_after - time_before), '(', gflops, ' gflops)'
+
+    write(6,*) 'DEBUG: copying matrix C from GPU to CPU memory'
+    call magmaf_dgetmatrix( ndim, ndim, d_cmat, ldc, cmat, ndim, queue )
+    call magmaf_queue_destroy( queue )
+
+    info = magmaf_free(d_cmat)
+    info = magmaf_free(d_bmat)
+    info = magmaf_free(d_amat)
+
+#endif
+
+#ifdef USE_DGEMM
+    ! subroutine dgemm ( character TRANSA,
+    ! character TRANSB,
+    ! integer M,
+    ! integer N,
+    ! integer K,
+    ! double precision ALPHA,
+    ! double precision, dimension(lda,*) A,
+    ! integer LDA,
+    ! double precision, dimension(ldb,*) B,
+    ! integer LDB,
+    ! double precision BETA,
+    ! double precision, dimension(ldc,*) C,
+    ! integer LDC
+    ! )
+    call dgemm('N', 'N', ndim, ndim, ndim, 1.0d0, amat, ndim, bmat, ndim, 0.0d0, cmat, ndim)
+#endif
+
+end subroutine  ! NOTE(review): no branch is compiled when only USE_MAGMA_DGEMM is defined (CPU_MEM_API in CMakeLists.txt), cmat is then never written - confirm
+
+subroutine check_cmat_element(cmat, row, col, amat, bmat, ndim)  ! recomputes cmat(row, col) as a dot product and stops the program if cmat(row, col) differs from it by more than 1.0e-8
+    real(8), intent(in) :: cmat(ndim, ndim)
+    integer, intent(in) :: row
+    integer, intent(in) :: col
+    real(8), intent(in) :: amat(ndim, ndim)
+    real(8), intent(in) :: bmat(ndim, ndim)
+    integer, intent(in) :: ndim
+
+    real(8) :: x  ! the expected value of cmat(row, col)
+    x = 0.0d0
+    do i = 1, ndim
+        x = x + amat(row, i) * bmat(i, col)
+    end do
+
+    write(6, '("expected cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, x
+    write(6, '("computed cmat(", i0, ", ", i0, ")", e23.15e3)') row, col, cmat(row, col)
+    if (abs(cmat(row, col) - x) > 1.0e-8) then
+        stop 'a computed element has a wrong value'
+    end if
+end subroutine
+
+
+subroutine test_dgemm(ndim, num_loops)  ! multiplies two random ndim x ndim matrices num_loops times, checks the result and prints the timings
+#if defined(USE_MAGMA_DGEMM_GPU)
+    use magma, only: magmaf_init, magmaf_finalize
+    use magma, only: magmablasf_dgemm !, magmaf_dgemm_gpu
+#endif
+
+    implicit none
+    integer, intent(in) :: ndim
+    integer, intent(in) :: num_loops
+    integer, parameter :: dp = kind(1.0d0)
+    real :: ct_start, ct_stop ! elapsed cpu time relative to an arbitrary fixed time. Expressed in seconds with the granularity of 1 microsecond
+    integer(8) :: num_ops
+    real :: gflops
+
+    integer :: sc_start, sc_stop ! system clock time of start and stop events, expressed in ticks
+    integer :: sc_count_rate ! number of system clock ticks per second
+    integer :: sc_count_max ! the max possible number of system clock ticks returned by system_clock
+    integer :: s  ! set to 1 when the elapsed system clock time is smaller than the elapsed cpu time
+    REAL :: a_diff, diff  ! absolute (a_diff) and signed (diff) difference between the elapsed system clock time and the elapsed cpu time
+    REAL :: num_sc_ticks_per_second ! the number of system clock ticks per second
+
+    real*8, allocatable :: amat(:,:)
+    real*8, allocatable :: bmat(:,:)
+    real*8, allocatable :: cmat(:,:)
+    real(dp) :: x
+    integer :: i, j
+
+#if defined(USE_MAGMA_DGEMM_GPU)
+    write(6,*) 'DEBUG: init magma'
+    call magmaf_init()
+#endif
+
+    ! First initialize the system_clock
+    CALL system_clock(count_rate=sc_count_rate)
+    CALL system_clock(count_max=sc_count_max)
+    num_sc_ticks_per_second = REAL(sc_count_rate)
+    WRITE(*,*) "system_clock rate : ", num_sc_ticks_per_second, " ticks per second"
+
+    diff = 0.0
+    a_diff = 0.0
+    s = 0
+
+    allocate(amat(ndim, ndim))
+    allocate(bmat(ndim, ndim))
+    allocate(cmat(ndim, ndim))
+
+    call set_random_seed(42)  ! constant seed, presumably to get the same matrices at each run
+
+    !call random_number(amat)
+    !amat = 0.5_dp*(amat + transpose(amat))
+    do j = 1, ndim
+        do i = 1, ndim
+            call random_number(x)
+            amat(i,j) = x
+            call random_number(x)
+            bmat(i,j) = x
+        end do
+    end do
+
+    call cpu_time(ct_start)
+    call system_clock(sc_start)
+
+    do j = 1, num_loops
+        ! playmat = amat
+
+        call sqmatmul(amat, bmat, cmat, ndim)
+
+    end do
+
+    call cpu_time(ct_stop)
+    call system_clock(sc_stop)
+    if ( (sc_stop - sc_start)/num_sc_ticks_per_second < (ct_stop - ct_start) ) s = s + 1
+    diff = (sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start) + diff
+    a_diff = ABS((sc_stop - sc_start)/num_sc_ticks_per_second - (ct_stop - ct_start)) + a_diff
+
+    ! check the four corner elements of cmat against a direct dot product computation
+    call check_cmat_element(cmat, 1, 1, amat, bmat, ndim)
+    call check_cmat_element(cmat, 1, ndim, amat, bmat, ndim)
+    call check_cmat_element(cmat, ndim, 1, amat, bmat, ndim)
+    call check_cmat_element(cmat, ndim, ndim, amat, bmat, ndim)
+
+    ! write(6, *) 'amat = '
+    ! call print_matrix(amat, ndim)
+
+    ! write(6, *) 'bmat = '
+    ! call print_matrix(bmat, ndim)
+
+    ! write(6, *) 'cmat = '
+    ! call print_matrix(cmat, ndim)
+
+    num_ops = real(ndim) * real(ndim) * real(ndim) * 2 * num_loops  ! 2 * ndim**3 operations per multiplication
+    gflops = num_ops / (ct_stop-ct_start) / 1.0e9  ! based on the cpu time, not on the system clock time
+
+
+    write(6, '("Time taken by dgemm for matrix size ",i8," was ",f10.2," seconds")') ndim, ct_stop-ct_start
+    WRITE(*,*) "gflops (including potential memory transfers) : ", gflops
+
+    WRITE(*,*) "system_clock : ",(sc_stop - sc_start)/num_sc_ticks_per_second
+    WRITE(*,*) "cpu_time : ",(ct_stop - ct_start)
+    WRITE(*,*) "sys_clock < cpu_time : ",s
+    WRITE(*,*) "mean diff : ",diff
+    WRITE(*,*) "abs mean diff : ",a_diff
+
+#if defined(USE_MAGMA_DGEMM_GPU)
+    write(6,*) 'DEBUG: deinit magma'
+    call magmaf_finalize()
+#endif
+
+
+    deallocate(amat, bmat, cmat)
+ end
diff --git a/test/test_clusterbench.py b/test/test_clusterbench.py
new file mode 100644
index 0000000..311112d
--- /dev/null
+++ b/test/test_clusterbench.py
@@ -0,0 +1,30 @@
+import unittest
+import logging
+import subprocess
+# import importlib.resources
+
+
+class ClusterBenchTestCase(unittest.TestCase):
+    """runs the clusterbench-submit command on the mamul1 benchmark; the test fails if the command exits with a non zero status"""
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+    def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
+        return super().setUp()
+
+    def test_clusterbench_submit(self):
+        logging.info('test_clusterbench_submit')
+        # with importlib.resources.path('iprbench.resources', 'clusterbench-template.job') as job_template_path:
+        #     print(job_template_path)
+        #     assert False
+        # subprocess.run('pip list', shell=True, check=True, executable='/bin/bash')
+        command = 'clusterbench-submit --arch-regexp "intel_core.*" --benchmark-id \'mamul1\' --config \'{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10}\' --results-dir /tmp/mamul1_out'
+        subprocess.run(command, shell=True, check=True, executable='/bin/bash')  # check=True raises CalledProcessError on a non zero exit status
+
+    # def test_clusterbench_hibench(self):
+    #     logging.info('test_clusterbench_hibench')
+    #     command = 'clusterbench-submit --benchmark-id \'hibench\' --config \'{"compiler_id": "gfortran", "test_id":
"arch4_quick"}\' --results-dir /tmp/mamul1_out'
+    #     subprocess.run(command, shell=True, check=True, executable='/bin/bash')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_iprbench.py b/test/test_iprbench.py
new file mode 100644
index 0000000..355290e
--- /dev/null
+++ b/test/test_iprbench.py
@@ -0,0 +1,30 @@
+import unittest
+import logging
+import subprocess
+# import importlib.resources
+
+
+class IprBenchTestCase(unittest.TestCase):
+    """end to end test of the iprbench-run command"""
+
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+    def setUp(self) -> None:  # pylint: disable=useless-parent-delegation
+        return super().setUp()
+
+    def test_iprbench_run(self):
+        """runs the mamul1 benchmark through iprbench-run; the test fails if the command exits with a non zero status"""
+        logging.info('test_iprbench_run')
+        # with importlib.resources.path('iprbench.resources', 'mamul1') as src_dir:
+        #     with open(src_dir / 'mamul1.F90', encoding='utf8') as f:
+        #         print(f.readlines())
+        #     with open(src_dir / 'CMakeLists.txt', encoding='utf8') as f:
+        #         print(f.readlines())
+        #     subprocess.run(f'cat {src_dir / "CMakeLists.txt"}', check=True)
+        benchmark_config = '{"compiler_id": "gfortran", "matrix_size": 1024, "num_loops":10, "num_cores":2}'
+        run_command = f'iprbench-run --benchmark-id \'mamul1\' --config \'{benchmark_config}\' --results-dir /tmp/mamul1_out'
+        subprocess.run(run_command, shell=True, check=True, executable='/bin/bash')
+
+
+if __name__ == '__main__':
+    unittest.main()