iprbench/iprbench/benchmarks/mamul1.py

79 lines
4.2 KiB
Python

from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements, ITargetHost # , Package # PackageVariant
from pathlib import Path
import pandas as pd
import subprocess
from iprbench.util import extract_resource_dir
class MaMul1(IBenchmark):
    """Matrix multiplication benchmark.

    Extracts the ``mamul1`` source tree bundled in ``iprbench.resources``,
    hands it to the ``starbench`` command line tool together with the chosen
    fortran compiler, and reports the duration found on the ``<average>`` row
    of the measurements file that ``starbench`` writes.
    """

    def __init__(self):
        """Declare the input parameters and output measurements of the benchmark."""
        bench_params = [
            BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark'),
            BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'),
            BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the the n * n matrices'),
            BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical multiplications performed in sequence'),
            # BenchParam('source_dir', BenchParam.Type.PARAM_TYPE_STRING, 'the path to the directory containing mamul1 test source files'),
        ]
        out_params = [
            BenchParam('duration', BenchParam.Type.PARAM_TYPE_FLOAT, 'the average duration of one matrix multiplication, in seconds'),
        ]
        super().__init__(bench_id='mamul1', bench_params=bench_params, out_params=out_params)

    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """Return the amount of memory, in bytes, requested for this configuration.

        The estimate is the storage of three ``matrix_size * matrix_size``
        matrices of 8-byte doubles, plus a fixed 1 GiB.

        Args:
            config: benchmark configuration; only ``config['matrix_size']`` is read.

        Returns:
            The memory requirement in bytes.
        """
        GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024
        SIZE_OF_DOUBLE = 8  # in bytes
        matrix_size = config['matrix_size']
        matrix_ram_size = matrix_size * matrix_size * SIZE_OF_DOUBLE
        num_matrices = 3
        # NOTE(review): the fixed 1 GiB is presumably headroom for everything
        # that is not matrix storage -- confirm
        return int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
        """Run the benchmark through ``starbench`` and return its measurements.

        Args:
            config: benchmark configuration providing ``fortran_compiler``,
                ``num_cores``, ``matrix_size`` and ``num_loops``.
            benchmark_output_dir: directory that receives the extracted source
                tree (``mamul1`` sub-directory) and the ``starbench`` output
                (``output`` sub-directory).
            target_host: host description, used to obtain the shell command
                that activates the requested compiler package.

        Returns:
            A mapping whose ``'duration'`` entry is the ``duration`` value of
            the ``<average>`` row of ``output/measurements.tsv``.

        Raises:
            ValueError: if the fortran compiler is neither ``ifort`` nor ``gfortran``.
            subprocess.CalledProcessError: if the shell command exits with a
                non-zero status.
            AssertionError: if the measurements file does not contain exactly
                one ``<average>`` row.
        """
        fortran_compiler = config['fortran_compiler']
        num_cores = config['num_cores']
        matrix_size = config['matrix_size']
        num_loops = config['num_loops']

        # reject unsupported compilers before anything is written to disk
        # (the original `assert '<message>'` was always true and never fired)
        if fortran_compiler.package_id not in ('ifort', 'gfortran'):
            raise ValueError(f'unhandled fortran_compiler_id : {fortran_compiler.package_id}')

        # extract the mamul1 source code tree from iprbench's resources
        mamul1_source_code_root_path = benchmark_output_dir / 'mamul1'
        extract_resource_dir('iprbench.resources', 'mamul1', dest_path=mamul1_source_code_root_path)

        output_dir = benchmark_output_dir / 'output'
        output_measurements_file_path = output_dir / "measurements.tsv"

        source_tree_provider = f'{{"type": "existing-dir", "dir-path": "{mamul1_source_code_root_path}"}}'
        benchmark_command = ['./mamul1', f'{matrix_size}', f'{num_loops}']

        cmake_options = [
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
            f'-DCMAKE_Fortran_COMPILER={fortran_compiler.package_id}',
        ]
        if fortran_compiler.package_id == 'ifort':
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading

        env_vars_bash_commands = target_host.get_package_activation_command(fortran_compiler.package_id, fortran_compiler.package_version)

        cmake_option_args = ' '.join(f'--cmake-option={option}' for option in cmake_options)
        benchmark_command_arg = ' '.join(benchmark_command)
        starbench_command = (
            f"starbench --source-tree-provider '{source_tree_provider}'"
            f' --num-cores {num_cores}'
            f' --output-dir={output_dir}'
            ' --cmake-path=/usr/bin/cmake'
            f' {cmake_option_args}'
            f" --benchmark-command='{benchmark_command_arg}'"
            f' --output-measurements={output_measurements_file_path}'
        )

        shell_command = ''
        if env_vars_bash_commands:
            # the package activation must succeed before starbench is started
            shell_command += f'{env_vars_bash_commands} && '
        shell_command += starbench_command

        # run under bash: '/bin/bash' was previously passed as `encoding`, which
        # left the command running under the default /bin/sh
        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')

        measurements: BenchmarkMeasurements = {}
        df = pd.read_csv(output_measurements_file_path, sep='\t')
        selected_rows = df[df['worker_id'] == '<average>']
        assert len(selected_rows) == 1
        # positional access: the filtered frame keeps the row's original index
        # label, which is 0 only if '<average>' is the first line of the file
        row = selected_rows.iloc[0]
        measurements['duration'] = row["duration"]
        return measurements
# def get_measurements(self, benchmark_output_dir: Path) -> BenchmarkMeasurements:
# raise NotImplementedError()