from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements, ITargetHost  # , Package  # PackageVariant
from pathlib import Path
import pandas as pd
import subprocess
from iprbench.util import extract_resource_dir


class MaMul1(IBenchmark):
    """Matrix multiplication benchmark.

    The benchmark extracts the ``mamul1`` sources shipped in iprbench's
    resources, has ``starbench`` build and run them, and reports the
    ``duration`` value read back from starbench's measurements file.
    """

    def __init__(self):
        """Declare the input and output parameters of the ``mamul1`` benchmark."""
        bench_params = [
            BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark'),
            BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'),
            BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the the n * n matrices'),
            BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical multiplications performed in sequence'),
            # BenchParam('source_dir', BenchParam.Type.PARAM_TYPE_STRING, 'the path to the directory containing mamul1 test source files'),
        ]

        out_params = [
            BenchParam('duration', BenchParam.Type.PARAM_TYPE_FLOAT, 'the average duration of one matrix multiplication, in seconds'),
        ]

        super().__init__(bench_id='mamul1', bench_params=bench_params, out_params=out_params)

    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """Return the estimated amount of RAM (in bytes) needed by one run.

        The estimate is a fixed 1 GiB plus the storage of three
        ``matrix_size * matrix_size`` matrices of 8-byte doubles.

        Args:
            config: benchmark configuration; only ``config['matrix_size']`` is read.

        Returns:
            The estimated RAM requirement, in bytes.
        """
        GIBIBYTE_TO_BYTE = 1024 * 1024 * 1024
        SIZE_OF_DOUBLE = 8  # in bytes
        matrix_size = config['matrix_size']
        matrix_ram_size = matrix_size * matrix_size * SIZE_OF_DOUBLE
        # presumably the two operands and the result matrix -- TODO confirm
        num_matrices = 3
        # the constant 1 GiB term is presumably headroom for everything that is not matrix data
        ram_requirements = int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size
        return ram_requirements

    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
        """Build and run ``mamul1`` through ``starbench`` and collect its duration.

        Args:
            config: benchmark configuration providing ``fortran_compiler``,
                ``num_cores``, ``matrix_size`` and ``num_loops``.
            benchmark_output_dir: directory that receives the extracted source
                tree (``mamul1`` sub-directory) and starbench's output
                (``output`` sub-directory).
            target_host: host abstraction used to obtain the shell command that
                activates the requested compiler package.

        Returns:
            A mapping with a single ``'duration'`` entry taken from the selected
            row of ``output/measurements.tsv``.

        Raises:
            ValueError: if the requested fortran compiler is neither ``ifort``
                nor ``gfortran``.
            subprocess.CalledProcessError: if the starbench command line exits
                with a non-zero status.
            AssertionError: if the measurements file does not contain exactly
                one matching row.
        """
        fortran_compiler = config['fortran_compiler']
        num_cores = config['num_cores']
        matrix_size = config['matrix_size']
        num_loops = config['num_loops']

        # extract the mamul1 source code tree from iprbench's resources
        mamul1_source_code_root_path = benchmark_output_dir / 'mamul1'
        extract_resource_dir('iprbench.resources', 'mamul1', dest_path=mamul1_source_code_root_path)

        output_dir = benchmark_output_dir / 'output'

        source_tree_provider = f'{{"type": "existing-dir", "dir-path": "{mamul1_source_code_root_path}"}}'
        benchmark_command = ['./mamul1', f'{matrix_size}', f'{num_loops}']

        cmake_options = [
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
        ]

        env_vars_bash_commands = target_host.get_package_activation_command(fortran_compiler.package_id, fortran_compiler.package_version)
        cmake_options.append(f'-DCMAKE_Fortran_COMPILER={fortran_compiler.package_id}')
        if fortran_compiler.package_id == 'ifort':
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
        elif fortran_compiler.package_id == 'gfortran':
            pass
        else:
            # the original `assert '<non-empty string>'` could never fail; reject unsupported compilers for real
            raise ValueError(f'unhandled fortran_compiler_id : {fortran_compiler.package_id}')

        output_measurements_file_path = output_dir / "measurements.tsv"

        cmake_option_args = ' '.join(f'--cmake-option={option}' for option in cmake_options)
        benchmark_command_str = ' '.join(benchmark_command)

        shell_command = ''
        if env_vars_bash_commands:
            # activate the compiler package in the same shell that runs starbench
            shell_command += f'{env_vars_bash_commands} && '
        shell_command += (
            f"starbench --source-tree-provider '{source_tree_provider}'"
            f" --num-cores {num_cores}"
            f" --output-dir={output_dir}"
            f" --cmake-path=/usr/bin/cmake"
            f" {cmake_option_args}"
            f" --benchmark-command='{benchmark_command_str}'"
            f" --output-measurements={output_measurements_file_path}"
        )
        # the activation commands are bash commands, so run them with bash rather than the default /bin/sh
        # (the shell path was previously passed as `encoding`, which does not select the shell)
        subprocess.run(shell_command, shell=True, check=True, executable='/bin/bash')

        measurements: BenchmarkMeasurements = {}
        df = pd.read_csv(output_measurements_file_path, sep='\t')
        # NOTE(review): with pandas' default NA handling an empty field is read as NaN, so comparing
        # against '' is unlikely to ever match -- confirm the worker_id label starbench writes for
        # the row that holds the aggregated duration.
        selected_rows = df[df['worker_id'] == '']
        assert len(selected_rows) == 1, f'expected exactly one matching row in {output_measurements_file_path}, found {len(selected_rows)}'
        # positional access: the filtered frame keeps its original index labels, so `.loc[0]` would
        # only work if the matching row happened to be the first row of the file
        row = selected_rows.iloc[0]
        duration = row["duration"]
        measurements['duration'] = duration
        return measurements

    # def get_measurements(self, benchmark_output_dir: Path) -> BenchmarkMeasurements:
    #     raise NotImplementedError()