- Added support for package params. As a result, it is now possible to choose the Fortran compiler version (implemented in mamul1 and hibench).

work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
This commit is contained in:
Guillaume Raffy 2024-11-19 21:37:55 +01:00
parent a5b5345d39
commit 7fd25890ec
11 changed files with 162 additions and 38 deletions

View File

@ -2,7 +2,7 @@ import pandas as pd
from pathlib import Path
import subprocess
import shutil
from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements
from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements, ITargetHost
from ..util import get_proxy_env_vars
@ -14,7 +14,7 @@ class HiBench(IBenchmark):
bench_params = []
bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id'))
bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
bench_params.append(BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark'))
# bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
# bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
# bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
@ -38,12 +38,12 @@ class HiBench(IBenchmark):
assert f'unhandled benchmark_test : {benchmark_test}'
return ram_per_core
def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path) -> BenchmarkMeasurements:
def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
git_repos_url = 'https://github.com/hibridon/hibridon'
hibridon_version = config['hibridon_version']
test_id = config['test_id'] # eg arch4_quick or nh3h2_qma_long
compiler_id = config['compiler_id']
fortran_compiler = config['fortran_compiler']
cmake_path = config['cmake_path']
num_cores = config['num_cores']
@ -62,16 +62,14 @@ class HiBench(IBenchmark):
'-DBUILD_TESTING=ON' # enable hibridon tests
]
env_vars_bash_commands = ''
if compiler_id == 'ifort':
env_vars_bash_commands = 'module load compilers/ifort/latest'
cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler
env_vars_bash_commands = target_host.get_package_activation_command(fortran_compiler.package_id, fortran_compiler.package_version)
cmake_options.append(f'-DCMAKE_Fortran_COMPILER={fortran_compiler.package_id}')
if fortran_compiler.package_id == 'ifort':
cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading
elif compiler_id == 'gfortran':
env_vars_bash_commands = ''
cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler
elif fortran_compiler.package_id == 'gfortran':
pass
else:
assert f'unhandled compiler_id : {compiler_id}'
assert f'unhandled compiler_id : {fortran_compiler.package_id}'
output_measurements_file_path = output_dir / "measurements.tsv"

View File

@ -1,4 +1,4 @@
from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements
from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements, ITargetHost # , Package # PackageVariant
from pathlib import Path
import pandas as pd
import subprocess
@ -8,10 +8,9 @@ from iprbench.util import extract_resource_dir
class MaMul1(IBenchmark):
"""Matrix multiplication benchmark
"""
def __init__(self):
bench_params = []
bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
bench_params.append(BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark'))
bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
bench_params.append(BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the the n * n matrices'))
bench_params.append(BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical multiplications performed in sequence'))
@ -31,8 +30,8 @@ class MaMul1(IBenchmark):
ram_requirements = int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size
return ram_requirements
def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path) -> BenchmarkMeasurements:
compiler_id = config['compiler_id']
def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
fortran_compiler = config['fortran_compiler']
num_cores = config['num_cores']
matrix_size = config['matrix_size']
num_loops = config['num_loops']
@ -50,16 +49,14 @@ class MaMul1(IBenchmark):
'-DCMAKE_BUILD_TYPE=Release', # build in release mode for highest performance
]
env_vars_bash_commands = ''
if compiler_id == 'ifort':
env_vars_bash_commands = 'module load compilers/ifort/latest'
cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort') # use intel fortran compiler
env_vars_bash_commands = target_host.get_package_activation_command(fortran_compiler.package_id, fortran_compiler.package_version)
cmake_options.append(f'-DCMAKE_Fortran_COMPILER={fortran_compiler.package_id}')
if fortran_compiler.package_id == 'ifort':
cmake_options.append('-DBLA_VENDOR=Intel10_64lp') # use 64 bits intel mkl with multithreading
elif compiler_id == 'gfortran':
env_vars_bash_commands = ''
cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran') # use gfortran compiler
elif fortran_compiler.package_id == 'gfortran':
pass
else:
assert f'unhandled compiler_id : {compiler_id}'
assert f'unhandled fortran_compiler_id : {fortran_compiler.package_id}'
output_measurements_file_path = output_dir / "measurements.tsv"

View File

@ -273,7 +273,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat
def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams):
compiler_id: CompilerId = benchmark_config['compiler_id']
compiler_id: CompilerId = benchmark_config['fortran_compiler']
cluster_db = cluster.get_cluster_db()

View File

@ -1,14 +1,50 @@
from typing import List, Dict, Union, Any
from typing import List, Dict, Union, Any, Optional
from enum import Enum
import abc
from pathlib import Path
from datetime import datetime
from .util import Singleton
import json
class ITargetHost(abc.ABC):
    """The host that runs the benchmark.

    Implementations tell which version of a package is installed on the host
    and which bash command activates a given version of that package.
    """

    @abc.abstractmethod
    def get_package_default_version(self, package_id: str) -> str:
        """Return the latest installed version of the given package.

        eg '2021.1.2' for 'ifort'
        """
        # same guard as get_package_activation_command: a subclass that delegates
        # to super() must not silently get None instead of a version string
        raise NotImplementedError()

    @abc.abstractmethod
    def get_package_activation_command(self, package_id: str, package_version: str) -> str:
        """Return the bash command that activates the given package.

        eg for package_id=='ifort' and package_version=='2021.1.2' return
        'module load compilers/ifort/2021.1.2'
        """
        raise NotImplementedError()
class Package():
    """A software component a benchmark depends on (eg ifort 2019.1.2 used as fortran_compiler)."""
    package_id: str  # eg 'ifort'
    package_version: str  # eg '2021.1.2'

    def __init__(self, package_id: str, package_version: str, target_host: ITargetHost):
        """Store the package identity, asking target_host for the version when it is '<default>'."""
        self.target_host = target_host
        # '<default>' is a placeholder: the actual version is the one reported by the target host
        wants_default_version = package_version == '<default>'
        actual_version = target_host.get_package_default_version(package_id) if wants_default_version else package_version
        self.package_id = package_id
        self.package_version = actual_version

    def __repr__(self) -> str:
        """Return the package in the form '<package_id>:<package_version>'."""
        return '{}:{}'.format(self.package_id, self.package_version)
BenchmarkId = str # a unique name for a benchmark, eg 'matmul1'
ResultsDbId = str # a unique name for a results database, eg 'tsv-files'
BenchParamId = str
BenchParamType = Union[int, str, float, datetime]
BenchParamType = Union[int, str, float, datetime, Package]
BenchmarkConfig = Dict[BenchParamId, BenchParamType] # eg { 'fortran_compiler': 'gfortran:<default>', 'matrix_size': 1024 } ; IBenchmark.resolve_config replaces the values of package parameters with Package objects
BenchmarkMeasurements = Dict[BenchParamId, BenchParamType] # eg { 'matrix_multiplication_avg_duration': 3.14 }
BenchmarkParamValues = Dict[BenchParamId, BenchParamType]
@ -25,6 +61,7 @@ class BenchParam():
PARAM_TYPE_INT = 1
PARAM_TYPE_FLOAT = 2
PARAM_TYPE_TIME = 3
PARAM_TYPE_PACKAGE = 4
name: BenchParamId # the name of the parameter, eg 'matrix_size'
param_type: Type # the type of the parameter, eg 'PARAM_TYPE_INT'
@ -61,7 +98,7 @@ class IBenchmark(abc.ABC):
"""
@abc.abstractmethod
def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path) -> BenchmarkMeasurements:
def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
"""execute the benchmark for the given config
"""
@ -70,6 +107,31 @@ class IBenchmark(abc.ABC):
# """parses benchmark_output_dir to collect the benchmark's measurements
# """
def load_config(self, config_as_json: str, target_host: ITargetHost) -> BenchmarkConfig:
    """Build a benchmark config from its json representation.

    The decoded config is validated first, then resolved against target_host;
    the resolved config is the one returned.
    """
    raw_config = json.loads(config_as_json)
    self.validate_config(raw_config)
    return self.resolve_config(raw_config, target_host)
def get_bench_param(self, param_name: str) -> Optional[BenchParam]:
    """Look up the benchmark parameter named param_name.

    Returns the first entry of self.bench_params whose name matches, or None
    when no entry has that name.
    """
    matching_params = (candidate for candidate in self.bench_params if candidate.name == param_name)
    return next(matching_params, None)
def resolve_config(self, config: BenchmarkConfig, target_host: ITargetHost) -> BenchmarkConfig:
    """Convert the raw values of config into their final form.

    The value of a PARAM_TYPE_PACKAGE parameter is expected in the form
    '<package_id>:<package_version>' (eg 'gfortran:<default>') and is turned
    into a Package built with target_host; other values are kept as they are.

    Raises:
        ValueError: if config holds a parameter that this benchmark doesn't
            declare, or a package value that is not of the form 'id:version'.
    """
    resolved_benchmark_config = {}
    for param_name, param_value in config.items():
        bench_param = self.get_bench_param(param_name)
        if bench_param is None:
            # get_bench_param returns None for undeclared names; fail with a clear
            # message rather than with an AttributeError on None
            raise ValueError(f'unknown benchmark parameter : {param_name}')
        if bench_param.param_type == BenchParam.Type.PARAM_TYPE_PACKAGE:
            if not isinstance(param_value, str) or param_value.count(':') != 1:
                raise ValueError(f"invalid value for package parameter {param_name} : {param_value!r} (expected '<package_id>:<package_version>')")
            package_id, package_version = param_value.split(':')
            resolved_value = Package(package_id, package_version, target_host)
        else:
            resolved_value = param_value
        resolved_benchmark_config[param_name] = resolved_value
    return resolved_benchmark_config
def validate_config(self, config: BenchmarkConfig):
"""checks that all benchmark parameters have been set in the given config"""
for bench_param in self.bench_params:
@ -175,4 +237,3 @@ class ResultsDbFactory(metaclass=Singleton):
def create_resultsdb(self, resultsdb_id: ResultsDbId, params: ResultsDbParams) -> IResultsDb:
return self.resultsdb_creators[resultsdb_id].create_resultsdb(params)

View File

@ -1,4 +1,5 @@
from .core import BenchmarkId, IBenchmark, ResultsDbFactory
from .targethosts import GraffyWs2
from .benchmarks.hibench import HiBench
from .benchmarks.mamul1 import MaMul1
from .resultsdb.tsvresultsdb import TsvResultsDbCreator
@ -47,8 +48,8 @@ def main():
benchmark_id = BenchmarkId(args.benchmark_id)
benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
benchmark_config = json.loads(args.config)
benchmark.validate_config(benchmark_config)
target_host = GraffyWs2()
benchmark_config = benchmark.load_config(args.config, target_host)
results_dir = args.results_dir
ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())
@ -66,7 +67,7 @@ def main():
results_table = results_db.get_table(benchmark)
measurements = benchmark.execute(benchmark_config, results_dir)
measurements = benchmark.execute(benchmark_config, results_dir, target_host)
results_table.add_results(benchmark_config, measurements)
# out_params.append(BenchParam('host_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the host running the benchmark'))

View File

@ -33,6 +33,7 @@ class SqlResultsTable(IResultsTable):
BenchParam.Type.PARAM_TYPE_INT: SqlTableField.Type.FIELD_TYPE_INT,
BenchParam.Type.PARAM_TYPE_STRING: SqlTableField.Type.FIELD_TYPE_STRING,
BenchParam.Type.PARAM_TYPE_TIME: SqlTableField.Type.FIELD_TYPE_TIME,
BenchParam.Type.PARAM_TYPE_PACKAGE: SqlTableField.Type.FIELD_TYPE_STRING, # packages are stored as strings in the form ifort:2021.1.2
}[param.param_type]
fields.append(SqlTableField(param.name, sql_field_type, param.description))
self.sql_backend.create_table(table_name, fields)

66
iprbench/targethosts.py Normal file
View File

@ -0,0 +1,66 @@
from typing import Set
from .core import ITargetHost
import subprocess
import re
import logging
class GraffyWs2(ITargetHost):
    """Target host named 'graffy-ws2'.

    gfortran is the only package declared as available on this host, and no
    activation command is needed to use it.
    """
    host_name: str  # name of the host, used in error messages
    available_packages: Set[str]  # ids of the packages that can be requested on this host

    def __init__(self):
        self.host_name = 'graffy-ws2'
        self.available_packages = {'gfortran'}

    def get_package_default_version(self, package_id: str) -> str:
        """Return the version of the given package installed on this host.

        For gfortran, the version is the last word of the first line printed
        by `gfortran --version`.

        Raises:
            ValueError: if the package is not available on this host.
            AssertionError: if the package is declared available but not handled
                here, or if the gfortran version has an unexpected format.
        """
        if package_id not in self.available_packages:
            # report the package that was actually requested (the message used to always mention ifort)
            raise ValueError(f'{package_id} is not available on {self.host_name}')
        if package_id != 'gfortran':
            # explicit raise rather than `assert False`, which python -O strips
            raise AssertionError(f'unhandled package: {package_id}')

        completed_process = subprocess.run('gfortran --version', capture_output=True, check=False, shell=True)
        if completed_process.returncode != 0:
            raise ValueError(f'gfortran is not available on {self.host_name}')
        # example of output:
        # GNU Fortran (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
        # Copyright (C) 2019 Free Software Foundation, Inc.
        # This is free software; see the source for copying conditions. There is NO
        # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
        first_line = completed_process.stdout.decode('utf-8').split('\n')[0]
        logging.debug('first line: %s', first_line)
        gfortran_version = first_line.split(' ')[-1]
        if not re.match(r'[0-9]+\.[0-9]+\.[0-9]+', gfortran_version):
            # explicit raise so that the check is also performed under python -O
            raise AssertionError(f'unexpected format for gfortran version {gfortran_version}')
        return gfortran_version

    def get_package_activation_command(self, package_id: str, package_version: str) -> str:
        """Return the bash command that activates the given version of the package.

        Only the gfortran version currently installed can be activated, and it
        needs no command, hence the empty string.

        Raises:
            ValueError: if the package is not available on this host, or if
                package_version is not the installed version.
            AssertionError: if the package is declared available but not handled here.
        """
        if package_id not in self.available_packages:
            raise ValueError(f'{package_id} is not available on {self.host_name}')
        if package_id != 'gfortran':
            # explicit raise rather than `assert False`, which python -O strips
            raise AssertionError(f'unhandled package: {package_id}')

        current_version = self.get_package_default_version(package_id)
        if current_version != package_version:
            raise ValueError(f'gfortran version {package_version} is not available: only gfortran version {current_version} is available on {self.host_name}')
        return ''  # no special instructions are required to activate the current gfortran version
class IprClusterNode(ITargetHost):
    """Target host whose packages are looked up as environment modules (work in progress).

    NOTE(review): get_package_activation_command is not overridden here, so
    this class is still abstract and can't be instantiated yet.
    """

    def get_latest_version_for_env_module(self, package_env_module: str) -> str:
        """Return the latest version available for the given environment module.

        package_env_module: eg compilers/ifort

        Not implemented yet: always raises NotImplementedError.
        """
        # the transcript below shows where the version could be read from:
        # graffy@alambix-frontal:~$ module help compilers/ifort/latest
        # -------------------------------------------------------------------
        # Module Specific Help for /usr/share/modules/modulefiles/compilers/ifort/latest:
        # Provides the same functionality as the command '/opt/intel/oneapi-2024.2.1/compiler/latest/env/vars.sh intel64'
        # -------------------------------------------------------------------
        # graffy@alambix-frontal:~$ ls -l /usr/share/modules/modulefiles/compilers/ifort/latest
        # lrwxrwxrwx 1 root root 9 18 nov. 02:11 /usr/share/modules/modulefiles/compilers/ifort/latest -> 2021.13.1
        raise NotImplementedError()

    def get_package_default_version(self, package_id: str) -> str:
        """Return the default version of the given package; only 'ifort' is handled.

        Raises:
            AssertionError: if package_id is not a handled package.
        """
        if package_id == 'ifort':
            return self.get_latest_version_for_env_module('compilers/ifort')
        # explicit raise rather than `assert False`: python -O strips asserts, which
        # would make this method silently return None for unhandled packages
        raise AssertionError(f'unhandled package: {package_id}')

View File

@ -1 +1 @@
__version__ = '0.0.4'
__version__ = '0.0.5'

View File

@ -39,7 +39,7 @@ class BenchmarksTestCase(unittest.TestCase):
def test_mamul1(self):
benchmark_id = 'mamul1'
benchmark_config = {
'compiler_id': 'gfortran',
'fortran_compiler': 'gfortran:<default>',
'matrix_size': 1024,
'num_loops': 10,
'num_cores': 2
@ -49,7 +49,7 @@ class BenchmarksTestCase(unittest.TestCase):
def test_hibench(self):
benchmark_id = 'hibench'
benchmark_config = {
'compiler_id': 'gfortran',
'fortran_compiler': 'gfortran:<default>',
'test_id': 'arch4_quick',
'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
'cmake_path': 'cmake',

View File

@ -36,7 +36,7 @@ class ClusterBenchTestCase(unittest.TestCase):
def test_clusterbench_submit1(self):
benchmark_id = 'mamul1'
benchmark_config = {
'compiler_id': 'gfortran',
'fortran_compiler': 'gfortran:<default>',
'matrix_size': 1024,
'num_loops': 10,
}

View File

@ -20,7 +20,7 @@ def test_resultsdb(resultsdb_params: ResultsDbParams, results_root_path: Path):
results_dir.mkdir(parents=True)
benchmark_id = 'mamul1'
benchmark_config = {
'compiler_id': 'gfortran',
'fortran_compiler': 'gfortran:<default>',
'matrix_size': 1024,
'num_loops': 10,
'num_cores': 2