v0.0.5

- added support for package params. As a result, it is now possible to choose the fortran compiler version (implemented in mamul1 and hibench). Work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3958]
2024-11-19 21:37:55 +01:00 · 2024-11-19 21:37:55 +01:00 · 7fd25890ec
parent a5b5345d39
commit 7fd25890ec
11 changed files with 162 additions and 38 deletions
--- a/iprbench/benchmarks/hibench.py
+++ b/iprbench/benchmarks/hibench.py
@ -2,7 +2,7 @@ import pandas as pd
 from pathlib import Path
 import subprocess
 import shutil
-from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements
+from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements, ITargetHost
 from ..util import get_proxy_env_vars


@ -14,7 +14,7 @@ class HiBench(IBenchmark):
        bench_params = []
        bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
        bench_params.append(BenchParam('hibridon_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of hibridon, in the form of a commit id'))
-        bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
+        bench_params.append(BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark'))
        # bench_params.append(BenchParam('compiler_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the used compiler'))
        # bench_params.append(BenchParam('blas_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the blas library used in the benchmark'))
        # bench_params.append(BenchParam('blas_version', BenchParam.Type.PARAM_TYPE_STRING, 'the version of the blas library used in the benchmark'))
@ -38,12 +38,12 @@ class HiBench(IBenchmark):
            assert f'unhandled benchmark_test : {benchmark_test}'
        return ram_per_core

-    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path) -> BenchmarkMeasurements:
+    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:

        git_repos_url = 'https://github.com/hibridon/hibridon'
        hibridon_version = config['hibridon_version']
        test_id = config['test_id']  # eg arch4_quick or nh3h2_qma_long
-        compiler_id = config['compiler_id']
+        fortran_compiler = config['fortran_compiler']
        cmake_path = config['cmake_path']
        num_cores = config['num_cores']

@ -62,16 +62,14 @@ class HiBench(IBenchmark):
            '-DBUILD_TESTING=ON'  # enable hibridon tests
        ]

-        env_vars_bash_commands = ''
-        if compiler_id == 'ifort':
-            env_vars_bash_commands = 'module load compilers/ifort/latest'
-            cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort')  # use intel fortran compiler
+        env_vars_bash_commands = target_host.get_package_activation_command(fortran_compiler.package_id, fortran_compiler.package_version)
+        cmake_options.append(f'-DCMAKE_Fortran_COMPILER={fortran_compiler.package_id}')
+        if fortran_compiler.package_id == 'ifort':
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
-        elif compiler_id == 'gfortran':
-            env_vars_bash_commands = ''
-            cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran')  # use gfortran compiler
+        elif fortran_compiler.package_id == 'gfortran':
+            pass
        else:
-            assert f'unhandled compiler_id : {compiler_id}'
+            assert f'unhandled compiler_id : {fortran_compiler.package_id}'

        output_measurements_file_path = output_dir / "measurements.tsv"

--- a/iprbench/benchmarks/mamul1.py
+++ b/iprbench/benchmarks/mamul1.py
@ -1,4 +1,4 @@
-from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements
+from ..core import IBenchmark, BenchParam, BenchmarkConfig, BenchmarkMeasurements, ITargetHost  # , Package  # PackageVariant
 from pathlib import Path
 import pandas as pd
 import subprocess
@ -8,10 +8,9 @@ from iprbench.util import extract_resource_dir
 class MaMul1(IBenchmark):
    """Matrix multiplication benchmark
    """
-
    def __init__(self):
        bench_params = []
-        bench_params.append(BenchParam('compiler_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the compiler used in the benchmark'))
+        bench_params.append(BenchParam('fortran_compiler', BenchParam.Type.PARAM_TYPE_PACKAGE, 'the compiler used in the benchmark'))
        bench_params.append(BenchParam('num_cores', BenchParam.Type.PARAM_TYPE_INT, 'the number of cores to use by this benchmark'))
        bench_params.append(BenchParam('matrix_size', BenchParam.Type.PARAM_TYPE_INT, 'the size n of all the the n * n matrices'))
        bench_params.append(BenchParam('num_loops', BenchParam.Type.PARAM_TYPE_INT, 'the number of identical multiplications performed in sequence'))
@ -31,8 +30,8 @@ class MaMul1(IBenchmark):
        ram_requirements = int(1 * GIBIBYTE_TO_BYTE) + num_matrices * matrix_ram_size
        return ram_requirements

-    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path) -> BenchmarkMeasurements:
-        compiler_id = config['compiler_id']
+    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
+        fortran_compiler = config['fortran_compiler']
        num_cores = config['num_cores']
        matrix_size = config['matrix_size']
        num_loops = config['num_loops']
@ -50,16 +49,14 @@ class MaMul1(IBenchmark):
            '-DCMAKE_BUILD_TYPE=Release',  # build in release mode for highest performance
        ]

-        env_vars_bash_commands = ''
-        if compiler_id == 'ifort':
-            env_vars_bash_commands = 'module load compilers/ifort/latest'
-            cmake_options.append('-DCMAKE_Fortran_COMPILER=ifort')  # use intel fortran compiler
+        env_vars_bash_commands = target_host.get_package_activation_command(fortran_compiler.package_id, fortran_compiler.package_version)
+        cmake_options.append(f'-DCMAKE_Fortran_COMPILER={fortran_compiler.package_id}')
+        if fortran_compiler.package_id == 'ifort':
            cmake_options.append('-DBLA_VENDOR=Intel10_64lp')  # use 64 bits intel mkl with multithreading
-        elif compiler_id == 'gfortran':
-            env_vars_bash_commands = ''
-            cmake_options.append('-DCMAKE_Fortran_COMPILER=gfortran')  # use gfortran compiler
+        elif fortran_compiler.package_id == 'gfortran':
+            pass
        else:
-            assert f'unhandled compiler_id : {compiler_id}'
+            assert f'unhandled fortran_compiler_id : {fortran_compiler.package_id}'

        output_measurements_file_path = output_dir / "measurements.tsv"

--- a/iprbench/clusterbench.py
+++ b/iprbench/clusterbench.py
@ -273,7 +273,7 @@ def archive_this_virtualenv_to(venv_archive_path: Path, venv_hardcoded_path: Pat

 def launch_job_for_host_group(benchmark: IBenchmark, benchmark_config: BenchmarkConfig, host_group_id: HostGroupId, results_dir: Path, cluster: ICluster, resultsdb_params: ResultsDbParams):

-    compiler_id: CompilerId = benchmark_config['compiler_id']
+    compiler_id: CompilerId = benchmark_config['fortran_compiler']

    cluster_db = cluster.get_cluster_db()

--- a/iprbench/core.py
+++ b/iprbench/core.py
@ -1,14 +1,50 @@
-from typing import List, Dict, Union, Any
+from typing import List, Dict, Union, Any, Optional
 from enum import Enum
 import abc
 from pathlib import Path
 from datetime import datetime
 from .util import Singleton
+import json
+
+
+class ITargetHost(abc.ABC):
+    """the host that runs the benchmark
+
+    abstract interface: a concrete host tells which version of a package it provides by default,
+    and which bash command activates a given version of a package
+    """
+
+    @abc.abstractmethod
+    def get_package_default_version(self, package_id: str) -> str:
+        """returns the latest installed version of the given package (eg '2021.1.2' for 'ifort')
+
+        package_id: the identifier of the package, eg 'ifort'
+        """
+
+    @abc.abstractmethod
+    def get_package_activation_command(self, package_id: str, package_version: str) -> str:
+        """returns the bash command to activate the given package
+
+        eg for package_id=='ifort' and package_version=='2021.1.2' return 'module load compilers/ifort/2021.1.2'
+
+        package_id: the identifier of the package, eg 'ifort'
+        package_version: the version of the package to activate, eg '2021.1.2'
+        """
+        raise NotImplementedError()
+
+
+class Package():
+    """a software component required by a benchmark (eg ifort 2019.1.2 as fortran_compiler)
+
+    its textual form is '<package_id>:<package_version>', eg 'ifort:2021.1.2' (see __repr__)
+    """
+    package_id: str  # eg 'ifort'
+    package_version: str  # eg '2021.1.2'
+
+    def __init__(self, package_id: str, package_version: str, target_host: ITargetHost):
+        """
+        package_id: the identifier of the package, eg 'ifort'
+        package_version: either an explicit version (eg '2021.1.2'), or '<default>' to use the version that target_host reports as the default one for package_id
+        target_host: the host that is asked for the default version; it is also kept in self.target_host
+        """
+        self.target_host = target_host
+        if package_version == '<default>':
+            # the '<default>' placeholder is resolved once, at construction time, by asking the target host
+            resolved_package_version = target_host.get_package_default_version(package_id)
+        else:
+            resolved_package_version = package_version
+        self.package_id = package_id
+        self.package_version = resolved_package_version
+
+    def __repr__(self) -> str:
+        """returns the package in the form 'ifort:2021.1.2'"""
+        return f'{self.package_id}:{self.package_version}'
+

 BenchmarkId = str  # a unique name for a benchmark, eg 'matmul1'
 ResultsDbId = str  # a unique name for a results database, eg 'tsv-files'
 BenchParamId = str
-BenchParamType = Union[int, str, float, datetime]
+BenchParamType = Union[int, str, float, datetime, Package]
 BenchmarkConfig = Dict[BenchParamId, BenchParamType]  # eg { 'compiler_id': 'gfortran', 'matrix_size': 1024 }
 BenchmarkMeasurements = Dict[BenchParamId, BenchParamType]  # eg { 'matrix_multiplication_avg_duration': 3.14 }
 BenchmarkParamValues = Dict[BenchParamId, BenchParamType]
@ -25,6 +61,7 @@ class BenchParam():
        PARAM_TYPE_INT = 1
        PARAM_TYPE_FLOAT = 2
        PARAM_TYPE_TIME = 3
+        PARAM_TYPE_PACKAGE = 4

    name: BenchParamId  # the name of the parameter, eg 'matrix_size'
    param_type: Type  # the type of the parameter, eg 'PARAM_TYPE_INT'
@ -61,7 +98,7 @@ class IBenchmark(abc.ABC):
        """

    @abc.abstractmethod
-    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path) -> BenchmarkMeasurements:
+    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
        """execute the benchmark for the given config
        """

@ -70,6 +107,31 @@ class IBenchmark(abc.ABC):
    #     """parses benchmark_output_dir to collect the benchmark's measurements
    #     """

+    def load_config(self, config_as_json: str, target_host: ITargetHost) -> BenchmarkConfig:
+        """builds the config of this benchmark from its json representation
+
+        the decoded config is first checked with validate_config, then passed to resolve_config,
+        whose result is returned
+
+        config_as_json: the config, as a json string
+        target_host: the host passed to resolve_config
+        """
+        benchmark_config = json.loads(config_as_json)
+        self.validate_config(benchmark_config)
+        resolved_benchmark_config = self.resolve_config(benchmark_config, target_host)
+        return resolved_benchmark_config
+
+    def get_bench_param(self, param_name: str) -> Optional[BenchParam]:
+        """returns the parameter of this benchmark named param_name, or None if this benchmark has no such parameter"""
+        for bench_param in self.bench_params:
+            if bench_param.name == param_name:
+                return bench_param
+        return None
+
+    def resolve_config(self, config: BenchmarkConfig, target_host: ITargetHost) -> BenchmarkConfig:
+        """returns a copy of config in which the values of package parameters are replaced with Package objects
+
+        config: the raw config, in which the value of a package parameter is a string of the form '<package_id>:<package_version>' (eg 'gfortran:<default>')
+        target_host: the host passed to Package, which uses it to resolve '<default>' versions
+
+        raises ValueError if config contains a parameter that this benchmark doesn't declare, or if the value of a package parameter is not of the expected form
+        """
+        resolved_benchmark_config = {}
+        for param_name, param_value in config.items():
+            bench_param = self.get_bench_param(param_name)
+            if bench_param is None:
+                # get_bench_param returns None for unknown names: report the offending parameter instead of failing with an AttributeError
+                raise ValueError(f'unexpected parameter {param_name} : this benchmark has no such parameter')
+            if bench_param.param_type == BenchParam.Type.PARAM_TYPE_PACKAGE:
+                package_parts = param_value.split(':') if isinstance(param_value, str) else []
+                # exactly one ':' separating a non-empty id from a non-empty version is expected
+                if len(package_parts) != 2 or not all(package_parts):
+                    raise ValueError(f"invalid value for package parameter {param_name} : {param_value!r} (expected '<package_id>:<package_version>', eg 'gfortran:<default>')")
+                package_id, package_version = package_parts
+                resolved_value = Package(package_id, package_version, target_host)
+            else:
+                resolved_value = param_value
+            resolved_benchmark_config[param_name] = resolved_value
+        return resolved_benchmark_config
+
    def validate_config(self, config: BenchmarkConfig):
        """checks that all benchmark parameters have been set in the given config"""
        for bench_param in self.bench_params:
@ -175,4 +237,3 @@ class ResultsDbFactory(metaclass=Singleton):

    def create_resultsdb(self, resultsdb_id: ResultsDbId, params: ResultsDbParams) -> IResultsDb:
        return self.resultsdb_creators[resultsdb_id].create_resultsdb(params)
-
--- a/iprbench/main.py
+++ b/iprbench/main.py
@ -1,4 +1,5 @@
 from .core import BenchmarkId, IBenchmark, ResultsDbFactory
+from .targethosts import GraffyWs2
 from .benchmarks.hibench import HiBench
 from .benchmarks.mamul1 import MaMul1
 from .resultsdb.tsvresultsdb import TsvResultsDbCreator
@ -47,8 +48,8 @@ def main():

    benchmark_id = BenchmarkId(args.benchmark_id)
    benchmark = BenchmarkFactory().create_benchmark(benchmark_id)
-    benchmark_config = json.loads(args.config)
-    benchmark.validate_config(benchmark_config)
+    target_host = GraffyWs2()
+    benchmark_config = benchmark.load_config(args.config, target_host)
    results_dir = args.results_dir

    ResultsDbFactory().register_resultsdb_creator(TsvResultsDbCreator())
@ -66,7 +67,7 @@ def main():

    results_table = results_db.get_table(benchmark)

-    measurements = benchmark.execute(benchmark_config, results_dir)
+    measurements = benchmark.execute(benchmark_config, results_dir, target_host)
    results_table.add_results(benchmark_config, measurements)

    # out_params.append(BenchParam('host_id', BenchParam.Type.PARAM_TYPE_STRING, 'the id of the host running the benchmark'))
--- a/iprbench/resultsdb/sqlresultsdb.py
+++ b/iprbench/resultsdb/sqlresultsdb.py
@ -33,6 +33,7 @@ class SqlResultsTable(IResultsTable):
                    BenchParam.Type.PARAM_TYPE_INT: SqlTableField.Type.FIELD_TYPE_INT,
                    BenchParam.Type.PARAM_TYPE_STRING: SqlTableField.Type.FIELD_TYPE_STRING,
                    BenchParam.Type.PARAM_TYPE_TIME: SqlTableField.Type.FIELD_TYPE_TIME,
+                    BenchParam.Type.PARAM_TYPE_PACKAGE: SqlTableField.Type.FIELD_TYPE_STRING,  # packages are stored as strings in the form ifort:2021.1.2
                }[param.param_type]
                fields.append(SqlTableField(param.name, sql_field_type, param.description))
            self.sql_backend.create_table(table_name, fields)
--- a/iprbench/targethosts.py
+++ b/iprbench/targethosts.py
@ -0,0 +1,66 @@
+from typing import Set
+from .core import ITargetHost
+import subprocess
+import re
+import logging
+
+
+class GraffyWs2(ITargetHost):
+    """the host named 'graffy-ws2', on which gfortran is the only package declared as available"""
+    host_name: str  # the name of this host, used in error messages
+    available_packages: Set[str]  # the ids of the packages that can be used on this host
+
+    def __init__(self):
+        self.host_name = 'graffy-ws2'
+        self.available_packages = {'gfortran'}
+
+    def get_package_default_version(self, package_id: str) -> str:
+        """returns the version of the given package found on this host (eg '9.4.0' for 'gfortran')
+
+        the version of gfortran is read from the output of 'gfortran --version'
+
+        raises ValueError if the package is not available on this host
+        """
+        if package_id not in self.available_packages:
+            raise ValueError(f'{package_id} is not available on {self.host_name}')
+        elif package_id == 'gfortran':
+            # the command goes through the shell so that a missing gfortran shows up as a non zero return code (handled below) rather than as an exception
+            completed_process = subprocess.run('gfortran --version', capture_output=True, check=False, shell=True)
+            if completed_process.returncode != 0:
+                raise ValueError(f'gfortran is not available on {self.host_name}')
+            else:
+                # GNU Fortran (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
+                # Copyright (C) 2019 Free Software Foundation, Inc.
+                # This is free software; see the source for copying conditions.  There is NO
+                # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+                first_line = completed_process.stdout.decode('utf-8').split('\n')[0]
+                logging.debug('first line: %s', first_line)
+                # the version is the last word of the first line
+                gfortran_version = first_line.split(' ')[-1]
+                assert re.match(r'[0-9]+\.[0-9]+\.[0-9]+', gfortran_version), f'unexpected format for gfortran version {gfortran_version}'
+                return gfortran_version
+        else:
+            # a package declared in available_packages but not handled here: raised explicitly so that the check is not removed by python -O
+            raise AssertionError(f'unhandled package: {package_id}')
+
+    def get_package_activation_command(self, package_id: str, package_version: str) -> str:
+        """returns the bash command to activate the given version of the given package on this host
+
+        for gfortran, the only usable version is the one found on this host, and it needs no activation command (an empty string is returned)
+
+        raises ValueError if the package, or the requested version of the package, is not available on this host
+        """
+        if package_id not in self.available_packages:
+            raise ValueError(f'{package_id} is not available on {self.host_name}')
+        elif package_id == 'gfortran':
+            current_version = self.get_package_default_version(package_id)
+            if current_version != package_version:
+                raise ValueError(f'gfortran version {package_version} has been requested, but only gfortran version {current_version} is available on {self.host_name}')
+            return ''  # no special instructions are required to activate the current gfortran version
+        else:
+            # a package declared in available_packages but not handled here: raised explicitly so that the check is not removed by python -O
+            raise AssertionError(f'unhandled package: {package_id}')
+
+
+class IprClusterNode(ITargetHost):
+    """presumably a node of the ipr cluster, on which packages are provided as environment modules - TODO confirm
+
+    work in progress: get_latest_version_for_env_module is not implemented yet
+    """
+    # NOTE(review): get_package_activation_command, which is abstract in ITargetHost, is not overridden here,
+    # so this class can't be instantiated as it is
+
+    def get_latest_version_for_env_module(self, package_env_module: str):
+        """not implemented yet (always raises NotImplementedError)
+
+        the comments below suggest that it is meant to return the version that the 'latest' entry of the given environment module points to (eg '2021.13.1' for 'compilers/ifort')
+        """
+        # package_env_module: eg compilers/ifort
+        # graffy@alambix-frontal:~$ module help compilers/ifort/latest
+        # -------------------------------------------------------------------
+        # Module Specific Help for /usr/share/modules/modulefiles/compilers/ifort/latest:
+
+        # Provides the same functionality as the command '/opt/intel/oneapi-2024.2.1/compiler/latest/env/vars.sh intel64'
+        # -------------------------------------------------------------------
+        # graffy@alambix-frontal:~$ ls -l /usr/share/modules/modulefiles/compilers/ifort/latest
+        # lrwxrwxrwx 1 root root 9 18 nov.  02:11 /usr/share/modules/modulefiles/compilers/ifort/latest -> 2021.13.1
+        raise NotImplementedError()
+
+    def get_package_default_version(self, package_id: str) -> str:
+        """returns the default version of the given package; only 'ifort' is handled, using the 'compilers/ifort' environment module"""
+        if package_id == 'ifort':
+            return self.get_latest_version_for_env_module('compilers/ifort')
+        else:
+            assert False, f'unhandled package: {package_id}'
--- a/iprbench/version.py
+++ b/iprbench/version.py
@ -1 +1 @@
-__version__ = '0.0.4'
+__version__ = '0.0.5'
--- a/test/test_benchmarks.py
+++ b/test/test_benchmarks.py
@ -39,7 +39,7 @@ class BenchmarksTestCase(unittest.TestCase):
    def test_mamul1(self):
        benchmark_id = 'mamul1'
        benchmark_config = {
-            'compiler_id': 'gfortran',
+            'fortran_compiler': 'gfortran:<default>',
            'matrix_size': 1024,
            'num_loops': 10,
            'num_cores': 2
@ -49,7 +49,7 @@ class BenchmarksTestCase(unittest.TestCase):
    def test_hibench(self):
        benchmark_id = 'hibench'
        benchmark_config = {
-            'compiler_id': 'gfortran',
+            'fortran_compiler': 'gfortran:<default>',
            'test_id': 'arch4_quick',
            'hibridon_version': 'a3bed1c3ccfbca572003020d3e3d3b1ff3934fad',
            'cmake_path': 'cmake',
--- a/test/test_clusterbench.py
+++ b/test/test_clusterbench.py
@ -36,7 +36,7 @@ class ClusterBenchTestCase(unittest.TestCase):
    def test_clusterbench_submit1(self):
        benchmark_id = 'mamul1'
        benchmark_config = {
-            'compiler_id': 'gfortran',
+            'fortran_compiler': 'gfortran:<default>',
            'matrix_size': 1024,
            'num_loops': 10,
        }
--- a/test/test_resultsdb.py
+++ b/test/test_resultsdb.py
@ -20,7 +20,7 @@ def test_resultsdb(resultsdb_params: ResultsDbParams, results_root_path: Path):
    results_dir.mkdir(parents=True)
    benchmark_id = 'mamul1'
    benchmark_config = {
-        'compiler_id': 'gfortran',
+        'fortran_compiler': 'gfortran:<default>',
        'matrix_size': 1024,
        'num_loops': 10,
        'num_cores': 2