iprbench/iprbench/core.py

from typing import List, Dict, Union, Any, Optional
from enum import Enum
import abc
from pathlib import Path
from datetime import datetime
from .util import Singleton
import json
import re

PackageVersion = str  # a version string, such as 4.9.3
PackageId = str  # a generic identifier of a package (eg libopenblas-pthread)

HostTypeId = str  # uniquely identifies an ITargetHost instance, eg fr.univ-rennes.ipr.cluster-node


class ITargetHost(abc.ABC):
    """the host that runs the benchmark"""

    @abc.abstractmethod
    def get_host_type_id(self) -> HostTypeId:
        """returns the unique identifier of this host type"""

    @abc.abstractmethod
    def get_package_default_version(self, package_id: PackageId) -> PackageVersion:
        """returns the latest installed version of the given package (eg '2021.1.2' for 'ifort')"""

    @abc.abstractmethod
    def get_package_activation_command(self, package_id: PackageId, package_version: PackageVersion) -> str:
        """returns the bash command to activate the given package

        eg for package_id=='ifort' and package_version=='2021.1.2' return 'module load compilers/ifort/2021.1.2'
        """
        raise NotImplementedError()

    def get_default_alternative(self, package_type: str) -> PackageId:
        """returns the id of the package that this host uses by default for the given package type (eg 'libblas')

        this method is called by Package to resolve package ids of the form '<default-libblas>'.
        It is deliberately not abstract, so that hosts that never deal with such package ids
        are not forced to implement it.

        raises NotImplementedError if this host doesn't define default alternatives
        """
        raise NotImplementedError(f'{type(self).__name__} does not define a default alternative for package type {package_type}')


class Package:
    """a software component required by a benchmark (eg ifort 2019.1.2 as fortran_compiler)

    both the package id and the package version may be given symbolically, in which case
    they are resolved by asking the target host:
    - a package id of the form '<default-libblas>' is replaced with target_host.get_default_alternative('libblas')
    - the package version '<default>' is replaced with target_host.get_package_default_version(package_id)
    """
    package_id: str  # the resolved package id, eg 'ifort'
    package_version: str  # the resolved package version, eg '2021.1.2'

    def __init__(self, package_id: str, package_version: str, target_host: ITargetHost):
        """
        raises ValueError if package_id uses an unknown keyword, or contains '<' or '>' without being of the form '<keyword-argument>'
        """
        self.target_host = target_host

        # step 1: find the actual package id (the given one could be a '<keyword-argument>' expression)
        keyword_expr = re.match(r'^<(?P<keyword>[a-z_]+)-(?P<arg1>[^>]+)>$', package_id)
        if keyword_expr is None:
            # not a keyword expression: angle brackets are not allowed in a plain package id
            if '<' in package_id or '>' in package_id:
                raise ValueError(f'unexpected syntax for package id {package_id}')
            actual_package_id = package_id
        else:
            keyword = keyword_expr.group('keyword')
            if keyword != 'default':
                raise ValueError(f'unknown keyword {keyword}')
            # for the 'default' keyword, the argument is a package type, eg 'libblas'
            actual_package_id = target_host.get_default_alternative(keyword_expr.group('arg1'))
        assert actual_package_id != ''

        # step 2: find the actual package version
        if package_version != '<default>':
            actual_package_version = package_version
        else:
            actual_package_version = target_host.get_package_default_version(actual_package_id)

        self.package_id = actual_package_id
        self.package_version = actual_package_version

    def __repr__(self) -> str:
        """returns '<package id>:<package version>', eg 'ifort:2021.1.2'"""
        return ':'.join((f'{self.package_id}', f'{self.package_version}'))


BenchmarkId = str  # a unique name for a benchmark, eg 'matmul1'
ResultsDbId = str  # a unique name for a results database, eg 'tsv-files'
BenchParamId = str  # the name of a benchmark parameter, eg 'matrix_size'
BenchParamType = Union[int, str, float, datetime, Package]  # the python types that the value of a benchmark parameter can have
BenchmarkConfig = Dict[BenchParamId, BenchParamType]  # eg { 'compiler_id': 'gfortran', 'matrix_size': 1024 }
BenchmarkMeasurements = Dict[BenchParamId, BenchParamType]  # eg { 'matrix_multiplication_avg_duration': 3.14 }
BenchmarkParamValues = Dict[BenchParamId, BenchParamType]  # a generic mapping of parameter names to their values (used for benchmark records and for auto param values)


class BenchParam:
    """describes one parameter of a benchmark

    a parameter can be, for instance, the id of the compiler, the cpu id or the size of the matrix.
    """

    class Type(Enum):
        """the kinds of values that a benchmark parameter can hold"""
        PARAM_TYPE_STRING = 0
        PARAM_TYPE_INT = 1
        PARAM_TYPE_FLOAT = 2
        PARAM_TYPE_TIME = 3
        PARAM_TYPE_PACKAGE = 4

    name: BenchParamId  # how the parameter is called, eg 'matrix_size'
    param_type: Type  # the kind of value the parameter holds, eg 'PARAM_TYPE_INT'
    description: str  # a human readable explanation, eg 'the size n of the n*n matrix '

    def __init__(self, name: str, param_type: Type, description: str):
        """stores the name, the type and the description of the parameter as they are given"""
        self.description = description
        self.param_type = param_type
        self.name = name


BenchmarkAutoParams = List[BenchParam]  # presumably the parameters whose values are filled automatically (see IResultsDb.auto_params)
BenchmarkInputParams = List[BenchParam]  # the input parameters of a benchmark (type of IBenchmark.bench_params)
BenchmarkOutputParams = List[BenchParam]  # the output parameters of a benchmark (type of IBenchmark.out_params)


class IBenchmark(abc.ABC):
    """base class of all benchmarks

    a benchmark is described by the parameters it takes as input (bench_params and common_params)
    and by the parameters it outputs (out_params).
    """

    # resultsdb_id: ResultsDbId
    bench_id: BenchmarkId  # a unique name for this benchmark, eg 'matmul1'
    common_params: List[BenchParam]  # input parameters that are passed along with bench_params in the benchmark config
    bench_params: BenchmarkInputParams  # input parameters of this benchmark
    out_params: BenchmarkOutputParams  # output parameters of this benchmark

    # def __init__(self, resultsdb_id: ResultsDbId, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams):
    def __init__(self, bench_id: str, bench_params: BenchmarkInputParams, out_params: BenchmarkOutputParams, common_params: List[BenchParam]):
        # self.resultsdb_id = resultsdb_id
        self.bench_id = bench_id
        self.common_params = common_params
        self.bench_params = bench_params
        self.out_params = out_params

    @abc.abstractmethod
    def get_ram_requirements(self, config: BenchmarkConfig) -> int:
        """returns the ram requirements for this benchmark, in bytes
        """

    @abc.abstractmethod
    def execute(self, config: BenchmarkConfig, benchmark_output_dir: Path, target_host: ITargetHost) -> BenchmarkMeasurements:
        """execute the benchmark for the given config
        """

    # @abc.abstractmethod
    # def get_measurements(self, benchmark_output_dir: Path) -> BenchmarkMeasurements:
    #     """parses benchmark_output_dir to collect the benchmark's measurements
    #     """

    def load_config(self, config_as_json: str, target_host: ITargetHost) -> BenchmarkConfig:
        """builds the benchmark config from its json representation

        the config is first validated against the input parameters of this benchmark
        (bench_params + common_params), then resolved for the given target host (see resolve_config).

        raises AssertionError if the config doesn't match the input parameters of this benchmark
        """
        benchmark_config = json.loads(config_as_json)
        IBenchmark.validate_config(benchmark_config, self.bench_params + self.common_params, self.bench_id)
        return self.resolve_config(benchmark_config, target_host)

    def get_bench_param(self, param_name: str) -> Optional[BenchParam]:
        """returns the input parameter (from bench_params or common_params) that has the given name, or None if there's none"""
        for bench_param in self.bench_params + self.common_params:
            if bench_param.name == param_name:
                return bench_param
        return None

    def resolve_config(self, config: BenchmarkConfig, target_host: ITargetHost) -> BenchmarkConfig:
        """returns a copy of the given config in which the values of package parameters are converted to Package instances

        the value of a parameter of type PARAM_TYPE_PACKAGE is expected to be a string of the form
        '<package_id>:<package_version>'; the other values are copied as they are.

        raises ValueError if config contains a parameter that is not an input parameter of this benchmark
        """
        resolved_benchmark_config = {}
        for param_name, param_value in config.items():
            bench_param = self.get_bench_param(param_name)
            if bench_param is None:
                # fail with an explicit message instead of an AttributeError on None
                raise ValueError(f'parameter {param_name} doesn\'t exist for benchmark {self.bench_id}')
            if bench_param.param_type == BenchParam.Type.PARAM_TYPE_PACKAGE:
                package_id, package_version = param_value.split(':')
                resolved_value = Package(package_id, package_version, target_host)
            else:
                resolved_value = param_value
            resolved_benchmark_config[param_name] = resolved_value
        return resolved_benchmark_config

    @staticmethod
    def validate_config(config: BenchmarkConfig, params: List[BenchParam], benchid: BenchmarkId):
        """checks that the given config sets all the given parameters, and only them

        raises AssertionError if one of params is missing from config, or if config contains a parameter that is not in params
        """
        # the errors are raised explicitly instead of using assert statements, because assert statements
        # are removed when python runs with -O, which would silently disable this validation
        for bench_param in params:
            if bench_param.name not in config:
                raise AssertionError(f'failed to find the benchmark parameter {bench_param.name} in the benchmark config')
        # check that all parameters in benchmark config exist as parameters for this benchmark
        known_param_names = {bench_param.name for bench_param in params}
        for param_name in config:
            if param_name not in known_param_names:
                raise AssertionError(f'parameter {param_name} doesn\'t exist for benchmark {benchid}')


class IResultsTable(abc.ABC):
    """a table of a results database; it records the results of one benchmark"""
    results_db: 'IResultsDb'  # the results database used to get the auto params and the common params
    benchmark: IBenchmark  # the benchmark recorded by this table

    def __init__(self, results_db: 'IResultsDb', benchmark: IBenchmark):
        self.benchmark = benchmark
        self.results_db = results_db

    @abc.abstractmethod
    def add_benchmark(self, benchmark_record: BenchmarkParamValues):
        """appends the given benchmark record to this table

            a benchmark record is one row of values of a benchmark results table: it holds the results of the benchmark, together with its configuration parameters and the BenchmarkAutoParams. For example { 'measurement_time': datetime(2024, 10, 24, 16, 34, 41), 'cpu': 'intel_xeon_6348r', 'matrix_size': 1024, 'duration': 0.522}
        """

    def add_results(self, benchmark_config: BenchmarkConfig, benchmark_measurements: BenchmarkMeasurements):
        """builds a benchmark record from the auto param values, the given config and the given measurements, and adds it to this table

        if the same parameter name appears more than once, the measurements win over the config, which wins over the auto param values.
        """
        record = {}
        # the later sources override the earlier ones
        for values in (self.results_db.get_auto_param_values(), benchmark_config, benchmark_measurements):
            record.update(values)
        self.add_benchmark(record)

    def get_params(self) -> List[BenchParam]:
        """returns the ordered list of the parameters describing the columns of this table

        the order is: auto params, common params, benchmark input params, benchmark output params
        """
        auto_columns = [auto_param.bench_param for auto_param in self.results_db.auto_params]
        input_columns = self.results_db.common_params + self.benchmark.bench_params
        return auto_columns + input_columns + self.benchmark.out_params


class IAutoParam(abc.ABC):
    """a parameter whose value is provided by get_value() instead of being given by the user"""
    bench_param: BenchParam  # the description of this parameter

    def __init__(self, bench_param: BenchParam):
        self.bench_param = bench_param

    @abc.abstractmethod
    def get_value(self) -> BenchParamType:
        """returns the value of this parameter"""


class IResultsDb(abc.ABC):
    """the results database (contains IResultsTable instances)"""
    auto_params: List[IAutoParam]  # parameters shared by all benchmarks, the values of which are filled automatically
    common_params: List[BenchParam]  # parameters shared by all benchmarks, the values of which have to be filled by the user

    def __init__(self):
        # both lists start empty; they are populated with add_auto_param and add_common_param
        self.common_params = []
        self.auto_params = []

    def add_auto_param(self, auto_param: IAutoParam):
        """registers an additional automatically filled parameter"""
        self.auto_params.append(auto_param)

    def add_common_param(self, param: BenchParam):
        """registers an additional user filled parameter"""
        self.common_params.append(param)

    def get_auto_param_values(self) -> BenchmarkParamValues:
        """returns the current value of each registered auto param, indexed by the name of the parameter"""
        return {registered.bench_param.name: registered.get_value() for registered in self.auto_params}

    @abc.abstractmethod
    def get_table(self, benchmark: IBenchmark) -> IResultsTable:
        """returns the results table of the given benchmark"""


ResultsDbParams = Dict[str, Any]  # the configuration of a results database, as passed to IResultsDbCreator.create_resultsdb


class IResultsDbCreator(abc.ABC):
    """creates IResultsDb instances; each creator is identified by a results database id"""
    resultsdb_id: ResultsDbId  # the identifier under which this creator is known

    def __init__(self, resultsdb_id: ResultsDbId):
        self.resultsdb_id = resultsdb_id

    def get_resultsdb_id(self) -> ResultsDbId:
        """returns the identifier given at construction"""
        return self.resultsdb_id

    @abc.abstractmethod
    def create_resultsdb(self, resultsdb_config: ResultsDbParams) -> IResultsDb:
        """creates a results database from the given configuration"""


class ResultsDbFactory(metaclass=Singleton):
    """a registry of IResultsDbCreator instances, used to create results databases from their id"""

    resultsdb_creators: Dict[ResultsDbId, IResultsDbCreator]  # the registered creators, indexed by their results database id

    def __init__(self):
        self.resultsdb_creators = {}

    def register_resultsdb_creator(self, resultsdb_creator: IResultsDbCreator):
        """registers the given creator under its own results database id (replaces the creator previously registered under this id, if any)"""
        creator_id = resultsdb_creator.get_resultsdb_id()
        self.resultsdb_creators[creator_id] = resultsdb_creator

    def create_resultsdb(self, resultsdb_id: ResultsDbId, params: ResultsDbParams) -> IResultsDb:
        """creates a results database using the creator registered under resultsdb_id

        raises KeyError if no creator has been registered under resultsdb_id
        """
        creator = self.resultsdb_creators[resultsdb_id]
        return creator.create_resultsdb(params)