# concho/concho/config.py

import itertools
import math
import re
from abc import abstractmethod

import numpy
# from concho import dell


class Item:
    """Base class of the things a configuration is made of; an item is only known by its unique identifier."""

    def __init__(self, uid):
        # the uid is the key under which Module.add_option registers the option that holds this item
        self.uid = uid


class Chassis(Item):
    """
    A server chassis, whose capacities are deduced from its identifier.

    - max_num_servers: 4 for identifiers starting with 'dell-poweredge-c6', 1 otherwise
    - num_cpu_slots_per_server: 4 for identifiers starting with 'dell-poweredge-r9', 2 otherwise
    - num_dimm_slots_per_channel: always 2
    """

    def __init__(self, uid):
        super().__init__(uid)
        is_r9_series = re.match('dell-poweredge-r9.*', uid) is not None
        is_c6_series = re.match('dell-poweredge-c6.*', uid) is not None
        self.max_num_servers = 4 if is_c6_series else 1
        self.num_cpu_slots_per_server = 4 if is_r9_series else 2
        self.num_dimm_slots_per_channel = 2


class Dimm(Item):
    """
    A memory module.

    :param num_gb: the capacity of the dimm (in gigabytes, as the name suggests)
    :param num_mhz: the frequency of the dimm (in MHz, as the name suggests)
    :param mem_type: the type of memory; it is the first component of the uid
    """

    def __init__(self, num_gb, num_mhz, mem_type):
        # the uid is built as '<mem_type>-<num_gb>-<num_mhz>'
        super().__init__('-'.join(str(part) for part in (mem_type, num_gb, num_mhz)))
        self.mem_type = mem_type
        self.num_gb = num_gb
        self.num_mhz = num_mhz


class Cpu(Item):
    """
    A processor model, whose characteristics are read from the table file 'cpu_table.tsv'.

    The table is a tab separated file with a header line; it is looked up in the current working directory and is read again each time a Cpu is created.

    The attributes clock, num_cores, max_cpus, tdp and cpumark are copied from the columns clock, num_cores, max_cpus, tdp and cpumark_1_cpu of the first row whose id equals the processor id.

    :param str proc_id: the processor id, eg 'intel-xeon-gold-6248'
    :raises AssertionError: if the table has no row for proc_id
    """

    # (regular expression, architecture) pairs, tried in this order against the start of the processor id. The first match wins, so the order matters : eg the last intel-xeon-e5-26xx pattern (sandy bridge) also matches the v2, v3 and v4 ids and therefore has to come after them.
    _ARCHITECTURE_PATTERNS = (
        ('intel-core-i[357]-8[0-9][0-9][0-9][ktbuh]', 'coffeelake'),
        ('intel-xeon-silver-[0-9]3[0-9][0-9]', 'icelake'),
        ('intel-xeon-gold-[0-9]3[0-9][0-9]', 'icelake'),
        ('intel-xeon-platinum-[0-9]3[0-9][0-9]', 'icelake'),
        ('intel-xeon-silver-[0-9]2[0-9][0-9]', 'cascadelake'),
        ('intel-xeon-gold-[0-9]2[0-9][0-9]', 'cascadelake'),
        ('intel-xeon-platinum-[0-9]2[0-9][0-9]', 'cascadelake'),
        ('intel-xeon-gold-[0-9]1[0-9][0-9]', 'skylake'),
        ('intel-xeon-platinum-[0-9]1[0-9][0-9]', 'skylake'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*v4', 'broadwell'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*v3', 'haswell'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*v2', 'ivy bridge'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*', 'sandy bridge'),
        ('intel-xeon-x56[0-9][0-9]', 'gulftown'),
        ('intel-xeon-x55[0-9][0-9]', 'gainestown'),
        ('intel-xeon-e54[0-9][0-9]', 'harpertown'),
        ('intel-xeon-51[0-9][0-9]', 'woodcrest'),
        ('amd-epyc-[0-9][0-9fh][0-9]1', 'naples'),
        ('amd-epyc-[0-9][0-9fh][0-9]2', 'rome'),
        ('amd-epyc-[0-9][0-9fh][0-9f]3', 'milan'),
    )

    # skylake and cascadelake processors that have two avx-512 fma units per core (the other ones have a single unit) :
    # - from https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors : Xeon Platinum, Gold 61XX, and Gold 5122 have two AVX-512 FMA units per core; Xeon Gold 51XX (except 5122), Silver, and Bronze have a single AVX-512 FMA unit per core
    # - https://en.wikichip.org/wiki/intel/xeon_gold/5222 : 'Note that this is the only processor in the Xeon Gold 52xx series with two 512b FMA units.'
    _DUAL_AVX512_FMA_PATTERNS = (
        'intel-xeon-platinum-',
        'intel-xeon-gold-5122',
        'intel-xeon-gold-5222',
        'intel-xeon-gold-61[0-9][0-9]',
        'intel-xeon-gold-62[0-9][0-9]',
    )

    def __init__(self, proc_id):
        super().__init__(proc_id)
        cpu_table = numpy.genfromtxt('cpu_table.tsv', dtype=("|U32", float, int, float, float, float), names=True, delimiter='\t')
        # genfromtxt returns a 0-dimensional array when the table has a single data row : make sure the columns can be iterated over in that case too
        cpu_table = numpy.atleast_1d(cpu_table)
        for cpu_id, clock, num_cores, max_cpus, tdp, cpumark in zip(cpu_table['id'], cpu_table['clock'], cpu_table['num_cores'], cpu_table['max_cpus'], cpu_table['tdp'], cpu_table['cpumark_1_cpu']):
            if cpu_id == proc_id:
                break
        else:
            # the loop ended without finding the processor (this also covers the case of an empty table). An explicit raise is used instead of an assert statement so that the check is not removed by python -O
            raise AssertionError('Failed to find %s in cputable' % proc_id)
        self.clock = clock
        self.num_cores = num_cores
        self.max_cpus = max_cpus
        self.tdp = tdp
        self.cpumark = cpumark

    @property
    def architecture(self):
        """
        The name of the micro-architecture of this processor (eg 'cascadelake'), deduced from its id.

        :raises AssertionError: if the processor id matches none of the known patterns
        """
        proc_id = self.uid
        for pattern, architecture in self._ARCHITECTURE_PATTERNS:
            if re.match(pattern, proc_id):
                return architecture
        raise AssertionError('unhandled processor id : %s' % proc_id)

    @property
    def num_dp_flop_per_cycle(self):
        """
        The number of double precision floating point operations that one core performs per cycle.

        It is the number of simd units per core multiplied by the number of flops per cycle of the simd instruction set of the architecture.
        """
        proc_arch = self.architecture
        simd_id = get_simd_id(proc_arch)

        if proc_arch in ['skylake', 'cascadelake']:
            # only some models of these generations have a second avx-512 fma unit
            has_two_fma_units = any(re.match(pattern, self.uid) for pattern in self._DUAL_AVX512_FMA_PATTERNS)
            num_simd_per_core = 2 if has_two_fma_units else 1
        elif proc_arch == 'icelake':
            # https://www.microway.com/knowledge-center-articles/detailed-specifications-of-the-ice-lake-sp-intel-xeon-processor-scalable-family-cpus/
            # > AVX-512 instructions (up to 16 double-precision FLOPS per cycle per AVX-512 FMA unit)
            # > Two AVX-512 FMA units per CPU core (available in all Ice Lake-SP CPU SKUs)
            # https://www.intel.com/content/www/us/en/products/sku/215269/intel-xeon-silver-4314-processor-24m-cache-2-40-ghz/specifications.html shows that even xeon silver 4314 has 2 AVX 512 fma units
            num_simd_per_core = 2
        else:
            # this includes rome. From https://www.microway.com/knowledge-center-articles/detailed-specifications-of-the-amd-epyc-rome-cpus/:
            # - Full support for 256-bit AVX2 instructions with two 256-bit FMA units per CPU core. The previous "Naples" architecture split 256-bit instructions into two separate 128-bit operations
            # - Up to 16 double-precision FLOPS per cycle per core
            # the two 256-bit fma units of a rome core perform 2 * 4 * 2 = 16 dp flops per cycle, which is the value associated with one 'avx2' simd unit : a rome core is therefore counted as having a single avx2 simd unit
            num_simd_per_core = 1

        return num_simd_per_core * simd_id_to_dp_flops_per_cycle(simd_id)

    @property
    def num_ram_channels(self):
        """
        The number of memory channels of this processor.

        :raises KeyError: if the architecture is not in the table below
        """
        # NOTE(review): 6 channels for coffeelake looks suspicious (desktop processors usually have 2 memory channels) - to be confirmed
        return {
            'skylake': 6,
            'coffeelake': 6,
            'cascadelake': 6,
            'icelake': 8,
            'rome': 8,
            'milan': 8
        }[self.architecture]


def get_proc_architecture(proc_id):
    """
    :param str proc_id: eg 'intel-xeon-gold-6248'
    :return str: the architecture of the given processor, eg 'cascadelake'

    note : a Cpu is created to get the answer, which means that the processor has to be in the cpu table
    """
    cpu = Cpu(proc_id)
    return cpu.architecture


def get_proc_arch_transistor_size(proc_id):
    """
    :param str proc_id: eg 'intel-xeon-gold-6248'
    :return int: the transistor size associated with the architecture of the given processor (presumably in nanometers)

    note : architectures that are not in the table (eg 'icelake' or 'rome') cause a KeyError
    """
    transistor_size_per_arch = {
        'woodcrest': 65,
        'harpertown': 45,
        'gainestown': 45,
        'gulftown': 32,
        'sandy bridge': 32,
        'ivy bridge': 22,
        'haswell': 22,
        'broadwell': 14,
        'skylake': 14,
        'coffeelake': 14,
        'cascadelake': 14
    }
    proc_arch = get_proc_architecture(proc_id)
    return transistor_size_per_arch[proc_arch]


def simd_id_to_dp_flops_per_cycle(simd_id):
    """
    :param str simd_id: the simd instruction set, eg 'avx2'
    :return int: the number of double precision flops per cycle associated with this instruction set

    An unknown simd_id causes a KeyError.
    """
    # from https://stackoverflow.com/questions/15655835/flops-per-cycle-for-sandy-bridge-and-haswell-sse2-avx-avx2
    # Intel Core 2 and Nehalem:
    #     4 DP FLOPs/cycle: 2-wide SSE2 addition + 2-wide SSE2 multiplication
    #     8 SP FLOPs/cycle: 4-wide SSE addition + 4-wide SSE multiplication
    # Intel Sandy Bridge/Ivy Bridge:
    #     8 DP FLOPs/cycle: 4-wide AVX addition + 4-wide AVX multiplication
    #     16 SP FLOPs/cycle: 8-wide AVX addition + 8-wide AVX multiplication
    # Intel Haswell/Broadwell/Skylake/Kaby Lake:
    #     16 DP FLOPs/cycle: two 4-wide FMA (fused multiply-add) instructions
    #     32 SP FLOPs/cycle: two 8-wide FMA (fused multiply-add) instructions
    #
    # https://www.dell.com/support/kbdoc/fr-fr/000137696/amd-rome-is-it-for-real-architecture-and-initial-hpc-performance
    # The Rome micro-architecture can retire 16 DP FLOP/cycle, double that of Naples which was 8 FLOPS/cycle
    simd_ids_per_flop_count = (
        (4, ('sse4.1', 'sse4.2')),
        (8, ('avx',)),
        (16, ('avx2', 'avx-512')),
    )
    dp_flops_per_cycle = {}
    for flop_count, simd_ids in simd_ids_per_flop_count:
        for known_simd_id in simd_ids:
            dp_flops_per_cycle[known_simd_id] = flop_count
    return dp_flops_per_cycle[simd_id]


def get_simd_id(proc_arch):
    """
        :param str proc_arch: the processor architecture, eg 'broadwell'
        :return str: the simd instruction set associated with this architecture, eg 'avx2'

        An architecture that is not in the table (eg 'naples') causes a KeyError.
    """
    archs_per_simd_id = (
        ('sse4.1', ('woodcrest', 'harpertown')),
        ('sse4.2', ('gainestown', 'gulftown')),
        ('avx', ('sandy bridge', 'ivy bridge')),
        ('avx-512', ('skylake', 'cascadelake', 'icelake')),
        # rome is in the avx2 group because of https://www.microway.com/knowledge-center-articles/detailed-specifications-of-the-amd-epyc-rome-cpus/:
        # - Full support for 256-bit AVX2 instructions with two 256-bit FMA units per CPU core. The previous "Naples" architecture split 256-bit instructions into two separate 128-bit operations
        # - Up to 16 double-precision FLOPS per cycle per core
        # - Double-precision floating point multiplies complete in 3 cycles (down from 4)
        ('avx2', ('haswell', 'broadwell', 'coffeelake', 'rome', 'milan')),
    )
    simd_id_per_arch = {arch: simd_id for simd_id, archs in archs_per_simd_id for arch in archs}
    return simd_id_per_arch[proc_arch]


class MemChannel:
    """A memory channel of a cpu : holds the contents of its dimm slots."""

    def __init__(self):
        # one element per dimm slot; the list starts empty and is filled by the code that uses the channel
        self.dimms = list()


class CpuSlotMem:
    """The memory attached to one cpu slot : holds its memory channels."""

    def __init__(self):
        # one element per memory channel; the list starts empty and is filled by the code that uses the slot
        self.mem_channels = list()


class Config():
    """
    A hardware configuration : a number of servers, each of them having the same number of cpus of the same model, and the dimms that populate the memory channels of each cpu slot.

    The memory layout is described for one server (cpu_slots_mem has one element per cpu of a server); sizes and prices are multiplied by the number of servers.

    :param configurator: the configurator this configuration belongs to; it provides the chassis, the dimm options and the prices of the items
    """

    def __init__(self, configurator):
        self.configurator = configurator
        self.num_servers = 0
        self._num_cpu_per_server = 0
        self.cpu = None
        self.cpu_slots_mem = []

    @property
    def chassis(self):
        """The chassis item of the configurator."""
        return self.configurator.chassis.item

    @staticmethod
    def _find_dimm_combination(num_dimm_slots_per_channel, min_ram_per_channel, available_dimms):
        """
        Finds the cheapest way to populate the dimm slots of a memory channel.

        :param int num_dimm_slots_per_channel: the number of dimm slots of the channel
        :param float min_ram_per_channel: the minimal amount of memory (in bytes) that the channel has to provide
        :param list available_dimms: the dimm options (objects with an item that has a num_gb, and a price); this list is not modified
        :return list: the dimm to put in each slot of the channel (None for a slot that has to stay empty)
        :raises AssertionError: if no combination provides the requested amount of memory
        """
        # the possible contents of a slot, as (dimm, capacity in bytes, price) : every available dimm, and then the empty slot. The list is built from scratch, so that the list of the caller is left untouched
        slot_choices = [(option.item, float(option.item.num_gb) * math.pow(2.0, 30.0), option.price) for option in available_dimms]
        slot_choices.append((None, 0.0, 0.0))

        # try all combinations of dimms
        best_combination = None
        best_price = None
        for reversed_combination in itertools.product(slot_choices, repeat=num_dimm_slots_per_channel):
            # itertools.product makes the last position vary the fastest; the combinations are reversed so that the first slot is the one that varies the fastest. As only a strictly cheaper combination replaces the current best one, this enumeration order decides which one of equally priced combinations is returned
            combination = reversed_combination[::-1]
            config_capacity = sum(capacity for _, capacity, _ in combination)
            config_price = sum(price for _, _, price in combination)
            if config_capacity >= min_ram_per_channel:  # only remember the combination if it complies with the minimal memory constraint
                if best_price is None or config_price < best_price:
                    best_price = config_price
                    best_combination = combination

        if best_combination is None:
            # an explicit raise is used instead of an assert statement so that the check is not removed by python -O
            raise AssertionError("Failed to find a dimm combination that provides %f bytes per channel." % min_ram_per_channel)
        # a dimm without capacity is equivalent to an empty slot
        return [None if dimm is None or dimm.num_gb == 0 else dimm for dimm, _, _ in best_combination]

    def set_ram(self, ram_per_core=None, ram_per_server=None, ram_per_cpu=None):
        """
        Populates the dimm slots with the cheapest dimms that provide the requested amount of memory.

        One and only one of the three arguments is expected. The same combination of dimms is used for every memory channel of every cpu slot.

        :param ram_per_core: the minimal amount of memory per core (in bytes)
        :param ram_per_server: the minimal amount of memory per server (in bytes)
        :param ram_per_cpu: the minimal amount of memory per cpu (in bytes)
        :raises AssertionError: if more than one argument is given, if no amount of memory can be deduced from the arguments, or if the dimm options can't provide the requested amount of memory
        """
        # ramUpgradePrice128Gb = {
        #     'c6220':3520.0,
        #     'r620':2010.0,
        #     'r630':1778.0,
        #     'r640':1780.0,
        #     'r730':1778.0,
        #     'r940':960.0,   # 32 Gb 2933 MHz RDIMM : 320 €
        #     'c6320':6222.6,
        #     'c4310':1778.0,
        #     'precision3630': 1536.0 }
        cpu = self.cpu
        # an argument that is None or 0 is considered as not given
        num_given = sum(1 for amount in (ram_per_core, ram_per_server, ram_per_cpu) if amount)
        if num_given > 1:
            raise AssertionError('only one of ram_per_core, ram_per_server and ram_per_cpu is allowed')
        if ram_per_core:
            ram_per_cpu = cpu.num_cores * ram_per_core
        if ram_per_server:
            ram_per_cpu = ram_per_server / self.num_cpu_per_server
        if ram_per_cpu is None:
            # without this check, the division below would fail with an obscure TypeError
            raise AssertionError('a non-zero ram_per_core or ram_per_server, or a ram_per_cpu is required')

        ram_per_channel = ram_per_cpu / cpu.num_ram_channels

        slot_dimms = Config._find_dimm_combination(self.configurator.chassis.item.num_dimm_slots_per_channel, ram_per_channel, self.configurator.get_dimm_options())

        # print(cpu.uid, cpu.num_cores, ram_per_channel, [0 if dimm is None else dimm.num_gb for dimm in slot_dimms])
        for cpu_slot_mem in self.cpu_slots_mem:
            for mem_channel in cpu_slot_mem.mem_channels:
                for dimm_slot_index, dimm in enumerate(slot_dimms):
                    mem_channel.dimms[dimm_slot_index] = dimm

    def _iter_dimms(self):
        """Yields the dimm of each populated dimm slot of a server (the empty slots are skipped)."""
        for cpu_slot_mem in self.cpu_slots_mem:
            for mem_channel in cpu_slot_mem.mem_channels:
                for dimm in mem_channel.dimms:
                    if dimm is not None:
                        yield dimm

    @property
    def ram_size(self):
        """The total capacity of the dimms of all servers, expressed in the unit of Dimm.num_gb."""
        # the capacity is read from the item registered in the configurator under the uid of the dimm
        return sum(self.num_servers * self.configurator.get_item(dimm.uid).num_gb for dimm in self._iter_dimms())

    @property
    def ram_price(self):
        """The total price of the dimms of all servers."""
        return sum((self.num_servers * self.configurator.get_item_price(dimm.uid) for dimm in self._iter_dimms()), 0.0)

    def get_price(self):
        """
        :return float: the price of the configuration : price of the chassis option + price of the cpus of all servers + price of the dimms of all servers
        """
        price = self.configurator.chassis.price

        price += self.num_servers * self.num_cpu_per_server * self.configurator.get_item_price(self.cpu.uid) + self.ram_price
        assert price > 0.0
        return price

    def get_power_consumption(self):
        """
        :return float: an estimation of the power consumption of all servers, based on the tdp of the cpus
        """
        server_base_power_consumption = 100.0  # rough estimation in watts
        power_consumption = (self.cpu.tdp * self.num_cpu_per_server + server_base_power_consumption) * self.num_servers
        return power_consumption

    def get_flops(self):
        """
        :return float: the number of double precision flops per second of all the cores of the configuration (the clock of the cpu is multiplied by 1.e9, which suggests that it is expressed in GHz)
        """
        # print('%d servers * %d cpu %s * %d cores @ %f (%d flops/cycle)' % (self.num_servers, self.num_cpu_per_server, str(self.cpu.uid), self.cpu.num_cores, self.cpu.clock, self.cpu.num_dp_flop_per_cycle))
        flops = self.cpu.num_dp_flop_per_cycle * self.cpu.clock * 1.e9 * self.cpu.num_cores * self.num_cpu_per_server * self.num_servers
        return flops

    def _init_dimm_slots(self):
        """(Re)creates the dimm slots of each memory channel of each cpu of a server; all the slots are empty afterwards."""
        # create the dimm slots
        self.cpu_slots_mem = []

        if self.cpu is None:
            # the number of memory channels is not known until a cpu is chosen
            return

        num_dimm_slots_per_channel = self.configurator.chassis.item.num_dimm_slots_per_channel
        for _ in range(self.num_cpu_per_server):
            cpu_slot_mem = CpuSlotMem()
            for _ in range(self.cpu.num_ram_channels):
                mem_channel = MemChannel()
                mem_channel.dimms.extend([None] * num_dimm_slots_per_channel)  # dimm slots are empty
                cpu_slot_mem.mem_channels.append(mem_channel)
            self.cpu_slots_mem.append(cpu_slot_mem)

    def set_cpu(self, cpu):
        """Sets the cpu model used in all the cpu slots; this empties all the dimm slots."""
        self.cpu = cpu
        # update the dimm slots accordingly
        self._init_dimm_slots()

    @property
    def num_cpu_per_server(self):
        """The number of cpus in each server; setting it empties all the dimm slots."""
        return self._num_cpu_per_server

    @num_cpu_per_server.setter
    def num_cpu_per_server(self, num_cpu_per_server):
        self._num_cpu_per_server = num_cpu_per_server
        # update the dimm slots accordingly
        self._init_dimm_slots()

    @property
    def num_cpus(self):
        """The total number of cpus of the configuration."""
        return self.num_cpu_per_server * self.num_servers


class Option:
    """An item together with its price."""

    def __init__(self, item, price):
        self.price = price
        self.item = item


class Module:
    """A named set of options, indexed by the uid of their item."""

    def __init__(self, name):
        self.options = dict()
        self.name = name

    def add_option(self, option):
        """Registers the given option; an option whose item has the same uid as an already registered one replaces it."""
        item_uid = option.item.uid
        self.options[item_uid] = option


class Configurator():
    """
    Base class of the configurators : a set of modules (eg 'processor', 'ram'), each module being a set of priced options.

    :param name: not used by this class
    """

    def __init__(self, name):
        self.modules = {}

    @abstractmethod
    def create_config(self):
        """Has to be implemented by the derived classes."""
        # an explicit raise is used instead of an assert statement so that the check is not removed by python -O
        raise AssertionError('create_config is expected to be implemented by the derived classes')

    def add_module(self, module):
        """Registers the given module under its name; a module with the same name is replaced."""
        self.modules[module.name] = module

    def get_cpu_options(self):
        """
        :return list: a Cpu for each option of the 'processor' module
        """
        return [Cpu(option.item.uid) for option in self.modules['processor'].options.values()]

    def get_ram_options(self):
        """
        :return list: the options of the 'ram' module
        """
        # the options are stored in the options attribute of the module (the module itself has no values method)
        return list(self.modules['ram'].options.values())

    def get_dimm(self, dimm_capacity):
        """
        :param dimm_capacity: the wanted capacity, compared with the num_gb of the dimms
        :return: the first dimm of the 'ram' module that has the given capacity
        :raises AssertionError: if the 'ram' module has no dimm of this capacity
        """
        for dimm_option in self.modules['ram'].options.values():
            dimm = dimm_option.item
            # print(dimm.num_gb)
            if dimm.num_gb == dimm_capacity:
                return dimm
        raise AssertionError('failed to find an option for a dimm of capacity %d gb' % dimm_capacity)

    def get_dimm_options(self):
        """
        :return list: the options of the 'ram' module (a new list at each call)
        """
        return list(self.modules['ram'].options.values())

    def get_item(self, item_uid):
        """
        :return: the item that has the given uid, searched in all modules (None if there's no such item)
        """
        for module in self.modules.values():
            if item_uid in module.options:
                return module.options[item_uid].item
        return None

    def get_item_price(self, item_uid):
        """
        :return: the price of the item that has the given uid, searched in all modules (None if there's no such item)
        """
        for module in self.modules.values():
            if item_uid in module.options:
                return module.options[item_uid].price
        return None


class TableBasedConfigurator(Configurator):
    """
    A configurator for which the processors available for each type of host are read from the table file 'dell_procoptions_table.dat'.

    The table is a tab separated file with a header line, which is looked up in the current working directory; its columns host_type_id and proc_id are used by get_cpu_options.

    :param host_type_id: the type of host, as found in the host_type_id column of the table
    :param int num_cpu_per_server: the number of cpus per server of the base configuration
    :param int num_servers: the number of servers of the base configuration
    """

    def __init__(self, host_type_id, num_cpu_per_server, num_servers=1):
        # initialize the base class, as it creates the modules dictionary that the methods inherited from Configurator (add_module, get_item, ...) rely on
        super().__init__(host_type_id)
        self.host_type_id = host_type_id
        self.num_cpu_per_server = num_cpu_per_server
        self.num_servers = num_servers
        # NOTE(review): with the "|U15" dtype, the ids of the table are truncated to 15 characters - check that the processor ids stored in the table are short enough
        self.dell_price_table = numpy.genfromtxt('dell_procoptions_table.dat', dtype=("|U15", "|U15", float), names=True, delimiter='\t')
        self.base_config = Config(self)
        self.base_config.num_servers = self.num_servers
        self.base_config.num_cpu_per_server = self.num_cpu_per_server

    @abstractmethod
    def get_empty_price(self):
        """Has to be implemented by the derived classes."""
        pass

    @abstractmethod
    def get_dimm_price(self, dimm_capacity):
        """Has to be implemented by the derived classes."""
        pass

    @abstractmethod
    def get_guarantee_price(self, guarantee_duration):
        """Has to be implemented by the derived classes."""
        pass

    @abstractmethod
    def get_disk_upgrade_price(self, disk_capacity):
        """Has to be implemented by the derived classes."""
        pass

    def get_cpu_options(self):
        """
        :return list: a Cpu for each row of the table whose host type is the one of this configurator
        """
        # genfromtxt returns a 0-dimensional array when the table has a single data row : make sure the columns can be iterated over in that case too
        price_table = numpy.atleast_1d(self.dell_price_table)
        return [Cpu(proc_id) for host_type_id, proc_id in zip(price_table['host_type_id'], price_table['proc_id']) if host_type_id == self.host_type_id]


# def create_host_type(host_type_id):
#     if host_type_id == 'c6420':
#         return dell.DellPowerEdgeC6420(host_type_id)
#     if host_type_id == 'c6320':
#         return dell.DellPowerEdgeC6320(host_type_id)
#     if host_type_id == 'c4130':
#         return dell.DellPowerEdgeC4130(host_type_id)
#     if host_type_id == 'r620':
#         return dell.DellPowerEdgeR620(host_type_id)
#     if host_type_id == 'r630':
#         return dell.DellPowerEdgeR630(host_type_id)
#     if host_type_id == 'r640':
#         return dell.DellPowerEdgeR640(host_type_id)
#     if host_type_id == 'r940':
#         return dell.DellPowerEdgeR940(host_type_id)
#     if host_type_id == 'precision3630':
#         return dell.DellPrecision3630(host_type_id)
#     assert False

    # dom = parse(dell_configurator_html_file_path)