concho/concho/config.py

530 lines
20 KiB
Python

import itertools
import math
import re
from abc import abstractmethod

import numpy
# from concho import dell
class Item:
    """Base class of everything that is identified by a unique id.

    :param uid: identifier of the item; stored unchanged in the ``uid`` attribute
    """

    def __init__(self, uid):
        self.uid = uid
class Chassis(Item):
    """A chassis model, with capacities deduced from its identifier.

    Attributes set by the constructor:

    - ``max_num_servers``: 4 when the uid starts with 'dell-poweredge-c6', 1 otherwise
    - ``num_cpu_slots_per_server``: 4 when the uid starts with 'dell-poweredge-r9', 2 otherwise
    - ``num_dimm_slots_per_channel``: always 2

    :param str uid: chassis identifier, eg 'dell-poweredge-r940'
    """

    def __init__(self, uid):
        super().__init__(uid)
        is_r9_series = re.match('dell-poweredge-r9.*', uid) is not None
        is_c6_series = re.match('dell-poweredge-c6.*', uid) is not None
        self.max_num_servers = 4 if is_c6_series else 1
        self.num_cpu_slots_per_server = 4 if is_r9_series else 2
        # NOTE(review): set for every chassis (Config reads it unconditionally); the flattened
        # source does not show whether this was nested under the c6 test - confirm
        self.num_dimm_slots_per_channel = 2
class Dimm(Item):
    """A memory module; its uid is built as '<mem_type>-<num_gb>-<num_mhz>'.

    :param num_gb: capacity of the module (Config counts one unit as 2**30 bytes)
    :param num_mhz: presumably the frequency of the module in MHz - confirm against the callers
    :param str mem_type: kind of memory; only used to build the uid and stored as is
    """

    def __init__(self, num_gb, num_mhz, mem_type):
        super().__init__("%s-%s-%s" % (mem_type, num_gb, num_mhz))
        self.mem_type = mem_type
        self.num_mhz = num_mhz
        self.num_gb = num_gb
class Cpu(Item):
    """A processor model whose characteristics are looked up in ``cpu_table.tsv``.

    The table is given as a relative path, so it is resolved against the current working
    directory, and it is parsed again each time a Cpu is built.

    Attributes read from the table row whose ``id`` equals ``proc_id``:

    - ``clock``: column ``clock`` (Config.get_flops multiplies it by 1e9, so presumably GHz)
    - ``num_cores``: column ``num_cores``
    - ``max_cpus``: column ``max_cpus``
    - ``tdp``: column ``tdp`` (Config.get_power_consumption uses it as watts)
    - ``cpumark``: column ``cpumark_1_cpu``

    :param str proc_id: processor identifier, eg 'intel-xeon-gold-6140'
    :raises AssertionError: if ``proc_id`` is not found in the table
    """

    # (regular expression, architecture) pairs tried in this order with re.match; the first hit
    # wins, so the versioned e5-26xx patterns have to stay ahead of the unversioned one
    _ARCHITECTURE_PATTERNS = (
        ('intel-core-i[357]-8[0-9][0-9][0-9][ktbuh]', 'coffeelake'),
        ('intel-xeon-silver-[0-9]3[0-9][0-9]', 'icelake'),
        ('intel-xeon-gold-[0-9]3[0-9][0-9]', 'icelake'),
        ('intel-xeon-platinum-[0-9]3[0-9][0-9]', 'icelake'),
        ('intel-xeon-silver-[0-9]2[0-9][0-9]', 'cascadelake'),
        ('intel-xeon-gold-[0-9]2[0-9][0-9]', 'cascadelake'),
        ('intel-xeon-platinum-[0-9]2[0-9][0-9]', 'cascadelake'),
        ('intel-xeon-gold-[0-9]1[0-9][0-9]', 'skylake'),
        ('intel-xeon-platinum-[0-9]1[0-9][0-9]', 'skylake'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*v4', 'broadwell'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*v3', 'haswell'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*v2', 'ivy bridge'),
        ('intel-xeon-e5-26[0-9][0-9][lwa]*', 'sandy bridge'),
        ('intel-xeon-x56[0-9][0-9]', 'gulftown'),
        ('intel-xeon-x55[0-9][0-9]', 'gainestown'),
        ('intel-xeon-e54[0-9][0-9]', 'harpertown'),
        ('intel-xeon-51[0-9][0-9]', 'woodcrest'),
        ('amd-epyc-[0-9][0-9fh][0-9]1', 'naples'),
        ('amd-epyc-[0-9][0-9fh][0-9]2', 'rome'),
        ('amd-epyc-[0-9][0-9fh][0-9f]3', 'milan'),
    )

    # skylake / cascadelake processors that have two AVX-512 FMA units per core instead of one
    # from https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors : Xeon Platinum, Gold 61XX, and Gold 5122 have two AVX-512 FMA units per core; Xeon Gold 51XX (except 5122), Silver, and Bronze have a single AVX-512 FMA unit per core
    # https://en.wikichip.org/wiki/intel/xeon_gold/5222 : 'Note that this is the only processor in the Xeon Gold 52xx series with two 512b FMA units.'
    _DUAL_AVX512_FMA_PATTERNS = (
        'intel-xeon-platinum-',  # stated by the reference above but previously not taken into account
        'intel-xeon-gold-5122',
        'intel-xeon-gold-5222',
        'intel-xeon-gold-61[0-9][0-9]',
        'intel-xeon-gold-62[0-9][0-9]',
        'intel-xeon-gold-63[0-9][0-9]',  # never reached in practice: gold 63xx is classified as icelake
    )

    def __init__(self, proc_id):
        super().__init__(proc_id)
        cpu_table = numpy.genfromtxt('cpu_table.tsv', dtype=("|U32", float, int, float, float, float), names=True, delimiter='\t')
        for row in cpu_table:
            if row['id'] == proc_id:
                break
        else:
            # raised explicitly (rather than asserted) so that an unknown processor is still
            # detected under `python -O`, and so that an empty table gives the same clear error
            raise AssertionError('Failed to find %s in cputable' % proc_id)
        self.clock = row['clock']
        self.num_cores = row['num_cores']
        self.max_cpus = row['max_cpus']
        self.tdp = row['tdp']
        self.cpumark = row['cpumark_1_cpu']

    @property
    def architecture(self):
        """
        :return str: name of the micro-architecture deduced from the uid, eg 'broadwell'
        :raises AssertionError: if the uid matches none of the known patterns
        """
        proc_id = self.uid
        for pattern, arch_name in self._ARCHITECTURE_PATTERNS:
            if re.match(pattern, proc_id):
                return arch_name
        # explicit raise: an `assert False` would be stripped by `python -O` and None returned
        raise AssertionError('unhandled processor id : %s' % proc_id)

    @property
    def num_dp_flop_per_cycle(self):
        """
        :return int: number of double precision floating point operations a core performs per cycle
        :raises KeyError: if the architecture has no simd id (see get_simd_id)
        """
        proc_arch = self.architecture
        simd_id = get_simd_id(proc_arch)
        num_simd_per_core = 1
        if proc_arch in ['skylake', 'cascadelake']:
            if any(re.match(pattern, self.uid) for pattern in self._DUAL_AVX512_FMA_PATTERNS):
                num_simd_per_core = 2
        elif proc_arch == 'icelake':
            # https://www.microway.com/knowledge-center-articles/detailed-specifications-of-the-ice-lake-sp-intel-xeon-processor-scalable-family-cpus/
            # > AVX-512 instructions (up to 16 double-precision FLOPS per cycle per AVX-512 FMA unit)
            # > Two AVX-512 FMA units per CPU core (available in all Ice Lake-SP CPU SKUs)
            # https://www.intel.com/content/www/us/en/products/sku/215269/intel-xeon-silver-4314-processor-24m-cache-2-40-ghz/specifications.html shows that even xeon silver 4314 has 2 AVX 512 fma units
            num_simd_per_core = 2
        # 'rome' keeps num_simd_per_core = 1:
        # from https://www.microway.com/knowledge-center-articles/detailed-specifications-of-the-amd-epyc-rome-cpus/:
        # - Full support for 256-bit AVX2 instructions with two 256-bit FMA units per CPU core. The previous “Naples” architecture split 256-bit instructions into two separate 128-bit operations
        # - Up to 16 double-precision FLOPS per cycle per core
        # - Double-precision floating point multiplies complete in 3 cycles (down from 4)
        # note : zen2 rome core has 2 256 bits fma units per core, which corresponds to avx2 technology according to https://stackoverflow.com/questions/15655835/flops-per-cycle-for-sandy-bridge-and-haswell-sse2-avx-avx2:
        # Intel Haswell/Broadwell/Skylake/Kaby Lake/Coffee/... (AVX+FMA3):
        # - 16 DP FLOPs/cycle: two 4-wide FMA (fused multiply-add) instructions
        # - 32 SP FLOPs/cycle: two 8-wide FMA (fused multiply-add) instructions
        # - (Using 256-bit vector instructions can reduce max turbo clock speed on some CPUs.)
        # so, rome core have one avx2 simd, which has 2 256-bit fmadd units. Each 256-bit fma unit is able to perform 4*2 = 8 dflops/cycle; and in total we have 16 dflops per cycle per rome core, which is confirmed by internet
        return num_simd_per_core * simd_id_to_dp_flops_per_cycle(simd_id)

    @property
    def num_ram_channels(self):
        """
        :return int: number of memory channels of one cpu
        :raises KeyError: if the architecture is not in the table below
        """
        ram_channels_per_arch = {
            'skylake': 6,
            'coffeelake': 6,
            'cascadelake': 6,
            'icelake': 8,
            'rome': 8,
            'milan': 8,
        }
        return ram_channels_per_arch[self.architecture]
def get_proc_architecture(proc_id):
    """
    Builds a Cpu for the given id (which looks the id up in the cpu table) and asks it for its architecture.

    :param str proc_id: processor identifier, eg 'intel-xeon-e5-2660v4'
    :return str: name of the micro-architecture, eg 'broadwell'
    """
    cpu = Cpu(proc_id)
    return cpu.architecture
def get_proc_arch_transistor_size(proc_id):
    """
    :param str proc_id: processor identifier, eg 'intel-xeon-e5-2660v4'
    :return int: process node (in nm) of the micro-architecture of the processor
    :raises KeyError: if the architecture of the processor is not in the table below
    """
    transistor_size_per_arch = {
        'woodcrest': 65,
        'harpertown': 45,
        'gainestown': 45,
        'gulftown': 32,
        'sandy bridge': 32,
        'ivy bridge': 22,
        'haswell': 22,
        'broadwell': 14,
        'skylake': 14,
        'coffeelake': 14,
        'cascadelake': 14,
        # architectures that Cpu.architecture can return but that used to raise KeyError here
        'icelake': 10,
        'naples': 14,
        'rome': 7,  # compute dies only (the i/o die uses an older process)
        'milan': 7,  # compute dies only
    }
    return transistor_size_per_arch[get_proc_architecture(proc_id)]
def simd_id_to_dp_flops_per_cycle(simd_id):
    """
    Gives the number of double precision floating point operations per cycle associated with a simd instruction set (Cpu.num_dp_flop_per_cycle multiplies it by the number of simd units per core).

    :param str simd_id: eg 'avx2'
    :return int: eg 16
    :raises KeyError: if simd_id is not one of the known instruction sets
    """
    # from https://stackoverflow.com/questions/15655835/flops-per-cycle-for-sandy-bridge-and-haswell-sse2-avx-avx2
    # Intel Core 2 and Nehalem:
    #
    # 4 DP FLOPs/cycle: 2-wide SSE2 addition + 2-wide SSE2 multiplication
    # 8 SP FLOPs/cycle: 4-wide SSE addition + 4-wide SSE multiplication
    #
    # Intel Sandy Bridge/Ivy Bridge:
    #
    # 8 DP FLOPs/cycle: 4-wide AVX addition + 4-wide AVX multiplication
    # 16 SP FLOPs/cycle: 8-wide AVX addition + 8-wide AVX multiplication
    #
    # Intel Haswell/Broadwell/Skylake/Kaby Lake:
    #
    # 16 DP FLOPs/cycle: two 4-wide FMA (fused multiply-add) instructions
    # 32 SP FLOPs/cycle: two 8-wide FMA (fused multiply-add) instructions
    # https://www.dell.com/support/kbdoc/fr-fr/000137696/amd-rome-is-it-for-real-architecture-and-initial-hpc-performance
    # The Rome micro-architecture can retire 16 DP FLOP/cycle, double that of Naples which was 8 FLOPS/cycle
    dp_flops_per_simd = {}
    dp_flops_per_simd['sse4.1'] = 4
    dp_flops_per_simd['sse4.2'] = 4
    dp_flops_per_simd['avx'] = 8
    dp_flops_per_simd['avx2'] = 16
    dp_flops_per_simd['avx-512'] = 16
    return dp_flops_per_simd[simd_id]
def get_simd_id(proc_arch):
    """
    Gives the simd instruction set that is used to estimate the flops of an architecture.

    :param str proc_arch: eg 'broadwell'
    :return str: eg 'avx2'
    :raises KeyError: if proc_arch is not one of the architectures listed below
    """
    archs_per_simd = (
        ('sse4.1', ('woodcrest', 'harpertown')),
        ('sse4.2', ('gainestown', 'gulftown')),
        ('avx', ('sandy bridge', 'ivy bridge')),
        # rome : from https://www.microway.com/knowledge-center-articles/detailed-specifications-of-the-amd-epyc-rome-cpus/:
        # - Full support for 256-bit AVX2 instructions with two 256-bit FMA units per CPU core. The previous “Naples” architecture split 256-bit instructions into two separate 128-bit operations
        # - Up to 16 double-precision FLOPS per cycle per core
        # - Double-precision floating point multiplies complete in 3 cycles (down from 4)
        ('avx2', ('haswell', 'broadwell', 'coffeelake', 'rome', 'milan')),
        ('avx-512', ('skylake', 'cascadelake', 'icelake')),
    )
    simd_per_arch = {arch: simd_id for simd_id, archs in archs_per_simd for arch in archs}
    return simd_per_arch[proc_arch]
class MemChannel:
    """One memory channel of a cpu; ``dimms`` holds one entry per dimm slot and starts empty."""

    def __init__(self):
        self.dimms = list()
class CpuSlotMem:
    """Memory attached to one cpu slot; ``mem_channels`` starts empty and is filled by the owner."""

    def __init__(self):
        self.mem_channels = list()
class Config:
    """A hardware configuration: a number of servers, the cpu they use and the dimms in their slots.

    Prices and the chassis are obtained from the configurator the config was created with.

    :param configurator: object providing ``chassis`` (with ``item`` and ``price``), ``get_dimm_options()``, ``get_item()`` and ``get_item_price()``
    """

    def __init__(self, configurator):
        self.configurator = configurator
        self.num_servers = 0
        self._num_cpu_per_server = 0
        self.cpu = None
        self.cpu_slots_mem = []  # one CpuSlotMem per cpu of a server, rebuilt by _init_dimm_slots

    @property
    def chassis(self):
        """The chassis item of the configurator."""
        return self.configurator.chassis.item

    @staticmethod
    def _find_dimm_combination(num_dimm_slots_per_channel, min_ram_per_channel, available_dimms):
        """
        Finds the cheapest way of filling the dimm slots of one memory channel. Every combination is tried; among combinations of equal price, the first one met is kept (the first slot varies fastest, the empty slot is tried last).

        :param int num_dimm_slots_per_channel: number of dimm slots to fill
        :param float min_ram_per_channel: minimal capacity of the channel, in bytes
        :param list available_dimms: options (objects with ``item.num_gb`` and ``price``); the list is not modified
        :return list: one entry per slot: the dimm item to put in it, or None if the slot stays empty
        :raises AssertionError: if no combination reaches min_ram_per_channel
        """
        bytes_per_gb = math.pow(2.0, 30.0)
        # None stands for an empty slot; it is added to a copy so that the caller's list is left untouched
        slot_choices = list(available_dimms) + [None]
        best_combination = None
        best_price = None
        # itertools.product makes the last position vary fastest: each tuple is reversed so that
        # the first slot varies fastest
        for reversed_combination in itertools.product(slot_choices, repeat=num_dimm_slots_per_channel):
            combination = reversed_combination[::-1]
            config_capacity = 0
            config_price = 0
            for dimm_option in combination:
                if dimm_option is not None:
                    config_capacity += float(dimm_option.item.num_gb) * bytes_per_gb
                    config_price += dimm_option.price
            # only remember the combination if it complies with the minimal memory constraint
            if config_capacity >= min_ram_per_channel and (best_price is None or config_price < best_price):
                best_price = config_price
                best_combination = combination
        if best_combination is None:
            # raised explicitly so that the failure is still reported under `python -O`
            raise AssertionError("Failed to find a dimm combination that provides %f bytes per channel." % min_ram_per_channel)
        # a dimm without capacity is reported as an empty slot
        return [None if dimm_option is None or dimm_option.item.num_gb == 0 else dimm_option.item for dimm_option in best_combination]

    def set_ram(self, ram_per_core=None, ram_per_server=None, ram_per_cpu=None):
        """
        Fills the dimm slots with the cheapest dimms that provide the requested amount of memory. Exactly one of the three arguments is expected.

        :param float ram_per_core: requested memory for each core, in bytes
        :param float ram_per_server: requested memory for each server, in bytes
        :param float ram_per_cpu: requested memory for each cpu, in bytes
        :raises TypeError: if none of the three amounts is given
        :raises AssertionError: if more than one amount is given, or if the request can't be satisfied
        """
        # ramUpgradePrice128Gb = {
        # 'c6220':3520.0,
        # 'r620':2010.0,
        # 'r630':1778.0,
        # 'r640':1780.0,
        # 'r730':1778.0,
        # 'r940':960.0, # 32 Gb 2933 MHz RDIMM : 320 €
        # 'c6320':6222.6,
        # 'c4310':1778.0,
        # 'precision3630': 1536.0 }
        cpu = self.cpu
        if ram_per_cpu:
            assert not ram_per_core
            assert not ram_per_server
        if ram_per_core:
            assert not ram_per_server
            assert not ram_per_cpu
            ram_per_cpu = cpu.num_cores * ram_per_core
        if ram_per_server:
            assert not ram_per_core
            assert not ram_per_cpu
            ram_per_cpu = ram_per_server / self.num_cpu_per_server
        if ram_per_cpu is None:
            # same exception type as the former `None / int` failure, with an explicit message
            raise TypeError('set_ram expects one of ram_per_core, ram_per_server or ram_per_cpu')
        ram_per_channel = ram_per_cpu / cpu.num_ram_channels
        num_dimm_slots_per_channel = self.chassis.num_dimm_slots_per_channel
        slot_dimms = Config._find_dimm_combination(num_dimm_slots_per_channel, ram_per_channel, self.configurator.get_dimm_options())
        # every channel of every cpu gets the same dimms
        for cpu_slot_mem in self.cpu_slots_mem:
            for mem_channel in cpu_slot_mem.mem_channels:
                for dimm_slot_index, dimm in enumerate(slot_dimms):
                    mem_channel.dimms[dimm_slot_index] = dimm

    def _iter_installed_dimms(self):
        """Yields the dimm of each non empty dimm slot of one server."""
        for cpu_slot_mem in self.cpu_slots_mem:
            for mem_channel in cpu_slot_mem.mem_channels:
                for dimm_slot_index in range(self.chassis.num_dimm_slots_per_channel):
                    dimm = mem_channel.dimms[dimm_slot_index]
                    if dimm is not None:
                        yield dimm

    @property
    def ram_size(self):
        """Total capacity of the dimms of all the servers, in the unit of ``Dimm.num_gb`` (unlike set_ram, which expects bytes)."""
        ram_size = 0
        for dimm in self._iter_installed_dimms():
            # the capacity is read from the item registered in the configurator under the same uid
            ram_size += self.num_servers * self.configurator.get_item(dimm.uid).num_gb
        return ram_size

    @property
    def ram_price(self):
        """Total price of the dimms of all the servers."""
        ram_price = 0.0
        for dimm in self._iter_installed_dimms():
            ram_price += self.num_servers * self.configurator.get_item_price(dimm.uid)
        return ram_price

    def get_price(self):
        """
        :return float: price of the chassis, plus the price of the cpus and of the dimms of all the servers
        :raises AssertionError: if the resulting price is not strictly positive
        """
        cpu_price = self.configurator.get_item_price(self.cpu.uid)
        price = self.configurator.chassis.price
        price += self.num_servers * self.num_cpu_per_server * cpu_price + self.ram_price
        assert price > 0.0
        return price

    def get_power_consumption(self):
        """
        :return float: estimated power consumption of all the servers, in watts (tdp of the cpus plus a fixed amount per server)
        """
        server_base_power_consumption = 100.0  # rough estimation in watts
        return (self.cpu.tdp * self.num_cpu_per_server + server_base_power_consumption) * self.num_servers

    def get_flops(self):
        """
        :return float: peak number of double precision floating point operations per second of all the servers (assumes cpu.clock is in GHz)
        """
        return self.cpu.num_dp_flop_per_cycle * self.cpu.clock * 1.e9 * self.cpu.num_cores * self.num_cpu_per_server * self.num_servers

    def _init_dimm_slots(self):
        """(Re)creates the dimm slots, all empty, for the current cpu and number of cpus per server; no slot is created while no cpu is set."""
        self.cpu_slots_mem = []
        if self.cpu is None:
            return
        for _cpu_index in range(self.num_cpu_per_server):
            cpu_slot_mem = CpuSlotMem()
            for _channel_index in range(self.cpu.num_ram_channels):
                mem_channel = MemChannel()
                # the chassis is only looked up here, so that it is not required as long as there is no slot to create
                mem_channel.dimms.extend([None] * self.chassis.num_dimm_slots_per_channel)
                cpu_slot_mem.mem_channels.append(mem_channel)
            self.cpu_slots_mem.append(cpu_slot_mem)

    def set_cpu(self, cpu):
        """Sets the cpu model used by every server; this empties all the dimm slots."""
        self.cpu = cpu
        # update the dimm slots accordingly
        self._init_dimm_slots()

    @property
    def num_cpu_per_server(self):
        """Number of cpus in each server; setting it empties all the dimm slots."""
        return self._num_cpu_per_server

    @num_cpu_per_server.setter
    def num_cpu_per_server(self, num_cpu_per_server):
        self._num_cpu_per_server = num_cpu_per_server
        # update the dimm slots accordingly
        self._init_dimm_slots()

    @property
    def num_cpus(self):
        """Total number of cpus, all servers included."""
        return self.num_cpu_per_server * self.num_servers
class Option:
    """An item that can be chosen in a module, together with its price.

    :param item: the item of the option (the modules index options by ``item.uid``)
    :param price: price of the option
    """

    def __init__(self, item, price):
        self.price = price
        self.item = item
class Module:
    """A named group of options, indexed by the uid of their item.

    :param name: name of the module, eg 'ram'
    """

    def __init__(self, name):
        self.options = {}
        self.name = name

    def add_option(self, option):
        """Registers the option under the uid of its item; an option already registered under that uid is replaced."""
        item_uid = option.item.uid
        self.options[item_uid] = option
class Configurator:
    """A set of modules (eg 'processor', 'ram'), each of them offering priced options.

    :param name: accepted for the subclasses' sake; NOTE(review): it is not stored by this class
    """

    def __init__(self, name):
        self.modules = {}  # the modules, indexed by their name

    @abstractmethod
    def create_config(self):
        """To be overridden by the subclasses."""
        assert False

    def add_module(self, module):
        """Registers the module under its name; a module already registered under that name is replaced."""
        self.modules[module.name] = module

    def get_cpu_options(self):
        """
        :return list: a new Cpu for each option of the 'processor' module
        """
        return [Cpu(option.item.uid) for option in self.modules['processor'].options.values()]

    def get_ram_options(self):
        """
        :return: a view on the options of the 'ram' module
        """
        # a Module has no values() method: the options are in its `options` dictionary
        return self.modules['ram'].options.values()

    def get_dimm(self, dimm_capacity):
        """
        :param dimm_capacity: the wanted capacity, compared to ``num_gb``
        :return: the first dimm of the 'ram' module that has this capacity
        :raises AssertionError: if the 'ram' module offers no dimm of this capacity
        """
        for dimm_option in self.modules['ram'].options.values():
            dimm = dimm_option.item
            if dimm.num_gb == dimm_capacity:
                return dimm
        assert False, 'failed to find an option for a dimm of capacity %d gb' % dimm_capacity

    def get_dimm_options(self):
        """
        :return list: a new list of the options of the 'ram' module
        """
        return list(self.modules['ram'].options.values())

    def get_item(self, item_uid):
        """
        :return: the item that has the given uid in the first module that offers it, or None if no module does
        """
        for module in self.modules.values():
            if item_uid in module.options:
                return module.options[item_uid].item
        return None

    def get_item_price(self, item_uid):
        """
        :return: the price of the item that has the given uid in the first module that offers it, or None if no module does
        """
        for module in self.modules.values():
            if item_uid in module.options:
                return module.options[item_uid].price
        return None
class TableBasedConfigurator(Configurator):
    """A configurator whose supported cpus are listed in the table ``dell_procoptions_table.dat``.

    The table is given as a relative path, so it is resolved against the current working directory.

    :param str host_type_id: identifier of the type of host, as found in the ``host_type_id`` column of the table
    :param int num_cpu_per_server: number of cpus in each server
    :param int num_servers: number of servers
    """

    def __init__(self, host_type_id, num_cpu_per_server, num_servers=1):
        # initialises `modules`, which the lookups inherited from Configurator rely on
        super().__init__(host_type_id)
        self.host_type_id = host_type_id
        self.num_cpu_per_server = num_cpu_per_server
        self.num_servers = num_servers
        # NOTE(review): with "|U15" the host and processor ids are stored on at most 15 characters - confirm that the ids of the table fit
        self.dell_price_table = numpy.genfromtxt('dell_procoptions_table.dat', dtype=("|U15", "|U15", float), names=True, delimiter='\t')
        self.base_config = Config(self)
        self.base_config.num_servers = self.num_servers
        self.base_config.num_cpu_per_server = self.num_cpu_per_server

    @abstractmethod
    def get_empty_price(self):
        """To be overridden by the subclasses."""

    @abstractmethod
    def get_dimm_price(self, dimm_capacity):
        """To be overridden by the subclasses."""

    @abstractmethod
    def get_guarantee_price(self, guarantee_duration):
        """To be overridden by the subclasses."""

    @abstractmethod
    def get_disk_upgrade_price(self, disk_capacity):
        """To be overridden by the subclasses."""

    def get_cpu_options(self):
        """
        :return list: a new Cpu for each row of the table whose ``host_type_id`` is the one of this configurator
        """
        price_table = self.dell_price_table
        return [Cpu(proc_id) for host_type_id, proc_id in zip(price_table['host_type_id'], price_table['proc_id']) if host_type_id == self.host_type_id]
# def create_host_type(host_type_id):
# if host_type_id == 'c6420':
# return dell.DellPowerEdgeC6420(host_type_id)
# if host_type_id == 'c6320':
# return dell.DellPowerEdgeC6320(host_type_id)
# if host_type_id == 'c4130':
# return dell.DellPowerEdgeC4130(host_type_id)
# if host_type_id == 'r620':
# return dell.DellPowerEdgeR620(host_type_id)
# if host_type_id == 'r630':
# return dell.DellPowerEdgeR630(host_type_id)
# if host_type_id == 'r640':
# return dell.DellPowerEdgeR640(host_type_id)
# if host_type_id == 'r940':
# return dell.DellPowerEdgeR940(host_type_id)
# if host_type_id == 'precision3630':
# return dell.DellPrecision3630(host_type_id)
# assert False
# dom = parse(dell_configurator_html_file_path)