# concho/procs_chooser.py
# -*- coding: utf-8 -*-
import numpy
import pylab
import matplotlib.pyplot as plt
import matplotlib.colors
import itertools
import re
import hashlib
from string import ascii_lowercase
# Matplotlib marker codes used to tell the plotted items apart: get_marker()
# picks one per processor id, and the plotting code can also cycle over them.
markerTypes = [
    ',', '+', '.', '^', 'v', '<', '>', 'o',
    '*', '1', '2', '3', '4', '8', 's', 'p',
    'h', 'H', 'x', 'X', 'D', 'd', '|', '_',
]
# More markers could be obtained from mathtext letters:
# for c in ascii_lowercase:
#     markerTypes.append('$%s$' % c)

# Colours combined with markerTypes through itertools.product(). The trailing
# comma matters: ('r') is just the string 'r', not a tuple. A wider palette
# would be ('r', 'g', 'b').
markerColors = ('r',)
def get_marker(proc_id):
    """Return the matplotlib marker code associated with a processor id.

    The marker is selected by taking the MD5 digest of the utf-8 encoded id
    modulo the number of entries in markerTypes, so that a given id is always
    drawn with the same marker.

    :param str proc_id: processor identifier, eg 'Gold-6126'
    :return str: one of the entries of markerTypes
    """
    hex_digest = hashlib.md5(proc_id.encode('utf-8')).hexdigest()
    marker_index = int(hex_digest, 16) % len(markerTypes)
    return markerTypes[marker_index]
def plotCpuPassmark():
    """Plot the passmark score of each cpu against its theoretical speed.

    Reads 'cpu_table.dat' (tab-separated, with a header row) from the current
    directory and draws one point per processor that has a positive 'cpumark'
    value, with x = clock * num_cores and y = cpumark. Points are blue when
    the processor id ends with '2' and red otherwise. The figure is finally
    displayed with plt.show().
    """
    # NOTE(review): plotSystemEfficiency() reads the same file with six
    # columns and a 'cpumark_1_cpu' column; the five-column dtype and the
    # 'cpumark' name used here may be stale -- confirm against the data file.
    cpuTable = numpy.genfromtxt('cpu_table.dat', dtype=("|U10", float, int, float, float), names=True, delimiter='\t')

    # subplot indices are 1-based: index 0 is rejected by matplotlib
    plt.subplot(1, 1, 1)
    plt.subplots_adjust(bottom=0.1)

    markersCycler = itertools.cycle(itertools.product(markerTypes, markerColors))
    labels = cpuTable['id']
    x = cpuTable['clock'] * cpuTable['num_cores']
    y = cpuTable['cpumark']
    markerSize = 50

    for label, x1, y1 in zip(labels, x, y):
        if y1 <= 0.0:
            continue  # no passmark available for this data
        generation = label[-1]
        color = 'b' if generation == '2' else 'r'
        # next(iterator) works on python 3 (iterator.next() is python 2 only);
        # each element is a (marker type, marker colour) pair
        marker = next(markersCycler)
        plt.scatter(x1, y1, color=color, s=markerSize, marker=marker[0], label=label)

    plt.xlabel(u'theoretical cpu speed [core.GHz]')
    plt.ylabel(u'passmark [?]')
    plt.title(u'comparison between cpu theoretical and effective speed')
    plt.xlim( xmin = 0.0 )
    plt.ylim( ymin = 0.0 )
    plt.legend(bbox_to_anchor=(0.2, 1.0))
    plt.draw()
    plt.show()
# Ordered (pattern, architecture) pairs. Patterns are matched against the
# start of the processor id and the first match wins, so the 'v4', 'v3' and
# 'v2' E5 patterns have to stay ahead of the generic E5 pattern.
_PROC_ARCH_PATTERNS = tuple(
    (re.compile(pattern), arch) for pattern, arch in (
        (r'core-i[357]-8[0-9][0-9][0-9][ktbuh]', 'coffeelake'),
        (r'Bronze-[0-9]2[0-9][0-9]', 'cascadelake'),
        (r'Silver-[0-9]2[0-9][0-9]', 'cascadelake'),
        (r'Gold-[0-9]2[0-9][0-9]', 'cascadelake'),
        (r'Platinum-[0-9]2[0-9][0-9]', 'cascadelake'),
        (r'Bronze-[0-9]1[0-9][0-9]', 'skylake'),
        (r'Silver-[0-9]1[0-9][0-9]', 'skylake'),
        (r'Gold-[0-9]1[0-9][0-9]', 'skylake'),
        (r'Platinum-[0-9]1[0-9][0-9]', 'skylake'),
        (r'E5-26[0-9][0-9][LWA]*v4', 'broadwell'),
        (r'E5-26[0-9][0-9][LWA]*v3', 'haswell'),
        (r'E5-26[0-9][0-9][LWA]*v2', 'ivy bridge'),
        (r'E5-26[0-9][0-9][LWA]*', 'sandy bridge'),
        (r'X56[0-9][0-9]', 'gulftown'),
        (r'X55[0-9][0-9]', 'gainestown'),
        (r'E54[0-9][0-9]', 'harpertown'),
        (r'51[0-9][0-9]', 'woodcrest'),
    )
)


def get_proc_architecture(proc_id):
    """Return the name of the micro-architecture of a processor.

    :param str proc_id: processor identifier, eg 'E5-2660v4' or 'Gold-6126'
    :return str: architecture name, eg 'broadwell'
    :raises AssertionError: if proc_id matches none of the known patterns
    """
    for pattern, arch in _PROC_ARCH_PATTERNS:
        if pattern.match(proc_id):
            return arch
    # raised explicitly (instead of 'assert False') so that the check is kept
    # when python runs with -O, and so that the offending id is reported
    raise AssertionError("unknown processor id: '%s'" % proc_id)
def get_proc_arch_transistor_size(proc_arch):
    """Return the transistor size of a processor architecture.

    Despite its name, the argument used to be accepted only as a processor id
    (it was always resolved through get_proc_architecture()). Architecture
    names are now accepted directly as well; processor ids keep working.

    :param str proc_arch: an architecture name, eg 'broadwell', or a
        processor id, eg 'E5-2660v4'
    :return int: transistor size (presumably the process node in nanometres;
        the unit is not stated in this file)
    :raises AssertionError: raised by get_proc_architecture() when the
        argument is neither an architecture name nor a recognised processor id
    """
    size_by_arch = {
        'woodcrest': 65,
        'harpertown': 45,
        'gainestown': 45,
        'gulftown': 32,
        'sandy bridge': 32,
        'ivy bridge': 22,
        'haswell': 22,
        'broadwell': 14,
        'skylake': 14,
        'coffeelake': 14,
        'cascadelake': 14,
    }
    if proc_arch in size_by_arch:
        return size_by_arch[proc_arch]
    # not an architecture name: treat the argument as a processor id
    return size_by_arch[get_proc_architecture(proc_arch)]
def simd_id_to_dp_flops_per_cycle(simd_id):
    """Return the number of double precision flops per cycle of a simd instruction set.

    :param str simd_id: eg 'avx2'
    :return int: number of double precision floating point operations per cycle
    :raises KeyError: if simd_id is not a known instruction set
    """
    # figures from https://stackoverflow.com/questions/15655835/flops-per-cycle-for-sandy-bridge-and-haswell-sse2-avx-avx2
    #
    # Intel Core 2 and Nehalem:
    #    4 DP FLOPs/cycle: 2-wide SSE2 addition + 2-wide SSE2 multiplication
    #    8 SP FLOPs/cycle: 4-wide SSE addition + 4-wide SSE multiplication
    #
    # Intel Sandy Bridge/Ivy Bridge:
    #    8 DP FLOPs/cycle: 4-wide AVX addition + 4-wide AVX multiplication
    #    16 SP FLOPs/cycle: 8-wide AVX addition + 8-wide AVX multiplication
    #
    # Intel Haswell/Broadwell/Skylake/Kaby Lake:
    #    16 DP FLOPs/cycle: two 4-wide FMA (fused multiply-add) instructions
    #    32 SP FLOPs/cycle: two 8-wide FMA (fused multiply-add) instructions
    simd_ids_by_flops = {
        4: ('sse4.1', 'sse4.2'),
        8: ('avx',),
        16: ('avx2', 'avx-512'),
    }
    flops_by_simd_id = {}
    for flops, simd_ids in simd_ids_by_flops.items():
        for known_simd_id in simd_ids:
            flops_by_simd_id[known_simd_id] = flops
    return flops_by_simd_id[simd_id]
def get_simd_id(proc_arch):
    """Return the simd instruction set associated with a processor architecture.

    :param str proc_arch: eg 'broadwell'
    :return str: eg 'avx2'
    :raises KeyError: if proc_arch is not a known architecture
    """
    archs_by_simd_id = (
        ('sse4.1', ('woodcrest', 'harpertown')),
        ('sse4.2', ('gainestown', 'gulftown')),
        ('avx', ('sandy bridge', 'ivy bridge')),
        ('avx2', ('haswell', 'broadwell', 'coffeelake')),
        ('avx-512', ('skylake', 'cascadelake')),
    )
    simd_id_by_arch = {}
    for simd_id, archs in archs_by_simd_id:
        for arch in archs:
            simd_id_by_arch[arch] = simd_id
    return simd_id_by_arch[proc_arch]
def num_dp_flop_per_cycle(proc_id):
    """Return the peak number of double precision flops per cycle of one core.

    The value is the per-unit figure of the processor's simd instruction set,
    multiplied by the number of simd units per core (2 for the skylake and
    cascadelake models listed below, 1 otherwise). The processor id and the
    result are also printed.

    :param str proc_id: processor identifier, eg 'Gold-6126'
    :return int: double precision floating point operations per cycle per core
    """
    proc_arch = get_proc_architecture(proc_id)
    simd_id = get_simd_id(proc_arch)
    num_simd_per_core = 1
    if proc_arch in ('skylake', 'cascadelake'):
        # from https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors : Xeon Platinum, Gold 61XX, and Gold 5122 have two AVX-512 FMA units per core; Xeon Gold 51XX (except 5122), Silver, and Bronze have a single AVX-512 FMA unit per core
        # Platinum models were previously left at one unit, which halved their peak.
        # NOTE(review): the quoted note only covers skylake; Gold 62XX was already
        # treated like Gold 61XX here. Other cascadelake exceptions (eg Gold 5222)
        # are not handled -- confirm against Intel's specifications.
        if re.match(r'Platinum-|Gold-5122|Gold-6[12][0-9][0-9]', proc_id):
            num_simd_per_core = 2
    dp_flops_per_cycle = num_simd_per_core * simd_id_to_dp_flops_per_cycle(simd_id)
    print(proc_id, dp_flops_per_cycle)
    return dp_flops_per_cycle
def get_system_base_price( host_id ):
    """Return the price of a host without any processor.

    :param str host_id: host model key, eg 'r630'
    :return float: base price of the host (the derivations below are in euros)
    :raises KeyError: if host_id is not one of the known host models
    """
    # How the base prices were worked out
    # ===================================
    # dell's web site gives the price for one or multiple procs but not for 0,
    # so the price x of the host without procs has to be deduced.
    #
    # r730 on 06/10/2016
    # ------------------
    # (x: price without procs, p1: price of e5-2603v4, p2: price of e5-2609v4)
    #   x + p1 = 1014.0
    #   x + 2 * p1 = 1014.0 + 216
    #   => p1 approx= 215.5
    #   => x = 1014. - 215. = 799.0
    #   x + p2 = 1123.0
    #   => p2 = 324.0
    #   x + 2 * p2 = 1447.0
    #
    # r630 on 14/10/2016
    # ------------------
    # (x: price without procs, p2603: price of e5-2603v4, p2609: price of e5-2609v4)
    #   x + p2603 = 948.0
    #   x + 2 * p2603 = 948.0 + 216
    #   => p2603 approx= 215.5
    #   => x = 948. - 215. = 733.0
    # verification :
    #   x + p2609 = 1057.0
    #   => p2609 = 1057-733=324.0
    #   x + 2 * p2609 = 1381.0
    #
    # 4xc6320 on 14/10/2016
    # ---------------------
    # (x: price without procs)
    #   x + 4 x (2 x p2620 + p32G) = 5135 € HT
    #   x + 4 x (2 x p2640 + p128G + pX520 + p5years) = 15590 € HT
    #   x + 4 x (2 x p2650 + p128G + pX520 + p5years) = 17340 € HT
    #   x + 4 x (2 x p2660 + p128G + pX520 + p5years) = 19490 € HT
    # by examining this and the price of processors on R630
    #   - E5-2620v4 : 458€
    #   - E5-2640v4 : 951€
    #   - E5-2650v4 : 1209€
    #   - E5-2660v4 : 1525€
    #   - E5-2680v4 : 1867€
    #   - E5-2690v4 : 2261€
    # I could work out that :
    #   - the price of procs on c6320 is the price of procs on r630 * 85%
    #   - the price of the base c6320 with 32 Go and no proc at all is 2020.6
    #   - the price of the 32G to 128G upgrade is 6222.6 euros (cheaper price of 16G->128G upgrade on r630 : (1778*4 = 7112))
    # details :
    #   >>> (19490.-17340)/8
    #   268.75
    #   >>> (17340.-15590)/8
    #   218.75
    #   >>> 218.75/258.
    #   0.8478682170542635
    #   >>> 268.75/316
    #   0.8504746835443038
    #   >>> 15590.0+((1209.0-951.0)*0.85)*8
    #   17344.4
    #   >>> 15590.0+((1525.0-951.0)*0.85)*8
    #   19493.2
    # price of 128G ram upgrade assuming that 5years guarantee costs 880€ (same as c6220),
    #   >>> 15590.0+((458.0-951.0)*0.85)*8-210.0*4-880.0 - 5135.0
    #   6222.6
    #   >>> 5135.0 - (458.0*0.85)*8
    #   2020.6
    #
    # c4130 on 14/10/2016
    # -------------------
    # NOTE(review): the table below spells this model 'c4310'; the value 1325.0
    # matches this derivation, so the key is presumably the same host -- the
    # spelling is kept because the key is a runtime lookup value.
    #   x + 2 x E5-2640v4 + 128G + 2 * K80 + X520 + p5years = 12281€
    #   x + 2 x E5-2640v4 + 128G + 4 * K80 + X520 + p5years = 19317€
    # price of a K80
    #   >>> (19317.-12281)/2
    #   3518.0
    # assuming the options cost the same as for R630 (X520=210€, p5years=240€, 128G=1778€, E5-2640v4=951€), the cost of the base system is :
    #   >>> 12281-951-951-1778-210-240-3518-3518
    #   1115
    # but if we integrate the X520 card so that we have a 10Gb ethernet in the base, the cost of the base system becomes :
    #   >>> 1115+210
    #   1325
    #
    # on 29/09/2017
    # -------------
    # (host model not stated in these notes; the result matches the 'r640' entry)
    # (x: price without procs, p3106: price of Bronze-3106, p6126: price of Gold6126)
    #   x + p3106 = 1067.0
    #   x + 2 * p3106 = 1067.0 + 320.0
    #   => p3106 = 320
    #   => x = 1067.0 - 320.0 = 747.0
    # check if x computation is consistent with p6126
    #   x + p6126 = 2767
    #   x + 2 * p6126 = 4787.0
    #   => p6126 = 2020.0
    #   => x = 747.0 --> yes !
    #
    # r940 on 09/06/2020
    # ------------------
    # price of r940 (with 2x xeon gold 5215 and 32 Go DDR4 @ 2933MHz) : 3784€
    # (x: price without procs, p5215: price of gold-5215, p6248: price of Gold6248)
    #   p6240 = 2684
    #   p6248 = 3442
    #   p8280l = 12075
    #   x + 2 * p5215 = 3784
    #   x + 4 * p6240 = 11886 => x = 1150
    #   x + 4 * p6248 = 14918 => x = 1150
    #   x + 4 * p8280l = 49450 => x = 1150
    #   => p5215 = 1317 (agrees with proc price on r640)
    base_price_by_host = dict(
        c6220=4890.0,
        r620=860.0,
        r630=733.0,
        r640=747.0,
        r730=799.0,
        r940=1150.0,
        c6320=2020.6,
        c4310=1325.0,
        precision3630=449.0,
    )
    return base_price_by_host[host_id]
def plotSystemEfficiency():
    """Plot the computing value for money of host/processor configurations.

    For every row of 'dell_procoptions_table.dat' whose processor is a
    coffeelake, skylake or cascadelake one, the following are computed, using
    the processor characteristics found in 'cpu_table.dat':

    - the purchase price (processor option + base host + 128G ram upgrade
      + 5 years guarantee + 2To disk upgrade)
    - the power consumption
    - the peak double precision speed

    Each configuration is then drawn as one point, with:

    - x: purchase price
    - y: number of double precision operations performed during the
      container's lifetime, divided by the total cost (purchase + electricity)

    Both files are tab-separated with a header row and are read from the
    current directory. Diagnostic information is printed along the way and
    the figure is finally displayed with plt.show().

    :raises AssertionError: if a processor of the price table cannot be found
        in the cpu table
    """
    cpuTable = numpy.genfromtxt('cpu_table.dat', dtype=("|U15", float, int, float, float, float), names=True, delimiter='\t')
    print(type(cpuTable))
    print(cpuTable.dtype)
    print(cpuTable)
    print(cpuTable['id'])
    dellPriceTable = numpy.genfromtxt('dell_procoptions_table.dat', dtype=("|U15", "|U15", float), names=True, delimiter='\t')

    serverBasePowerConsumption = 100.0  # rough estimation in watts
    kWHPrice = 0.07 * 1.5
    containerLifetime = 7.0  # in years
    # the consumed power is divided by this factor, ie doubled
    # (presumably to account for the infrastructure overhead -- TODO confirm)
    powerUsageEfficiency = 0.5

    ramUpgradePrice128Gb = {
        'c6220': 3520.0,
        'r620': 2010.0,
        'r630': 1778.0,
        'r640': 1780.0,
        'r730': 1778.0,
        'r940': 960.0,  # 32 Gb 2933 MHz RDIMM : 320 €
        'c6320': 6222.6,
        'c4310': 1778.0,
        'precision3630': 1536.0,
    }
    guarantee5YearsPrice = {
        'c6220': 880.0,
        'r620': 240.0,
        'r630': 240.0,
        'r640': 0.0,
        'r730': 240.0,
        'r940': 0.0,
        'c6320': 880.0,
        'c4310': 240.0,
        'precision3630': 0.0,
    }
    hddUpgradePrice2To = {
        'c6220': 320.0,
        'r620': -20.0,
        'r630': 0.0,
        'r640': 70.0,
        'r730': 0.0,
        'r940': 70.0,
        'c6320': 0.0,
        'c4310': 0.0,
        'precision3630': 0.0,
    }

    def getColorCodeFromItemLabel(label):
        """Return the rgb colour of an item: the hue encodes the host model
        and the saturation encodes the processor architecture.

        :param str label: item label, in the form '<host model>_<proc id>'
        """
        # split on the first underscore only, so that an underscore in the
        # processor id would not break the unpacking
        (model, proc_id) = label.split('_', 1)
        saturation = {
            'sandy bridge': 0.0,
            'ivy bridge': 0.2,
            'haswell': 0.2,
            'broadwell': 0.2,
            'skylake': 0.4,
            'coffeelake': 0.6,
            'cascadelake': 1.0,
        }[get_proc_architecture(proc_id)]
        hue = {
            'r620': 0.6,
            'r630': 0.6,
            'r640': 0.6,
            'c4310': 0.6,
            'r730': 0.4,
            'r940': 0.8,
            'c6220': 1.0,
            'c6320': 1.0,
            'precision3630': 0.2,
        }[model]
        value = 0.9
        return matplotlib.colors.hsv_to_rgb((hue, saturation, value))

    def get_marker_from_label(label):
        """Return the marker of an item, which only depends on its processor.

        :param str label: item label, in the form '<host model>_<proc id>'
        """
        (model, proc_id) = label.split('_', 1)
        return get_marker(proc_id)

    # index the cpu table once instead of scanning it for every price row;
    # setdefault keeps the first row when an id appears more than once
    cpuSpecs = {}
    for cpuId, clock, numCores, tdp in zip(cpuTable['id'], cpuTable['clock'], cpuTable['num_cores'], cpuTable['tdp']):
        cpuSpecs.setdefault(cpuId, (clock, numCores, tdp))

    # values are accumulated in plain lists (numpy.append reallocates the
    # whole array at every call) and converted to arrays after the loop
    itemLabel = []
    prices = []
    powerConsumptions = []
    speeds = []
    for hostTypeId, procId, procOptionPrice in zip(dellPriceTable['host_type_id'], dellPriceTable['proc_id'], dellPriceTable['proc_option_price']):
        proc_arch = get_proc_architecture(procId)
        if proc_arch not in ['coffeelake', 'skylake', 'cascadelake']:
            continue

        itemLabel.append(hostTypeId + '_' + procId)
        prices.append(procOptionPrice + get_system_base_price(hostTypeId) + ramUpgradePrice128Gb[hostTypeId] + guarantee5YearsPrice[hostTypeId] + hddUpgradePrice2To[hostTypeId])

        # c6220 and c6320 are counted as 4 servers per container
        numServersPerContainer = 4 if hostTypeId in ('c6220', 'c6320') else 1

        if procId not in cpuSpecs:
            # raised explicitly so that the check is kept under python -O
            raise AssertionError('Failed to find %s in cputable' % procId)
        clock, numCores, tdp = cpuSpecs[procId]

        if hostTypeId == 'precision3630':
            numProcsPerServer = 1
        elif hostTypeId in ['r940']:
            numProcsPerServer = 4
        else:
            numProcsPerServer = 2
        print(hostTypeId, numProcsPerServer)

        powerConsumptions.append((tdp * numProcsPerServer + serverBasePowerConsumption) * numServersPerContainer)
        # peak speed in flops; the clock is multiplied by 1.e9, ie it is
        # expected in GHz
        speeds.append(num_dp_flop_per_cycle(procId) * clock * 1.e9 * numCores * numProcsPerServer * numServersPerContainer)

    itemPrice = numpy.array(prices, dtype=float)
    itemPowerConsumption = numpy.array(powerConsumptions, dtype=float)
    itemSpeed = numpy.array(speeds, dtype=float)

    markerSize = 50

    # NOTE(review): this selects the left cell of a 1x2 grid although a single
    # plot is drawn; kept as is -- confirm whether a full-width plot is wanted
    plt.subplot(1, 2, 1)

    # energy used during the lifetime, in W.h (watts * hours)
    powerUsedInLifetime = (itemPowerConsumption * containerLifetime * 365 * 24) / powerUsageEfficiency
    # purchase price + price of the consumed electricity (W.h -> kW.h)
    itemTotalCost = itemPrice + (powerUsedInLifetime / 1000.0 * kWHPrice)
    # operations performed during the lifetime (flops * seconds)
    item_total_num_ops = itemSpeed * containerLifetime * 365 * 24 * 3600

    x = itemPrice
    y = item_total_num_ops / itemTotalCost
    for label, price, opsPerCost, totalCost, flops in zip(itemLabel, itemPrice, y, itemTotalCost, itemSpeed):
        print(label, price, opsPerCost)
        print('itemTotalCost', totalCost)
        print('flops', flops)

    for label, x1, y1 in zip(itemLabel, x, y):
        if y1 > 0.0001:
            color = getColorCodeFromItemLabel(label)
            marker = get_marker_from_label(label)
            plt.scatter(x1, y1, facecolors=color, s=markerSize, marker=marker, label=label)
            # only the best performing items get a text annotation
            if y1 > 5.7e16:
                plt.annotate( u'%s' % label,
                    xy = (x1, y1), xytext = (x1*4.0, (y1-5.5e16)*7.1),
                    textcoords = 'data', ha = 'right', va = 'bottom',
                    bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
                    arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

    plt.xlabel(u'purchase price [€]')
    plt.ylabel(u'num total DP operations/total cost [€/^-1]')
    plt.title(u'total cost including electricity')
    plt.xlim( xmin = 0.0 )
    plt.ylim( ymin = 0.0 )
    plt.minorticks_on()
    # the visibility flag is passed positionally: its keyword was renamed
    # from 'b' to 'visible' in matplotlib, so neither spelling is portable
    plt.grid(True, which='major', color='b', linestyle='-', linewidth=0.5)
    plt.grid(True, which='minor', color='b', linestyle='-', linewidth=0.2)
    plt.legend(bbox_to_anchor=(1.1, 1.1), ncol=3)
    plt.draw()
    plt.show()
# Script entry point: the plot is produced as soon as this file is executed
# (or imported). The passmark plot is currently disabled:
# plotCpuPassmark()
plotSystemEfficiency()