# -*- coding: utf-8 -*-
"""Plot cost-efficiency comparisons of Dell server configurations.

The script reads two tab-separated tables from the current directory
(``cpu_table.dat`` and ``dell_procoptions_table.dat``) and draws, for each
host/processor combination, the number of double precision operations
obtained per euro of total cost (purchase price plus electricity).
"""
import hashlib
import itertools
import re
from string import ascii_lowercase

import numpy
import pylab
import matplotlib.pyplot as plt
import matplotlib.colors

markerTypes = [',', '+', '.', '^', 'v', '<', '>', 'o', '*', '1', '2', '3',
               '4', '8', 's', 'p', 'h', 'H', 'x', 'X', 'D', 'd', '|', '_']
# for c in ascii_lowercase:
#     markerTypes.append('$%s$' % c)
# markerColors = ('r', 'g', 'b')
markerColors = ('r',)

# Ordered (pattern, architecture) pairs: the first pattern that matches the
# beginning of a processor id wins, so the versioned E5-26xx patterns
# (v4, v3, v2) must stay before the unversioned one.
_PROC_ARCH_PATTERNS = (
    (r'core-i[357]-8[0-9][0-9][0-9][ktbuh]', 'coffeelake'),
    (r'Silver-[0-9]2[0-9][0-9]', 'cascadelake'),
    (r'Gold-[0-9]2[0-9][0-9]', 'cascadelake'),
    (r'Platinum-[0-9]2[0-9][0-9]', 'cascadelake'),
    (r'Gold-[0-9]1[0-9][0-9]', 'skylake'),
    (r'Platinum-[0-9]1[0-9][0-9]', 'skylake'),
    (r'E5-26[0-9][0-9][LWA]*v4', 'broadwell'),
    (r'E5-26[0-9][0-9][LWA]*v3', 'haswell'),
    (r'E5-26[0-9][0-9][LWA]*v2', 'ivy bridge'),
    (r'E5-26[0-9][0-9][LWA]*', 'sandy bridge'),
    (r'X56[0-9][0-9]', 'gulftown'),
    (r'X55[0-9][0-9]', 'gainestown'),
    (r'E54[0-9][0-9]', 'harpertown'),
    (r'51[0-9][0-9]', 'woodcrest'),
)

# Skylake/Cascade Lake processors that have two AVX-512 FMA units per core
# (see the reference quoted in num_dp_flop_per_cycle).
_DUAL_AVX512_FMA_PATTERN = (
    r'Platinum-[0-9][12][0-9][0-9]|Gold-5122|Gold-6[12][0-9][0-9]')


def get_marker(proc_id):
    """Return a marker type chosen deterministically from a processor id.

    The md5 digest of the id is used as an index into ``markerTypes``, so a
    given processor id always maps to the same marker.

    :param str proc_id: eg 'Gold-6126'
    :return str: one of the entries of ``markerTypes``
    """
    digest = hashlib.md5(proc_id.encode('utf-8')).hexdigest()
    return markerTypes[int(digest, 16) % len(markerTypes)]


def plotCpuPassmark():
    """Scatter-plot the passmark score against clock * number of cores.

    Reads ``cpu_table.dat`` from the current directory and shows the figure.
    Rows whose passmark is not strictly positive are skipped.
    """
    # NOTE(review): this reader declares 5 columns and uses a 'cpumark'
    # column, whereas plotSystemEfficiency reads the same file with 6 columns
    # and a 'cpumark_1_cpu' column -- confirm which layout is current.
    cpuTable = numpy.genfromtxt(
        'cpu_table.dat',
        dtype=("|U10", float, int, float, float),
        names=True,
        delimiter='\t')
    # subplot indices start at 1 (an index of 0 is rejected by matplotlib)
    plt.subplot(1, 1, 1)
    plt.subplots_adjust(bottom=0.1)
    markersCycler = itertools.cycle(
        itertools.product(markerTypes, markerColors))
    labels = cpuTable['id']
    x = cpuTable['clock'] * cpuTable['num_cores']
    y = cpuTable['cpumark']
    markerSize = 50
    for label, x1, y1 in zip(labels, x, y):
        if y1 <= 0.0:
            continue  # no passmark available for this data
        # the colour depends on the last character of the processor id
        color = 'b' if label[-1] == '2' else 'r'
        # next(iterator) works on python 2.6+ and python 3, unlike .next()
        marker = next(markersCycler)
        plt.scatter(x1, y1, color=color, s=markerSize, marker=marker[0],
                    label=label)
    plt.xlabel(u'theoretical cpu speed [core.GHz]')
    plt.ylabel(u'passmark [?]')
    plt.title(u'comparison between cpu theoretical and effective speed')
    plt.xlim(xmin=0.0)
    plt.ylim(ymin=0.0)
    plt.legend(bbox_to_anchor=(0.2, 1.0))
    # plt.legend()
    plt.draw()
    plt.show()


def get_proc_architecture(proc_id):
    """Return the micro-architecture name of a processor.

    :param str proc_id: eg 'E5-2660v4' or 'Gold-6126'
    :return str: eg 'broadwell'
    :raises AssertionError: if the id matches none of the known patterns
    """
    for pattern, architecture in _PROC_ARCH_PATTERNS:
        if re.match(pattern, proc_id):
            return architecture
    # explicit raise instead of ``assert False`` so that the check is not
    # stripped when python runs with -O
    raise AssertionError('unknown processor id : %s' % proc_id)


def get_proc_arch_transistor_size(proc_arch):
    """Return the transistor size associated with a processor.

    :param str proc_arch: despite its name, this is a processor id
        (eg 'E5-2660v4'), because it is passed to get_proc_architecture
    :return int: transistor size (presumably in nanometers)
    """
    return {
        'woodcrest': 65,
        'harpertown': 45,
        'gainestown': 45,
        'gulftown': 32,
        'sandy bridge': 32,
        'ivy bridge': 22,
        'haswell': 22,
        'broadwell': 14,
        'skylake': 14,
        'coffeelake': 14,
        'cascadelake': 14,
    }[get_proc_architecture(proc_arch)]


def simd_id_to_dp_flops_per_cycle(simd_id):
    """Return the number of double precision flops per cycle of a simd type.

    :param str simd_id: eg 'avx2'
    :return int: number of double precision flops per cycle
    :raises KeyError: if the simd id is unknown
    """
    # from https://stackoverflow.com/questions/15655835/flops-per-cycle-for-sandy-bridge-and-haswell-sse2-avx-avx2
    # Intel Core 2 and Nehalem:
    #
    #   4 DP FLOPs/cycle: 2-wide SSE2 addition + 2-wide SSE2 multiplication
    #   8 SP FLOPs/cycle: 4-wide SSE addition + 4-wide SSE multiplication
    #
    # Intel Sandy Bridge/Ivy Bridge:
    #
    #   8 DP FLOPs/cycle: 4-wide AVX addition + 4-wide AVX multiplication
    #   16 SP FLOPs/cycle: 8-wide AVX addition + 8-wide AVX multiplication
    #
    # Intel Haswell/Broadwell/Skylake/Kaby Lake:
    #
    #   16 DP FLOPs/cycle: two 4-wide FMA (fused multiply-add) instructions
    #   32 SP FLOPs/cycle: two 8-wide FMA (fused multiply-add) instructions
    return {
        'sse4.1': 4,
        'sse4.2': 4,
        'avx': 8,
        'avx2': 16,
        'avx-512': 16,
    }[simd_id]


def get_simd_id(proc_arch):
    """Return the simd instruction set associated with an architecture.

    :param str proc_arch: eg 'broadwell'
    :return str: eg 'sse4.1'
    :raises KeyError: if the architecture is unknown
    """
    return {
        'woodcrest': 'sse4.1',
        'harpertown': 'sse4.1',
        'gainestown': 'sse4.2',
        'gulftown': 'sse4.2',
        'sandy bridge': 'avx',
        'ivy bridge': 'avx',
        'haswell': 'avx2',
        'broadwell': 'avx2',
        'skylake': 'avx-512',
        'cascadelake': 'avx-512',
        'coffeelake': 'avx2',
    }[proc_arch]


def num_dp_flop_per_cycle(proc_id):
    """Return the number of double precision flops per cycle and per core.

    The value is also printed along with the processor id.

    :param str proc_id: eg 'Gold-6126'
    :return int: number of double precision flops per cycle for one core
    """
    proc_arch = get_proc_architecture(proc_id)
    simd_id = get_simd_id(proc_arch)
    num_simd_per_core = 1
    if proc_arch in ('skylake', 'cascadelake'):
        # from https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors :
        # Xeon Platinum, Gold 61XX, and Gold 5122 have two AVX-512 FMA units
        # per core; Xeon Gold 51XX (except 5122), Silver, and Bronze have a
        # single AVX-512 FMA unit per core
        if re.match(_DUAL_AVX512_FMA_PATTERN, proc_id):
            num_simd_per_core = 2
    dp_flops_per_cycle = (
        num_simd_per_core * simd_id_to_dp_flops_per_cycle(simd_id))
    print(proc_id, dp_flops_per_cycle)
    return dp_flops_per_cycle


def get_system_base_price(host_id):
    """Return the price of a host without any processor.

    The comments below record how each price was worked out.

    :param str host_id: eg 'r630'
    :return float: price of the host without processors
    :raises KeyError: if the host id is unknown
    """
    # for r730 on 06/10/2016
    # (x: price without procs, p1 : price of e5-2603v4, p2: price of e5-2609v4)
    # we want to know x, given dell's web site, where we can get the price for
    # multiple proc but not 0
    #   x + p1 = 1014.0
    #   x + 2 * p1 = 1014.0 + 216
    #   => p1 approx= 215.5
    #   => x = 1014. - 215. = 799.0
    #   x + p2 = 1123.0
    #   => p2 = 324.0
    #   x + 2 * p2 = 1447.0

    # for r630 on 14/10/2016
    # (x: price without procs, p2603: price of e5-2603v4, p2609: price of e5-2609v4)
    # we want to know x, given dell's web site, where we can get the price for
    # multiple proc but not 0
    #   x + p2603 = 948.0
    #   x + 2 * p2603 = 948.0 + 216
    #   => p2603 approx= 215.5
    #   => x = 948. - 215. = 733.0
    # verification :
    #   x + p2609 = 1057.0
    #   => p2609 = 1057-733=324.0
    #   x + 2 * p2609 = 1381.0

    # for 4xc6320 on 14/10/2016
    # (x: price without procs, p2603: price of e5-2603v4, p2609: price of e5-2609v4)
    #   x + 4 x (2 x p2620 + p32G) = 5135 € HT
    #   x + 4 x (2 x p2640 + p128G + pX520 + p5years) = 15590 € HT
    #   x + 4 x (2 x p2650 + p128G + pX520 + p5years) = 17340 € HT
    #   x + 4 x (2 x p2660 + p128G + pX520 + p5years) = 19490 € HT
    # by examining this and the price of processors on R630
    #   - E5-2620v4 : 458€
    #   - E5-2640v4 : 951€
    #   - E5-2650v4 : 1209€
    #   - E5-2660v4 : 1525€
    #   - E5-2680v4 : 1867€
    #   - E5-2690v4 : 2261€
    # I could work out that :
    #   - the price of procs on c6320 is the price of procs on r630 * 85%
    #   - the price of the base c6320 with 32 Go and no proc at all is 2020.6
    #   - the price of the 32G to 128G upgrade is 6222.6 euros (cheaper price
    #     of 16G->128G upgrade on r630 : (1778*4 = 7112))
    # details :
    #   >>> (19490.-17340)/8
    #   268.75
    #   >>> (17340.-15590)/8
    #   218.75
    #   >>> 218.75/258.
    #   0.8478682170542635
    #   >>> 268.75/316
    #   0.8504746835443038
    #   >>> 15590.0+((1209.0-951.0)*0.85)*8
    #   17344.4
    #   >>> 15590.0+((1525.0-951.0)*0.85)*8
    #   19493.2
    # price of 128G ram upgrade assuming that 5years guarantee costs 880€
    # (same as c6220),
    #   >>> 15590.0+((458.0-951.0)*0.85)*8-210.0*4-880.0 - 5135.0
    #   6222.6
    #   >>> 5135.0 - (458.0*0.85)*8
    #   2020.6

    # for c4130 on 14/10/2016
    #   x + 2 x E5-2640v4 + 128G + 2 * K80 + X520 + p5years = 12281€
    #   x + 2 x E5-2640v4 + 128G + 4 * K80 + X520 + p5years = 19317€
    # price of a K80
    #   >>> (19317.-12281)/2
    #   3518.0
    # assuming the options cost the same as for R630 (X520=210€, p5years=240€,
    # 128G=1778€, E5-2640v4=951€), the cost of the base system is :
    #   >>> 12281-951-951-1778-210-240-3518-3518
    #   1115
    # but if we integrate the X520 card so that we have a 10Gb ethernet in the
    # base, the cost of the base system becomes :
    #   >>> 1115+210
    #   1325

    # on 29/09/2017 (presumably for r640, whose base price below is 747.0)
    # (x: price without procs, p3106: price of Bronze-3106, p6126: price of Gold6126)
    # we want to know x, given dell's web site, where we can get the price for
    # multiple proc but not 0
    #   x + p3106 = 1067.0
    #   x + 2 * p3106 = 1067.0 + 320.0
    #   => p3106 = 320
    #   => x = 1067.0 - 320.0 = 747.0
    # check if x computation is consistent with p6126
    #   x + p6126 = 2767
    #   x + 2 * p6126 = 4787.0
    #   => p6126 = 2020.0
    #   => x = 747.0 --> yes !

    # price of r940 (with 2x xeon gold 5215 and 32 Go DDR4 @ 2933 MHz) on
    # 09/06/2020 : 3784€
    # (x: price without procs, p5215: price of gold-5215, p6248: price of Gold6248)
    #   p6240 = 2684
    #   p6248 = 3442
    #   p8280l = 12075
    #   x + 2 * p5215 = 3784
    #   x + 4 * p6240 = 11886 => x = 1150
    #   x + 4 * p6248 = 14918 => x = 1150
    #   x + 4 * p8280l = 49450 => x = 1150
    #   => p5215 = 1317 (agrees with proc price on r640)
    return {
        'c6220': 4890.0,
        'r620': 860.0,
        'r630': 733.0,
        'r640': 747.0,
        'r730': 799.0,
        'r940': 1150.0,
        'c6320': 2020.6,
        'c4310': 1325.0,
        'precision3630': 449.0,
    }[host_id]


def plotSystemEfficiency():
    """Plot the operations per euro of each host/processor combination.

    Reads ``cpu_table.dat`` and ``dell_procoptions_table.dat`` from the
    current directory, keeps the coffeelake, skylake and cascadelake
    processors, and shows a scatter plot of the number of double precision
    operations performed during the container lifetime divided by the total
    cost (purchase price + electricity), against the purchase price.
    Intermediate values are printed on the standard output.

    :raises AssertionError: if a processor of the price table is missing from
        the cpu table, or if its architecture cannot be identified
    """
    cpuTable = numpy.genfromtxt(
        'cpu_table.dat',
        dtype=("|U15", float, int, float, float, float),
        names=True,
        delimiter='\t')
    print(type(cpuTable))
    print(cpuTable.dtype)
    print(cpuTable)
    print(cpuTable['id'])

    dellPriceTable = numpy.genfromtxt(
        'dell_procoptions_table.dat',
        dtype=("|U15", "|U15", float),
        names=True,
        delimiter='\t')

    serverBasePowerConsumption = 100.0  # rough estimation in watts
    kWHPrice = 0.07 * 1.5  # presumably in euros per kWh
    containerLifetime = 7.0  # in years
    # the consumed energy is divided by this factor, ie the energy that is
    # paid for is twice the energy used by the servers
    powerUsageEfficiency = 0.5
    ramUpgradePrice128Gb = {
        'c6220': 3520.0,
        'r620': 2010.0,
        'r630': 1778.0,
        'r640': 1780.0,
        'r730': 1778.0,
        'r940': 960.0,  # 32 Gb 2933 MHz RDIMM : 320 €
        'c6320': 6222.6,
        'c4310': 1778.0,
        'precision3630': 1536.0,
    }
    guarantee5YearsPrice = {
        'c6220': 880.0,
        'r620': 240.0,
        'r630': 240.0,
        'r640': 0.0,
        'r730': 240.0,
        'r940': 0.0,
        'c6320': 880.0,
        'c4310': 240.0,
        'precision3630': 0.0,
    }
    hddUpgradePrice2To = {
        'c6220': 320.0,
        'r620': -20.0,
        'r630': 0.0,
        'r640': 70.0,
        'r730': 0.0,
        'r940': 70.0,
        'c6320': 0.0,
        'c4310': 0.0,
        'precision3630': 0.0,
    }
    plottedArchitectures = ('coffeelake', 'skylake', 'cascadelake')

    def getColorCodeFromItemLabel(label):
        """Return the rgb colour of an item labelled '<host>_<proc id>'.

        The hue depends on the host model and the saturation on the
        processor architecture.
        """
        model, proc_id = label.split('_', 1)
        saturation = {
            'sandy bridge': 0.0,
            'ivy bridge': 0.2,
            'haswell': 0.2,
            'broadwell': 0.2,
            'skylake': 0.4,
            'coffeelake': 0.6,
            'cascadelake': 1.0,
        }[get_proc_architecture(proc_id)]
        hue = {
            'r620': 0.6,
            'r630': 0.6,
            'r640': 0.6,
            'c4310': 0.6,
            'r730': 0.4,
            'r940': 0.8,
            'c6220': 1.0,
            'c6320': 1.0,
            'precision3630': 0.2,
        }[model]
        value = 0.9
        return matplotlib.colors.hsv_to_rgb((hue, saturation, value))

    def get_marker_from_label(label):
        """Return the marker of an item labelled '<host>_<proc id>'."""
        _model, proc_id = label.split('_', 1)
        return get_marker(proc_id)

    # index the cpu table once; setdefault keeps the first row of a given id,
    # like a linear search from the top of the table would
    cpuSpecs = {}
    for cpuId, clock, numCores, tdp in zip(cpuTable['id'],
                                           cpuTable['clock'],
                                           cpuTable['num_cores'],
                                           cpuTable['tdp']):
        cpuSpecs.setdefault(cpuId, (clock, numCores, tdp))

    itemLabel = []
    itemPrice = []
    itemPowerConsumption = []
    itemSpeed = []
    for hostTypeId, procId, procOptionPrice in zip(
            dellPriceTable['host_type_id'],
            dellPriceTable['proc_id'],
            dellPriceTable['proc_option_price']):
        if get_proc_architecture(procId) not in plottedArchitectures:
            continue
        try:
            clock, numCores, tdp = cpuSpecs[procId]
        except KeyError:
            raise AssertionError('Failed to find %s in cputable' % procId)

        # the c6220 and c6320 items are counted as 4 servers each
        if hostTypeId in ('c6220', 'c6320'):
            numServersPerContainer = 4
        else:
            numServersPerContainer = 1

        if hostTypeId == 'precision3630':
            numProcsPerServer = 1
        elif hostTypeId in ('r940',):
            numProcsPerServer = 4
        else:
            numProcsPerServer = 2
        print(hostTypeId, numProcsPerServer)

        itemLabel.append(hostTypeId + '_' + procId)
        itemPrice.append(
            procOptionPrice
            + get_system_base_price(hostTypeId)
            + ramUpgradePrice128Gb[hostTypeId]
            + guarantee5YearsPrice[hostTypeId]
            + hddUpgradePrice2To[hostTypeId])
        itemPowerConsumption.append(
            (tdp * numProcsPerServer + serverBasePowerConsumption)
            * numServersPerContainer)
        # speed in double precision flops per second (clock is in GHz).
        # Alternative speed measures that were used previously :
        # - clock[MHz] * numCores * numProcsPerServer * numServersPerContainer
        # - cpumark * numProcsPerServer * numServersPerContainer
        itemSpeed.append(
            num_dp_flop_per_cycle(procId) * clock * 1.e9 * numCores
            * numProcsPerServer * numServersPerContainer)

    # build the arrays once instead of growing them with numpy.append
    itemPrice = numpy.array(itemPrice, dtype=float)
    itemPowerConsumption = numpy.array(itemPowerConsumption, dtype=float)
    itemSpeed = numpy.array(itemSpeed, dtype=float)

    markerSize = 50

    # disabled plot : speed/price ratio against speed/power consumption ratio
    plotSpeedRatios = False
    annotateSpeedRatios = False
    if plotSpeedRatios:
        plt.subplot(1, 2, 1)
        plt.subplots_adjust(bottom=0.1)
        markersCycler = itertools.cycle(
            itertools.product(markerTypes, markerColors))
        x = itemSpeed / itemPrice
        y = itemSpeed / itemPowerConsumption
        for label, x1, y1 in zip(itemLabel, x, y):
            marker = next(markersCycler)
            color = getColorCodeFromItemLabel(label)
            plt.scatter(x1, y1, color=color, s=markerSize, marker=marker[0],
                        label=label)
        if annotateSpeedRatios:
            plt.scatter(x, y, marker='o')
            for label, x1, y1, power, speed, price in zip(
                    itemLabel, x, y, itemPowerConsumption, itemSpeed,
                    itemPrice):
                plt.annotate(
                    u'%s (%.1f core.GHz, %.0f W, %.0f €)' % (
                        label, speed / 1000.0, power, price),
                    xy=(x1, y1), xytext=(-50, 50),
                    textcoords='offset points', ha='right', va='bottom',
                    bbox=dict(boxstyle='round,pad=0.5', fc='yellow',
                              alpha=0.5),
                    arrowprops=dict(arrowstyle='->',
                                    connectionstyle='arc3,rad=0'))
        plt.xlabel(u'speed/price ratio [core.MHz/€]')
        plt.ylabel(u'speed/power consumption ratio [core.MHz/W]')
        plt.xlim(xmin=0.0)
        plt.ylim(ymin=0.0)

    plt.subplot(1, 2, 1)

    # watts * hours = Wh ; divided by 1000.0 below to get kWh
    powerUsedInLifetime = (
        (itemPowerConsumption * containerLifetime * 365 * 24)
        / powerUsageEfficiency)
    itemTotalCost = itemPrice + (powerUsedInLifetime / 1000.0 * kWHPrice)
    item_flops = itemSpeed
    item_total_num_ops = item_flops * containerLifetime * 365 * 24 * 3600
    x = itemPrice
    y = item_total_num_ops / itemTotalCost
    for label, price, opsPerEuro, totalCost, flops in zip(
            itemLabel, itemPrice, y, itemTotalCost, item_flops):
        print(label, price, opsPerEuro)
        print('itemTotalCost', totalCost)
        print('flops', flops)

    for label, x1, y1 in zip(itemLabel, x, y):
        if y1 > 0.0001:
            color = getColorCodeFromItemLabel(label)
            marker = get_marker_from_label(label)
            plt.scatter(x1, y1, facecolors=color, s=markerSize,
                        marker=marker, label=label)
            # only the most efficient items get a text label
            if y1 > 5.7e16:
                plt.annotate(
                    u'%s' % label,
                    xy=(x1, y1),
                    xytext=(x1 * 4.0, (y1 - 5.5e16) * 7.1),
                    textcoords='data', ha='right', va='bottom',
                    bbox=dict(boxstyle='round,pad=0.5', fc='yellow',
                              alpha=0.5),
                    arrowprops=dict(arrowstyle='->',
                                    connectionstyle='arc3,rad=0'))

    plt.xlabel(u'purchase price [€]')
    plt.ylabel(u'num total DP operations/total cost [€^-1]')
    plt.title(u'total cost including electricity')
    plt.xlim(xmin=0.0)
    plt.ylim(ymin=0.0)
    plt.minorticks_on()
    # the visibility flag is passed positionally because its keyword name
    # differs between matplotlib versions (b, then visible)
    plt.grid(True, which='major', color='b', linestyle='-', linewidth=0.5)
    plt.grid(True, which='minor', color='b', linestyle='-', linewidth=0.2)
    plt.legend(bbox_to_anchor=(1.1, 1.1), ncol=3)
    plt.draw()
    plt.show()


if __name__ == '__main__':
    # plotCpuPassmark()
    plotSystemEfficiency()