Bug 2120 - ajouter des stats sur l'évolution du cluster

- the cluster evolution graphs are now displayed on the intranet :
   - I had to make the python graphing code handle the debian 7 version of matplotlib (this was rather painful)
   - I had to handle the case where the default matplotlib backend doesn't work (because of no display)
This commit is contained in:
Guillaume Raffy 2018-02-08 10:49:39 +00:00
parent 7ca596a609
commit c309da0ef4
2 changed files with 304 additions and 1 deletions

304
cluster_stats.py Normal file
View File

@ -0,0 +1,304 @@
# encoding: utf-8
import sys
import os
import re
import datetime
import numpy as np
# Point matplotlib at a configuration directory under /tmp (set before the
# matplotlib import below). This is a fix for the following error when run
# from www-data:
#   Failed to create /var/www/.matplotlib; consider setting MPLCONFIGDIR to a writable directory for matplotlib configuration data
# which happens because the home directory of www-data is /var/www:
#   root@intranet:~# echo ~www-data
#   /var/www
os.environ['MPLCONFIGDIR'] = "/tmp/cluster_stats"
import matplotlib
# Fix for "TclError at /cluster/ClusterEvolution/":
#   no display name and no $DISPLAY environment variable
# https://matplotlib.org/tutorials/introductory/usage.html#what-is-a-backend
# A separate python process is launched to try creating a figure with the
# default backend; a non-zero exit status is treated as "the default backend
# doesn't work here".
r = os.system('python -c "import matplotlib.pyplot as plt;plt.figure()"')
if r != 0:
    matplotlib.use('Agg') # use Anti Grain Geometry backend for non-interactive rendering into pngs, svg, etc...
# NOTE(review): pyplot is imported after the backend selection above; it is
# assumed to be imported unconditionally (the rest of the module uses plt in
# all cases) -- confirm against the original indentation.
import matplotlib.pyplot as plt
#import matplotlib.pyplot as plt
import matplotlib.dates
import abc
from SimpaDbUtil import SqlDatabaseReader, SqlFile
from inventory import Inventory
def get_investment_over_time(time_value, price, purchase_time):
    """
    computes the investment that an item represents at each given point in time

    The investment is 0 before purchase_time. From purchase_time on, it is
    price scaled by a linear decay factor that is clamped so that it never
    goes below 0. The decay rate is currently set to 0.0, which means that the
    investment stays equal to price once the item has been purchased.

    :param numpy.array time_value: the points in time to evaluate (same unit as purchase_time)
    :param float price: the price of the item
    :param float purchase_time: the time at which the item has been purchased
    :return numpy.array: the investment for each element of time_value
    """
    decay_rate_per_day = 0.0  # 1.0/(7.0*365.0)
    remaining_ratio = 1.0 + decay_rate_per_day * (purchase_time - time_value)
    # a decayed item can't be worth less than nothing
    remaining_ratio = np.where(remaining_ratio < 0.0, 0.0, remaining_ratio)
    # nothing has been invested as long as the item is not purchased
    owned_ratio = np.where(time_value < purchase_time, 0.0, remaining_ratio)
    return owned_ratio * price
def get_flops_over_time(inventory, time_value, computer_serial_number, purchase_time):
    """
    computes what a computer contributes, at each given point in time, in terms of
    the quantity returned by inventory.get_computer_dflops

    The contribution is 0 before purchase_time and the value returned by
    inventory.get_computer_dflops from purchase_time on.

    :param Inventory inventory: the inventory database
    :param numpy.array time_value: the points in time to evaluate (same unit as purchase_time)
    :param str computer_serial_number: identifies the computer in the inventory
    :param float purchase_time: the time at which the computer has been purchased
    :return numpy.array: the contribution for each element of time_value
    """
    not_purchased_yet = time_value < purchase_time
    flops_once_purchased = inventory.get_computer_dflops(computer_serial_number)
    return np.where(not_purchased_yet, 0.0, flops_once_purchased)
def get_flops_price_over_time(inventory, time_value):
    """
    computes the price of a flops unit at each given point in time

    Each cluster node (machine whose name matches simpatix<number>) that has a
    purchase date defines a price per flops unit: its price excluding vat,
    minus the price of its options, divided by the value returned by
    inventory.get_computer_dflops. At a given point in time, the price is the
    one defined by the most recent purchase made at or before that time, and
    0 if no purchase has been made yet.

    :param Inventory inventory: the inventory database
    :param numpy.array time_value: the points in time to evaluate (matplotlib date numbers)
    :return numpy.array: the price for each element of time_value (always the same shape as time_value, even if no purchase is found)
    """
    cluster_node_name_pattern = re.compile('^simpatix[0-9]+$')  # compiled once instead of once per row

    flops_prices = []
    for row in inventory.query("SELECT * FROM machines"):
        # the full unpacking is kept on purpose : it fails loudly if the table layout changes
        (name, serial_number, affectation, machine_spec_id, command_id, price_ex_vat, pos_x, pos_y, pos_z, inv_number) = row
        if not cluster_node_name_pattern.match(name):
            continue
        purchase_date = inventory.get_machine_purchase_date(serial_number)
        if purchase_date is None:
            continue
        purchase_time = matplotlib.dates.date2num(purchase_date.date())
        computer_flops = inventory.get_computer_dflops(serial_number)
        flops_price = (price_ex_vat - inventory.get_computer_options_price(name)) / computer_flops
        flops_prices.append({'time': purchase_time, 'flops_price': flops_price, 'purchase_date': purchase_date})

    # apply the purchases in chronological order, so that the latest purchase wins
    flops_prices.sort(key=lambda item: item['time'])

    # start from an array that has the shape of time_value (instead of a 0-d
    # array), so that the result can be plotted against time_value even when
    # there's no purchase at all
    flops_price_over_time = np.zeros(np.shape(time_value))
    for item in flops_prices:
        flops_price_over_time = np.where(time_value < item['time'], flops_price_over_time, item['flops_price'])
    return flops_price_over_time
def get_computer_value_over_time(inventory, computer_serial_number, time_value, flops_price_over_time, purchase_time):
    """
    computes the value of a computer at each given point in time

    The value is 0 before purchase_time. From purchase_time on, it's the value
    returned by inventory.get_computer_dflops for this computer, multiplied by
    flops_price_over_time.

    :param Inventory inventory: the inventory database
    :param str computer_serial_number: identifies the computer in the inventory
    :param numpy.array time_value: the points in time to evaluate (same unit as purchase_time)
    :param numpy.array flops_price_over_time: the price of a flops unit for each element of time_value
    :param float purchase_time: the time at which the computer has been purchased
    :return numpy.array: the value of the computer for each element of time_value
    """
    flops_once_purchased = inventory.get_computer_dflops(computer_serial_number)
    flops_over_time = np.where(time_value < purchase_time, 0.0, flops_once_purchased)
    return flops_over_time * flops_price_over_time
# def stackplot(ax, x_signal, y_signals):
# """
# :param matplotlib.Axes ax:
# :param numpy.array x_signal:
# :param dict(str,numpy.array) y_signals:
# """
# # matplot 1.1.1 doesn't have the stackplot method in Axes
# if 'toto_stackplot' in dir(ax):
# ax.stackplot(x_signal, list(y_signals.itervalues()) )
# plt.legend(list(y_signals.keys()))
# else:
# colors = ['blue', 'orange', 'green', 'purple', 'yellow']
# # emulating missing Axes.stackplot method
# y = np.row_stack(list(y_signals.itervalues()))
# # this call to 'cumsum' (cumulative sum), passing in your y data,
# # is necessary to avoid having to manually order the datasets
# y_stack = np.cumsum(y, axis=0) # a 3x10 array
# for series_index in range(len(y_signals)):
# if series_index == 0:
# from_signal = 0
# else:
# from_signal = y_stack[series_index-1,:]
# ax.fill_between(x_signal, from_signal, y_stack[series_index,:], color=colors[series_index], lw=0.0, label=y_signals.keys()[series_index])
# plt.legend()
def stackplot(ax, x_signal, y_signals):
    """
    draws the given signals stacked on top of each other, with a legend

    Uses ax.stackplot when the Axes object provides it; otherwise (old versions
    of matplotlib) emulates it with ax.fill_between. Nothing is drawn if
    y_signals is empty.

    :param matplotlib.Axes ax: the axes to draw into
    :param numpy.array x_signal: the x values, shared by all signals
    :param dict(str,numpy.array) y_signals: the signals to stack, indexed by their label
    """
    if not y_signals:
        return

    # labels and series are built together to guarantee that they match
    labels = list(y_signals.keys())
    series = [y_signals[label] for label in labels]

    if hasattr(ax, 'stackplot'):
        ax.stackplot(x_signal, series)
    else:
        # emulating missing Axes.stackplot method
        colors = ['blue', 'orange', 'green', 'purple', 'yellow']
        # the cumulative sum gives the upper boundary of each stacked series
        y_stack = np.cumsum(np.vstack(series), axis=0)
        for series_index in range(len(series)):
            # cycle through the colors so that more than len(colors) series can be drawn
            color = colors[series_index % len(colors)]
            if series_index == 0:
                from_signal = 0
            else:
                from_signal = y_stack[series_index - 1, :]
            ax.fill_between(x_signal, from_signal, y_stack[series_index, :], color=color, lw=0.0)
            # empty rectangle of the same color, added so that the legend has an entry for this series
            ax.add_patch(plt.Rectangle((0, 0), 0, 0, color=color))
    # the legend is attached to ax itself rather than to pyplot's current axes
    ax.legend(labels)
def draw_cluster_value_over_time_graph(inventory, from_date, to_date, graph_type):
    """
    draws the evolution over time of a quantity of the cluster, as a stacked graph with one layer per owner department

    Only the machines whose name matches simpatix<number> and that have a
    purchase date are taken into account. The department of an owner is the
    second dot-separated field of ownership['owner'].

    :param Inventory inventory: the inventory database
    :param datetime.date from_date: the start of the time range (one sample per day)
    :param datetime.date to_date: the end of the time range
    :param str graph_type: the quantity to draw : 'cluster_cost_over_time', 'cluster_value_over_time' or 'cluster_dp_gflops_over_time'
    :return matplotlib.Figure: the figure containing the graph
    :raises KeyError: if graph_type is not one of the supported values
    """
    y_labels = {
        'cluster_cost_over_time': u'cluster investment (€)',
        'cluster_value_over_time': u'cluster value (€)',
        'cluster_dp_gflops_over_time': u'double prec gflops'}
    y_label = y_labels[graph_type]  # fails early (KeyError) on an unknown graph type

    time_value = matplotlib.dates.drange(dstart=from_date, dend=to_date, delta=datetime.timedelta(days=1))

    # the flops price is only needed to compute the value of the computers
    flops_price_over_time = None
    if graph_type == 'cluster_value_over_time':
        flops_price_over_time = get_flops_price_over_time(inventory, time_value)

    def get_item_signal(serial_number, price_ex_vat, purchase_time):
        # computes only the signal requested by graph_type
        if graph_type == 'cluster_cost_over_time':
            return get_investment_over_time(time_value, price_ex_vat, purchase_time)
        if graph_type == 'cluster_value_over_time':
            return get_computer_value_over_time(inventory, serial_number, time_value, flops_price_over_time, purchase_time)
        return get_flops_over_time(inventory, time_value, serial_number, purchase_time)

    cluster_value = {}
    for row in inventory.query("SELECT * FROM machines"):
        (name, serial_number, affectation, machine_spec_id, command_id, price_ex_vat, pos_x, pos_y, pos_z, inv_number) = row
        if not re.match('^simpatix[0-9]+$', name):
            continue
        purchase_date = inventory.get_machine_purchase_date(serial_number)
        if purchase_date is None:
            continue
        purchase_time = matplotlib.dates.date2num(purchase_date.date())
        item_value_over_time = get_item_signal(serial_number, price_ex_vat, purchase_time)
        # NOTE(review): the ownership ratio (presumably ownership['owner_ratio']) is
        # not applied, so an item with several owners counts fully for each of
        # them -- confirm that this is intended
        for ownership in inventory.get_item_ownership(serial_number):
            owner_dept = ownership['owner'].split('.')[1]
            if owner_dept not in cluster_value:
                cluster_value[owner_dept] = np.zeros_like(time_value)
            # the item is added in all cases, including when it's the first one
            # encountered for this department
            cluster_value[owner_dept] += item_value_over_time

    fig, ax = plt.subplots()
    ax.set_title(graph_type)
    stackplot(ax, time_value, cluster_value)
    ax.set_xlabel('time')
    ax.set_ylabel(y_label)

    # format the ticks : a major tick every year, a minor tick every month
    ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
    ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())

    # the x axis spans whole years
    datemin = datetime.date(from_date.year, 1, 1)
    datemax = datetime.date(to_date.year + 1, 1, 1)
    ax.set_xlim(datemin, datemax)

    ax.grid(True)
    return fig
def draw_dp_gflops_price_over_time_over_time_graph(inventory, from_date, to_date):
    """
    draws the curve returned by get_flops_price_over_time, with one sample per day and a logarithmic y axis

    :param Inventory inventory: the inventory database
    :param datetime.date from_date: the start of the time range
    :param datetime.date to_date: the end of the time range
    :return matplotlib.Figure: the figure containing the graph
    """
    one_day = datetime.timedelta(days=1)
    days = matplotlib.dates.drange(dstart=from_date, dend=to_date, delta=one_day)
    gflops_price = get_flops_price_over_time(inventory, days)

    fig, ax = plt.subplots()
    ax.set_yscale('log')
    ax.plot(days, gflops_price)
    ax.set_xlabel('time')
    ax.set_ylabel(u'double precision flops price (€/gflops)')
    ax.set_title('gflops_price_over_time')

    # ticks : the years are the major ticks (labelled), the months are the minor ticks
    time_axis = ax.xaxis
    time_axis.set_major_locator(matplotlib.dates.YearLocator())
    time_axis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
    time_axis.set_minor_locator(matplotlib.dates.MonthLocator())

    ax.grid(True)
    return fig
# The base class is created by calling abc.ABCMeta directly : this gives
# IFigureHandler the ABCMeta metaclass with a syntax that is valid in both
# python 2 and python 3. Without this metaclass, abc.abstractmethod has no
# effect.
class IFigureHandler(abc.ABCMeta('_FigureHandlerBase', (object,), {})):
    """
    specifies what to do with generated figures

    This is an abstract class : it can only be instantiated through a subclass
    that implements all the abstract methods.
    """

    @abc.abstractmethod
    def on_figure_ended(self, fig):
        """
        called each time a figure has been created

        :param matplotlib.Figure fig: the figure that has just been created
        """
        pass

    @abc.abstractmethod
    def on_finalize(self):
        """
        called after all figures have been created
        """
        pass
class ScreenFigureHandler(IFigureHandler):
    """
    figure handler that shows the figures on screen
    """

    def on_figure_ended(self, fig):
        # nothing to do for each figure : they are all shown in one go in on_finalize
        return None

    def on_finalize(self):
        plt.show()
class SvgFigureHandler(IFigureHandler):
    """
    saves figures as svg files
    """

    def __init__(self, out_svg_dir_path):
        """
        :param str out_svg_dir_path: where to save the svg files
        """
        self._out_svg_dir_path = out_svg_dir_path

    def on_figure_ended(self, fig):
        """
        saves the figure as <title of the first axes of the figure>.svg in the output directory

        :param matplotlib.Figure fig: the figure to save
        """
        svg_file_name = fig.axes[0].get_title() + '.svg'
        fig.savefig(os.path.join(self._out_svg_dir_path, svg_file_name))

    def on_finalize(self):
        # nothing to do : each figure has already been saved in on_figure_ended
        pass
def draw_graphs(inventory, from_time, to_time, figure_handler):
    """
    creates all the cluster evolution graphs and hands each of them over to the given figure handler

    :param Inventory inventory: the inventory database
    :param datetime from_time: the start of the time range
    :param datetime to_time: the end of the time range
    :param IFigureHandler figure_handler: receives each figure once it's created, and is notified when all figures are done
    """
    from_date = from_time.date()
    to_date = to_time.date()

    # the graphs that are stacked per owner department
    for graph_type in ('cluster_value_over_time', 'cluster_dp_gflops_over_time', 'cluster_cost_over_time'):
        figure_handler.on_figure_ended(draw_cluster_value_over_time_graph(inventory, from_date, to_date, graph_type))

    figure_handler.on_figure_ended(draw_dp_gflops_price_over_time_over_time_graph(inventory, from_date, to_date))

    figure_handler.on_finalize()

View File

@ -3,7 +3,6 @@ import sys
import os
import re
import datetime
import numpy as np
from SimpaDbUtil import SqlDatabaseReader, SqlFile