cocluto/cluster_stats.py

374 lines
14 KiB
Python

# encoding: utf-8
# import sys
import os
import re
import datetime
import numpy as np
import colorsys
# fix to prevent the following error when run from www-data
# Failed to create /var/www/.matplotlib; consider setting MPLCONFIGDIR to a writable directory for matplotlib configuration data
# root@intranet:~# echo ~www-data
# /var/www
os.environ['MPLCONFIGDIR'] = "/tmp/cluster_stats"
import matplotlib
# fix to TclError at /cluster/ClusterEvolution/
# no display name and no $DISPLAY environment variable
# https://matplotlib.org/tutorials/introductory/usage.html#what-is-a-backend
r = os.system('python -c "import matplotlib.pyplot as plt;plt.figure()"')
if r != 0:
matplotlib.use('Agg') # use Anti Grain Geometry backend for non-interactive rendering into pngs, svg, etc...
import matplotlib.pyplot as plt
# import matplotlib.pyplot as plt
import matplotlib.dates
import abc
# from SimpaDbUtil import SqlDatabaseReader, SqlFile
# from inventory import Inventory
def is_cluster_node_name(name):
    """
    tells whether the given machine name matches one of the cluster node naming patterns

    :param str name: the machine name to test
    :return bool: True if name matches 'simpatix' or 'physix' followed by one or more digits
    """
    for node_name_pattern in ('^simpatix[0-9]+$', '^physix[0-9]+$'):
        if re.match(node_name_pattern, name) is not None:
            return True
    return False
def get_investment_over_time(time_value, price, purchase_time):
    """
    computes the amount invested in an item at the given times

    the result is zero for times before the purchase; from the purchase on, it is the price
    multiplied by a linear decay factor that is never allowed to go below zero. As the decay
    rate is currently set to 0.0, this factor stays at 1.

    :param numpy.array time_value: the times at which the investment is evaluated
        (presumably in days, since the decay rate is expressed per day)
    :param float price: the price of the item
    :param float purchase_time: the time of purchase, in the same unit as time_value
    :return numpy.array: the investment at each time in time_value
    """
    decay_rate_per_day = 0.0  # 1.0/(7.0*365.0)
    decay_factor = 1.0 + decay_rate_per_day * (purchase_time - time_value)
    # the decay factor can't become negative
    positive_decay_factor = np.where(decay_factor < 0.0, 0.0, decay_factor)
    # nothing has been invested before the purchase
    not_yet_purchased = time_value < purchase_time
    return np.where(not_yet_purchased, 0.0, positive_decay_factor) * price
def get_flops_over_time(inventory, time_value, computer_id, purchase_time):
    """
    computes the flops provided by a computer at the given times

    :param Inventory inventory: queried for the flops of the computer (get_computer_dflops)
    :param numpy.array time_value: the times at which the flops are evaluated
    :param computer_id: the identifier of the computer, as expected by inventory.get_computer_dflops
    :param float purchase_time: the time of purchase, in the same unit as time_value
    :return numpy.array: 0.0 for the times before the purchase, the computer's flops for the other times
    """
    not_yet_purchased = time_value < purchase_time
    flops_once_purchased = inventory.get_computer_dflops(computer_id)
    return np.where(not_yet_purchased, 0.0, flops_once_purchased)
def get_flops_price_over_time(inventory, time_value):
    """
    computes the price of one flops at the given times, from the purchases of cluster nodes

    each cluster node that has a purchase date defines a flops price:
    (price excluding vat - price of its options) / its flops.
    At a given time, the flops price is the one defined by the latest purchase made at or
    before that time; it is 0.0 before the first purchase.

    :param Inventory inventory: the inventory database
    :param numpy.array time_value: the times at which the price is evaluated, compared against
        matplotlib date numbers (matplotlib.dates.date2num)
    :return numpy.array: the flops price at each time; it always has the shape of time_value,
        even when the inventory contains no dated cluster node purchase
    """
    purchases = []  # (purchase_time, flops_price) tuples
    for row in inventory.query("SELECT * FROM machines"):
        # the machines table is expected to have exactly these 10 columns
        (name, _serial_number, _affectation, _machine_spec_id, _command_id, price_ex_vat, _pos_x, _pos_y, _pos_z, _inv_number) = row
        if not is_cluster_node_name(name):
            continue
        purchase_date = inventory.get_machine_purchase_date(name)
        if purchase_date is None:
            continue
        purchase_time = matplotlib.dates.date2num(purchase_date.date())
        computer_flops = inventory.get_computer_dflops(name)
        flops_price = (price_ex_vat - inventory.get_computer_options_price(name)) / computer_flops
        purchases.append((purchase_time, flops_price))

    # chronological order, so that the most recent purchase overrides the older ones;
    # the sort is stable: purchases made at the same time keep the order of the query
    purchases.sort(key=lambda purchase: purchase[0])

    # start from zeros shaped like time_value (instead of a 0-d array) so that the result
    # can always be plotted against time_value, even if there is no purchase at all
    flops_price_over_time = np.zeros_like(time_value, dtype=float)
    for purchase_time, flops_price in purchases:
        # keep the previous price before this purchase, use the new price from then on
        flops_price_over_time = np.where(time_value < purchase_time, flops_price_over_time, flops_price)
    return flops_price_over_time
def get_computer_value_over_time(inventory, computer_id, time_value, flops_price_over_time, purchase_time):
    """
    computes the value of a computer at the given times, as its flops multiplied by the flops price

    :param Inventory inventory: queried for the flops of the computer (get_computer_dflops)
    :param computer_id: the identifier of the computer, as expected by inventory.get_computer_dflops
    :param numpy.array time_value: the times at which the value is evaluated
    :param numpy.array flops_price_over_time: the price of one flops at each time in time_value
    :param float purchase_time: the time of purchase, in the same unit as time_value
    :return numpy.array: the value at each time (0.0 for the times before the purchase)
    """
    flops_once_purchased = inventory.get_computer_dflops(computer_id)
    # the computer provides no flops before its purchase
    flops_over_time = np.where(time_value < purchase_time, 0.0, flops_once_purchased)
    return flops_over_time * flops_price_over_time
# def stackplot(ax, x_signal, y_signals):
# """
# :param matplotlib.Axes ax:
# :param numpy.array x_signal:
# :param dict(str,numpy.array) y_signals:
# """
# # matplot 1.1.1 doesn't have the stackplot method in Axes
# if 'toto_stackplot' in dir(ax):
# ax.stackplot(x_signal, list(y_signals.itervalues()) )
# plt.legend(list(y_signals.keys()))
# else:
# colors = ['blue', 'orange', 'green', 'purple', 'yellow']
# # emulating missing Axes.stackplot method
# y = np.row_stack(list(y_signals.itervalues()))
# # this call to 'cumsum' (cumulative sum), passing in your y data,
# # is necessary to avoid having to manually order the datasets
# y_stack = np.cumsum(y, axis=0) # a 3x10 array
# for series_index in range(len(y_signals)):
# if series_index == 0:
# from_signal = 0
# else:
# from_signal = y_stack[series_index-1,:]
# ax.fill_between(x_signal, from_signal, y_stack[series_index,:], color=colors[series_index], lw=0.0, label=y_signals.keys()[series_index])
# plt.legend()
def get_rgb_palette(num_colors, saturation=0.5, value=0.5):
    """
    builds a palette of colors whose hues are evenly spread over the hue range

    :param int num_colors: the number of colors in the palette
    :param float saturation: the hsv saturation shared by all colors
    :param float value: the hsv value shared by all colors
    :return list(tuple(float, float, float)): the rgb colors. This is a real list (not a lazy
        map object), so that callers can index it and iterate over it more than once
    """
    return [
        colorsys.hsv_to_rgb(color_index * 1.0 / num_colors, saturation, value)
        for color_index in range(num_colors)]
def stackplot(ax, x_signal, y_signals, legend_location='best'):
    """
    draws the given signals stacked on top of each other, with a legend

    :param matplotlib.Axes ax: the axes to draw into (the legend is attached to these axes)
    :param numpy.array x_signal: the x values shared by all signals
    :param dict(str,numpy.array) y_signals: the signals to stack, indexed by their legend label
    :param str legend_location: one of the values allowed in loc argument of matplotlib's plt.legend() function, or:
        'outside right': outside the graph, at its right
    """
    labels = list(y_signals.keys())
    series = list(y_signals.values())

    if hasattr(ax, 'stackplot'):
        ax.stackplot(x_signal, series)
    else:
        # emulating missing Axes.stackplot method
        # list() because the palette is indexed and could be a lazy iterable
        colors = list(get_rgb_palette(num_colors=len(series), saturation=1.0, value=0.8))
        # this call to 'cumsum' (cumulative sum), passing in your y data,
        # is necessary to avoid having to manually order the datasets
        y_stack = np.cumsum(np.vstack(series), axis=0)
        for series_index, color in enumerate(colors):
            # each series is filled between the top of the previous one and its own top
            from_signal = 0 if series_index == 0 else y_stack[series_index - 1, :]
            ax.fill_between(x_signal, from_signal, y_stack[series_index, :], color=color, lw=0.0)
            # empty rectangle of the same color; presumably it serves as the legend
            # handle of the series on matplotlib versions that can't use fill_between's artist
            ax.add_patch(plt.Rectangle((0, 0), 0, 0, color=color))

    # the legend location is honored whichever way the signals have been drawn
    if legend_location == 'outside right':
        ax.legend(labels, bbox_to_anchor=(1.10, 1.00), loc='upper left')  # force the legend into the bounding box
    else:
        ax.legend(labels, loc=legend_location)
def draw_cluster_value_over_time_graph(inventory, from_date, to_date, graph_type):
    """
    draws a stacked graph showing how a quantity related to the cluster nodes evolves over time,
    with one layer per owner department

    :param Inventory inventory: the inventory database
    :param datetime.date from_date: the start of the time range (one sample per day)
    :param datetime.date to_date: the end of the time range
    :param str graph_type: the quantity to draw; one of:
        'cluster_cost_over_time': the investment made in the cluster nodes
        'cluster_value_over_time': the value of the cluster nodes
        'cluster_dp_gflops_over_time': the flops of the cluster nodes
    :return matplotlib.Figure: the figure, whose title is graph_type
    :raises KeyError: if graph_type is not one of the supported values
    """
    # looked up first, so that an unsupported graph_type fails before any work is done
    y_label = {
        'cluster_cost_over_time': u'cluster investment (€)',
        'cluster_value_over_time': u'cluster value (€)',
        'cluster_dp_gflops_over_time': u'double prec gflops'}[graph_type]

    time_value = matplotlib.dates.drange(dstart=from_date, dend=to_date, delta=datetime.timedelta(days=1))

    # the flops price is only needed to compute the value of the computers
    flops_price_over_time = None
    if graph_type == 'cluster_value_over_time':
        flops_price_over_time = get_flops_price_over_time(inventory, time_value)

    def get_item_value_over_time(name, price_ex_vat, purchase_time):
        # only computes the quantity selected by graph_type
        if graph_type == 'cluster_cost_over_time':
            return get_investment_over_time(time_value, price_ex_vat, purchase_time)
        if graph_type == 'cluster_value_over_time':
            return get_computer_value_over_time(inventory, name, time_value, flops_price_over_time, purchase_time)
        return get_flops_over_time(inventory, time_value, name, purchase_time)

    cluster_value = {}  # the summed quantity over time, for each owner department
    for row in inventory.query("SELECT * FROM machines"):
        # the machines table is expected to have exactly these 10 columns
        (name, _serial_number, _affectation, _machine_spec_id, _command_id, price_ex_vat, _pos_x, _pos_y, _pos_z, _inv_number) = row
        if not is_cluster_node_name(name):
            continue
        purchase_date = inventory.get_machine_purchase_date(name)
        if purchase_date is None:
            continue
        purchase_time = matplotlib.dates.date2num(purchase_date.date())
        item_value_over_time = get_item_value_over_time(name, price_ex_vat, purchase_time)
        # NOTE(review): the full value of the item is credited to each of its owners; the
        # ownership records may also carry an 'owner_ratio' — confirm whether it should weight the value
        for ownership in inventory.get_item_ownership(name):
            # the department is made of the first two dot-separated components of the owner
            owner_dept = '.'.join(ownership['owner'].split('.')[0:2])
            # a new array is built at each addition: an in-place '+=' would modify
            # item_value_over_time, which can be shared by several departments
            cluster_value[owner_dept] = cluster_value.get(owner_dept, 0.0) + item_value_over_time

    fig, ax = plt.subplots()
    ax.set_title(graph_type)
    if cluster_value:
        # stacking an empty set of signals is an error, so an empty inventory gives an empty graph
        stackplot(ax, time_value, cluster_value, legend_location='upper left')
    ax.set_xlabel('time')
    ax.set_ylabel(y_label)

    # the x axis covers whole years
    datemin = datetime.date(from_date.year, 1, 1)
    datemax = datetime.date(to_date.year + 1, 1, 1)
    ax.set_xlim(datemin, datemax)

    max_num_years = 8
    num_years = (datemax - datemin).days // 365
    # YearLocator expects a whole number of years, at least 1
    year_step = max(1, num_years // max_num_years)

    # format the ticks
    ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(year_step))  # every year_step year
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
    ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())  # every month

    ax.grid(True)
    return fig
def draw_dp_gflops_price_over_time_over_time_graph(inventory, from_date, to_date):
    """
    draws the evolution over time of the price of a double precision gflops, on a logarithmic scale

    :param Inventory inventory: the inventory database
    :param from_date: the start of the time range (one sample per day)
    :param to_date: the end of the time range
    :return matplotlib.Figure: the figure, whose title is 'gflops_price_over_time'
    """
    one_day = datetime.timedelta(days=1)
    days = matplotlib.dates.drange(dstart=from_date, dend=to_date, delta=one_day)
    # the inventory gives a price per flops: scale it to a price per gflops
    price_per_gflops = 1.0e9 * get_flops_price_over_time(inventory, days)

    fig, ax = plt.subplots()
    ax.set_yscale('log')
    ax.plot(days, price_per_gflops)
    ax.set_xlabel('time')
    ax.set_ylabel(u'double precision flops price (€/gflops)')
    ax.set_title('gflops_price_over_time')

    # format the ticks: a labelled major tick every year, a minor tick every month
    time_axis = ax.xaxis
    time_axis.set_major_locator(matplotlib.dates.YearLocator())
    time_axis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
    time_axis.set_minor_locator(matplotlib.dates.MonthLocator())

    ax.grid(True)
    return fig
def draw_age_pyramid_graph(inventory):
    """
    draws a bar graph showing the number of cluster compute nodes for each age (in years)

    only the cluster nodes that have a purchase date are counted. The age is the number of
    whole periods of 365 days elapsed between the purchase date and now.

    :param Inventory inventory: the inventory database
    :return matplotlib.Figure: the figure, whose title is 'compute_nodes_age_pyramid'
    """
    oldest_age = 20  # the number of bars in the graph
    age_histogram = np.zeros(shape=(oldest_age))
    now = datetime.datetime.now()
    for row in inventory.query("SELECT * FROM machines"):
        # the machines table is expected to have exactly these 10 columns
        (name, _serial_number, _affectation, _machine_spec_id, _command_id, _price_ex_vat, _pos_x, _pos_y, _pos_z, _inv_number) = row
        if not is_cluster_node_name(name):
            continue
        purchase_date = inventory.get_machine_purchase_date(name)
        if purchase_date is None:
            continue
        # integer division: the age is used as an array index
        age_in_years = (now - purchase_date).days // 365
        # keep the index inside the histogram: machines older than the last bar are counted
        # in the last bar, and machines with a purchase date in the future in the first one
        age_bin = min(max(age_in_years, 0), oldest_age - 1)
        age_histogram[age_bin] += 1

    fig, ax = plt.subplots()
    ax.bar(range(oldest_age), age_histogram)
    ax.set_xlabel('age (in years)')
    ax.set_xticks(range(oldest_age))
    ax.set_ylabel(u'number of compute nodes')
    ax.set_title('compute_nodes_age_pyramid')
    ax.grid(True)
    return fig
class IFigureHandler(abc.ABC):
    """
    specifies what to do with generated figures

    this is an abstract base class: it can't be instantiated, and its subclasses have to
    implement all the methods below to be instantiable
    """

    @abc.abstractmethod
    def on_figure_ended(self, fig):
        """
        called each time a figure has been created

        :param matplotlib.Figure fig: the figure that has just been created
        """

    @abc.abstractmethod
    def on_finalize(self):
        """
        called after all figures have been created
        """
class ScreenFigureHandler(IFigureHandler):
    """
    displays figures on screen
    """

    def __init__(self):
        """
        this handler has no state to initialize
        """

    def on_figure_ended(self, fig):
        """
        does nothing: the figures are displayed by on_finalize

        :param matplotlib.Figure fig: ignored
        """
        return None

    def on_finalize(self):
        """
        displays the figures, using matplotlib's plt.show()
        """
        plt.show()
class SvgFigureHandler(IFigureHandler):
    """
    saves figures as svg files
    """

    def __init__(self, out_svg_dir_path):
        """
        :param str out_svg_dir_path: where to save the svg files
        """
        self._out_svg_dir_path = out_svg_dir_path

    def on_figure_ended(self, fig):
        """
        saves the figure in the output directory, in a svg file named after the title of the figure's first axes

        :param matplotlib.Figure fig: the figure to save
        """
        dir_prefix = self._out_svg_dir_path + '/'
        figure_title = fig.axes[0].get_title()
        fig.savefig(dir_prefix + figure_title + '.svg')

    def on_finalize(self):
        """
        does nothing: each figure has already been saved by on_figure_ended
        """
        return None
def draw_graphs(inventory, from_time, to_time, figure_handler):
    """
    draws all the cluster statistics graphs, and hands each of them over to the figure handler

    the graphs are created in this order: cluster value, cluster gflops, cluster cost,
    gflops price, then the age pyramid of the compute nodes.

    :param Inventory inventory: the inventory database
    :param datetime from_time: the start of the time range (only its date is used)
    :param datetime to_time: the end of the time range (only its date is used)
    :param IFigureHandler figure_handler: notified after each figure, then once all figures are created
    """
    per_dept_graph_types = (
        'cluster_value_over_time',
        'cluster_dp_gflops_over_time',
        'cluster_cost_over_time')
    for graph_type in per_dept_graph_types:
        per_dept_figure = draw_cluster_value_over_time_graph(inventory, from_time.date(), to_time.date(), graph_type)
        figure_handler.on_figure_ended(per_dept_figure)

    price_figure = draw_dp_gflops_price_over_time_over_time_graph(inventory, from_time.date(), to_time.date())
    figure_handler.on_figure_ended(price_figure)

    age_figure = draw_age_pyramid_graph(inventory)
    figure_handler.on_figure_ended(age_figure)

    figure_handler.on_finalize()