concho/concho/hpev2.py

398 lines
21 KiB
Python

from typing import List, Tuple
from concho.config import IHtmlConfiguratorParser, Configurator, Module, Option, Cpu, Price, Dimm, SdramChip, DimmCas, Config, Chassis
from pathlib import Path
from lxml.html import HtmlElement, parse as parse_html
import re
import pandas as pd
import json
def parse_price(price_as_str: str) -> Price:
    '''
    Build a Price from a price string as displayed by the configurator.

    price_as_str: eg 'EUR 1,092.65'

    The 'EUR' currency marker and the ',' thousands separators are removed;
    what remains (eg ' 1092.65') is handed over to the Price constructor.
    '''
    without_currency = price_as_str.replace('EUR', '')
    without_separators = without_currency.replace(',', '')
    return Price(without_separators)
class Quantity():
    '''
    Quantity of an item, decoded from the '<selected>/<choices>' text form.
    '''
    num_selected: int  # the quantity currently selected
    choices: List[int]  # the quantities that can be chosen

    def __init__(self, quantity_details: str):
        '''
        quantity_details: eg '2/[0, 2, 4, 6, 8, 12, 16]', where the part
            before the '/' is the selected quantity and the part after it
            is the json encoded list of choices
        '''
        fields = quantity_details.split('/')
        selected_field = fields[0]
        self.num_selected = int(selected_field)
        choices_field = fields[1]
        self.choices = json.loads(choices_field)
class HpeV2ConfiguratorParser(IHtmlConfiguratorParser):
# the body of the HpeV2 html pages looks like this:
# <body style="">
# <div id="dataWrap" class="lang_en_US">
# <div id="theData" data-role="page">
# <div id="eocs_config_page" class="eocs_page hybris_punchout">
# <div id="sce_toolbar" class="sce-toolbar" style="display: none;">
# <div id="sce_toolbar_left" class="pull-left">
# <ul>
# <li id="sce_toolbar_new_config_normal" class="sce-toolbar-new-config ison"><a
# id="sce_toolbar_new_config">New Configuration</a></li>
# <li id="sce_toolbar_banding_normal" class="sce_toolbar_banding"><a
# id="sce_toolbar_banding">Banding</a></li>
# <li id="sce_toolbar_localization_normal" class="sce_toolbar_localization"><a
# id="sce_toolbar_localization">Localization</a></li>
# <li id="sce_toolbar_search_normal" class="sce_toolbar_search"><a
# id="sce_toolbar_search">Search</a></li>
# </ul>
# </div>
# <div id="sce_toolbar_right" class="pull-right">
# <ul>
# <li class="sce_toolbar_save_config"><span id="sce_toolbar_save_config_icon"
# class="tool-bar-icons sce-toolbar-save-config-icon"></span><input
# id="sce_toolbar_save_config" value="Save" type="button" style="font-size: 14px;"
# class="button slim critical"></li>
# </ul>
# </div>
# <li id="sce_toolbar_support_normal"><a id="sce_toolbar_support">Support</a></li>
# <div id="email_toolbar" class="pull-right"></div>
# <hr>
# </div>
# <div class="config_header">
# <div class="left_title_container">
# <div class="prod-titles-grp">
# <span id="main_title" title="DL380 Gen11 (Cat2 Conf16)" class="product-title pull-left">DL380 Gen11 (Cat2 Conf16)</span>
# <span class="product_subtitle">HPE ProLiant DL380 Gen11 8SFF NC Configure-to-order Server</span>
# </div>
# <div id="expandAll_collapseAll_buttonsDiv"><span id="expandAll_collapseAll"
# class="expandAll_button selected">Expand All</span><span id="angle_id"
# class="angle_class icon-angle-down"></span></div>
# </div>
# <div class="right_message_btns_container pull-left">
# <ul class="product-message-btns pull-right"><label id="config_your_product"></label>
# <li class="pull-left eocs_message_div"><span class="msg-icon hpenew-notes"></span><input
# id="see_all_system_message_div_id" type="button" value="Messages"
# class="all-messages-popup-btn"></li>
# </ul>
# </div>
# </div>
# <div id="center_div" class="column">
# <where all module tables are>
# </div>
# <div id="right_div" class="column">
# ...
# <section class="eocsrightsectiondiv">
# <section class="rsection1">
# <div id="referenceid">Reference Model ID: 46951646</div>
# <div id="price_div">
# <span id="rsection1_actprice" class="rsection_price">
# <label>Your price</label>
# <span class="currency_code">EUR</span>
# <span class="total_price">1,375.99</span>
# </span>
# <span id="rsection1_maxtime" class="rsection_maxtime" style="display: block;">
# <label><i>Estimated to ship in </i></label>
# <span id="max_leadtime" class="max_leadtime">15</span>
# <i class="max_leadtime"> business days </i>
# </span>
# <span id="ecoLablels" class="ecoLablels"><img id="eStar" class="eStar"><img id="ePeat"
# class="ePeat"></span></div>
# inside the <where all module tables are>:
# <div id="ProcessorSection_AdditionalProcessorsChoice" class="choice_container choice_type_multi_select" choicename="Processors">
# <div class="section_container">
# <span id="section_title">Processors</span>
# <div class="choicecontent FE-Carbon-Footprint__choicecontent FE-Occ-Configurator__choicecontent">
# <table id="ProcessorSection_AdditionalProcessorsChoice_table" class="tablemulti no_extended">
# <thead>
# <tr>
# <th class="tableheader description" width="300">Description</th>
# <th class="tableheader partNumber" width="120">Part Number</th>
# <th class="tableheader pcf">CO2e estimé</th>
# <th class="tableheader price">Price</th>
# <th class="tableheader quantity">Qty</th>
# ...
# </tr>
# </thead>
# <tbody>
# <tr id="P49597-B21" itemid="P49597-B21" class="qtyZero P49597-B21" style="">
# <td class="tabledetail radio-btn">
# ...
# </td>
# <td class="tabledetail ">
# <span id="item_description_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="item_description pull-left">
# Intel Xeon-Gold 5415+ 2.9GHz 8-core 150W Processor for HPE
# </span>
# <span id="item_info_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="hpenew-circle-information item_info_icon pull-left eocs_item_tooltip">
# <span class="eocs_item_tooltiptext">
# <li class="eocs_msg_text">Mixing of Processors is not allowed</li>
# </span>
# </span>
# <br>
# </td>
# <td class="tabledetail ">
# P49597-B21
# </td>
# <td class="tabledetail column_hpe_preferred FE-Carbon-Footprint__hide">
# </td>
# <td class="tabledetail column_recommended FE-Carbon-Footprint__hide">
# <span class="icon-checkmark lightgraycolor greencolor">
# </span>
# <br>
# </td>
# <td id="leadtime_P49597-B21" class="tabledetail thinLeadTime ">
# <div>
# <div style="background:Green; width:15px;height:15px;float:left;">
# </div>
# <div style="padding-left: 10px;float:left;">4D
# </div>
# </div>
# </td>
# <td class="tabledetail endDate ">09/30/2025</td>
# <td class="tabledetail pcf">
# <div class="FE-Carbon-Footprint__cf-row" title="CO2e / each">133 kg CO2e</div>
# </td>
# <td id="item_price_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="tabledetail price ">
# <span>EUR 1,092.65</span>
# </td>
# <td class="tabledetail">
# <select id="item_dropdown_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="tabledetailqtyselect" style="border:none;appearance:none;" disabled="disabled">
# <option value="0" selected="selected">0</option>
# <option value="1">1</option>
# <option value="2">2</option>
# </select>
# </td>
# </tr>
def __init__(self):
pass
def get_module_label(self, module_id):
    '''
    Return the label used to locate the given module in the html page.

    module_id: one of 'cpu_change', 'additional_cpus', 'ram'

    Raises KeyError for any other module_id.
    '''
    labels_by_module_id = {
        'cpu_change': 'Processeurs (Passage)',
        # <div id="ProcessorSection_AdditionalProcessorsChoice" class="choice_container choice_type_multi_select" choicename="Processors">
        'additional_cpus': 'ProcessorSection_AdditionalProcessorsChoice',
        'ram': 'memory_memorySlotsChoice',
    }
    return labels_by_module_id[module_id]
def get_xpath_filter(self, filter_id):
    '''
    Return the xpath expression registered under filter_id.

    filter_id: eg 'root_to_modules_element'

    Raises KeyError if filter_id is not a known filter.
    '''
    xpath_by_filter_id = {
        # <div id="choice_section_div" class="choice_section_div">
        'root_to_modules_element': ".//div[@class='choice_section_div']",
        # <div id="ProcessorSection_AdditionalProcessorsChoice" class="choice_container choice_type_multi_select" choicename="Processors">
        'modules_element_to_modules': ".//div[@class='choice_container choice_type_multi_select']",
        'module_to_blue_title': ".//header",
        'module_to_grey_title': ".//div[@class='col-md-4 module-title color-808080']",
        'module_to_options': ".//div[@class='product-options-configuration-line']",
        'option_to_label': ".//div[@class='option-info']",
        'option_to_price': ".//div[@class='option-price']",
        'base_module_to_label': ".//div[@class='product-options-configuration-block option-selected']",
    }
    return xpath_by_filter_id[filter_id]
@staticmethod
def _parse_html_table(table_root_element: HtmlElement) -> pd.DataFrame:
    '''
    Convert an html <table> of the configurator page into a DataFrame.

    table_root_element: the <table> element; it must contain a <thead>
        and a <tbody>

    The label of each column is the first class of its <th> that is not
    in the ignored set (eg 'description', 'partNumber', 'price',
    'quantity'). Every <tr> of the <tbody> must hold exactly one <td> per
    column.

    Cells of the 'quantity' column are encoded as
    '<selected quantity>/<list of choices>' (eg '2/[0, 2, 4, 6, 8, 12, 16]').
    All the other cells hold the text of their <td>, with tabs removed
    and newlines replaced by spaces.

    Raises IndexError if the <thead> or the <tbody> is missing, and
    AssertionError if a <th> has no usable class or if a row doesn't have
    one <td> per column.
    '''
    ignored_classes = {
        'tableheader',
        'FE-Carbon-Footprint__hide'
    }

    thead_element = table_root_element.xpath(".//thead")[0]
    col_labels = []
    for th_element in thead_element.xpath(".//th"):
        # a <th> without class attribute yields None: handle it as "no class"
        # so that the assertion below reports it
        classes = th_element.get('class') or ''
        # split() rather than split(' '): repeated whitespace between two
        # classes must not produce an empty column label
        col_label = next((cl for cl in classes.split() if cl not in ignored_classes), None)
        assert col_label is not None, f'failed to find a valid column label in {classes}'
        col_labels.append(col_label)
    print(col_labels)
    table_as_dict = {col_label: [] for col_label in col_labels}

    tbody_element = table_root_element.xpath(".//tbody")[0]
    for tr_element in tbody_element.xpath(".//tr"):
        td_elements = tr_element.xpath(".//td")
        assert len(td_elements) == len(col_labels), f'expected {len(col_labels)} cells in the row, found {len(td_elements)}'
        for col_label, td_element in zip(col_labels, td_elements):
            if col_label == 'quantity':
                # <select id="item_dropdown_memory_memorySlotsChoice_P43328-B21" class="tabledetailqtyselect">
                #   <option value="0">0</option>
                #   <option value="2" selected="selected">2</option>
                #   <option value="4">4</option>
                #   ...
                # </select>
                options = []
                selected_quantity = None
                for option_element in td_element.xpath(".//option"):
                    quantity_choice = int(option_element.get('value'))
                    options.append(quantity_choice)
                    selected_as_str = option_element.get('selected')
                    print(selected_as_str)
                    # 'selected' is a boolean attribute: its presence is what
                    # matters, its value may legitimately be empty
                    if selected_as_str is not None:
                        selected_quantity = quantity_choice
                if selected_quantity is None and options:
                    # no option is flagged as selected: a drop-down <select>
                    # then shows its first option
                    selected_quantity = options[0]
                cell_value = f'{selected_quantity}/{options}'
            else:
                cell_value = ''.join(td_element.itertext()).replace('\t', '').replace('\n', ' ')
            table_as_dict[col_label].append(cell_value)

    table = pd.DataFrame(table_as_dict)
    print(table)
    return table
def _get_module(self, root_element: HtmlElement, module_id: str) -> HtmlElement:
    '''
    Return the <div> element of the module identified by module_id.

    root_element: element under which the modules container is searched
    module_id: eg 'additional_cpus'

    Raises IndexError if the modules container or the module is not found.
    '''
    modules_xpath = self.get_xpath_filter('root_to_modules_element')
    modules_container = root_element.xpath(modules_xpath)[0]
    module_label = self.get_module_label(module_id)  # eg ProcessorSection_AdditionalProcessorsChoice
    print(f'module label: {module_label}')
    matching_divs = modules_container.xpath(f".//div[@id='{module_label}']")
    return matching_divs[0]
def _parse_module_html_table(self, html_root: HtmlElement, module_id: str) -> pd.DataFrame:
    '''
    Return, as a DataFrame, the first <table> found in the given module.

    html_root: element under which the module is searched
    module_id: eg 'additional_cpus'
    '''
    module_element = self._get_module(html_root, module_id)
    assert module_element is not None
    tables = module_element.xpath(".//table")
    return HpeV2ConfiguratorParser._parse_html_table(tables[0])
def _parse_proc_options(self, proc_module_table: pd.DataFrame) -> Tuple[Module, List[Cpu]]:
    '''
    Build the 'processor' module from the table of processor options.

    proc_module_table: one row per processor; the columns 'description',
        'price' and 'quantity' are read

    Returns the module (one option per row) and the list of the selected
    cpus, where each cpu appears as many times as its selected quantity.

    Raises AssertionError if a description is not a handled Intel Xeon
    label or if the table yields no option.
    '''
    label_pattern = r'^ *Intel Xeon-(?P<cpu_class>Bronze|Silver|Gold|Platinum) (?P<cpu_number>[0-9][0-9][0-9][0-9][HNPQRSLUVY]?[+]?).*'
    proc_options = Module('processor')
    selected_procs = []
    for _, row in proc_module_table.iterrows():
        print(f'row = {row}')
        label = row['description']  # eg 'Intel Xeon-Gold 5415+ 2.9GHz 8-core 150W Processor for HPE'
        cpu_price = parse_price(row['price'])
        match = re.match(label_pattern, label)
        assert match, 'unhandled label : %s' % label
        cpu_class = match['cpu_class'].lower()
        cpu_number = match['cpu_number'].lower()
        cpu = Cpu("intel-xeon-%s-%s" % (cpu_class, cpu_number))
        option = Option(cpu, cpu_price)
        # the same cpu object is listed once per selected unit
        num_selected = Quantity(row['quantity']).num_selected
        selected_procs.extend([cpu] * num_selected)
        proc_options.add_option(option)
    assert len(proc_options.options) > 0
    return proc_options, selected_procs
def _parse_ram_options(self, ram_module_table: pd.DataFrame) -> Tuple[Module, List[Dimm]]:
    '''
    Build the 'ram' module from the table of memory options.

    ram_module_table: one row per memory kit; the columns 'description',
        'price' and 'quantity' are read

    Returns the module (one option per row) and the list of the selected
    dimms, where each dimm appears as many times as its selected quantity.

    Raises AssertionError if a description is not a handled memory kit
    label, if a kit doesn't hold exactly one dimm, or if the table yields
    no option.
    '''
    label_pattern = r'^ *HPE (?P<total_num_gb>[0-9]+)GB \((?P<num_dimms>[1-9]+)x(?P<num_gb_per_dimm>[0-9]+)GB\) (?P<dimm_rank>Single|Dual|Quad|Octal) +Rank +x(?P<by>[48]) +DDR(?P<ddr_generation>[0-9]+)-(?P<mega_transfers_per_sec>[0-9]+) +CAS-(?P<cas1>[0-9]+)-(?P<cas2>[0-9]+)-(?P<cas3>[0-9]+) EC8 Registered.*'
    ram_options = Module('ram')
    selected_dimms = []
    for _, row in ram_module_table.iterrows():
        print(f'row = {row}')
        label = row['description']  # eg 'HPE 32GB (1x32GB) Dual Rank x8 DDR5-4800 CAS-40-39-39 EC8 Registered Smart Memory Kit'
        match = re.match(label_pattern, label)
        assert match, 'unhandled label : %s' % label
        assert int(match['num_dimms']) == 1
        dimm_price = parse_price(row['price'])
        ddr_generation = int(match['ddr_generation'])
        mega_transfers_per_sec = int(match['mega_transfers_per_sec'])
        sdram_chip = SdramChip('ddr', ddr_generation, mega_transfers_per_sec)
        cas = DimmCas(int(match['cas1']), int(match['cas2']), int(match['cas3']))
        num_gb = int(match['num_gb_per_dimm'])
        dimm = Dimm(num_gb=num_gb, sdram_chip=sdram_chip, cas=cas, mem_type='rdimm')
        option = Option(dimm, dimm_price)
        # the same dimm object is listed once per selected unit
        num_selected = Quantity(row['quantity']).num_selected
        selected_dimms.extend([dimm] * num_selected)
        ram_options.add_option(option)
    assert len(ram_options.options) > 0
    return ram_options, selected_dimms
@staticmethod
def _parse_base_config_info(html_root: HtmlElement) -> Tuple[str, Price]:
    '''
    Extract the chassis id and the total price of the displayed configuration.

    html_root: root element of the parsed configurator page

    Returns (chassis_id, config_price), where chassis_id is built from the
    product subtitle (eg 'hpe-proliant-dl380-gen11') and config_price comes
    from the <span class="total_price"> element.

    Raises IndexError if one of the expected elements is missing and
    AssertionError if the product subtitle is not a handled one.
    '''
    # <div class="prod-titles-grp">
    #   <span id="main_title" title="DL380 Gen11 (Cat2 Conf16)" class="product-title pull-left">DL380 Gen11 (Cat2 Conf16)</span>
    #   <span class="product_subtitle">HPE ProLiant DL380 Gen11 8SFF NC Configure-to-order Server</span>
    # </div>
    # note: the [0] already raises IndexError when the element is missing.
    # The element itself must not be tested for truth: the truth value of an
    # lxml element tells whether it has children, not whether it exists.
    prod_titles_grp_el = html_root.xpath(r".//div[@class='prod-titles-grp']")[0]
    subtitle_el = prod_titles_grp_el.xpath(r".//span[@class='product_subtitle']")[0]
    # .text is None when the span holds no leading text: fall back to '' so
    # that the failure is reported by the assertion on the subtitle below
    product_subtitle = (subtitle_el.text or '').replace('\n', '').replace('\t', '')  # eg HPE ProLiant DL380 Gen11 8SFF NC Configure-to-order Server
    match = re.match(r'^HPE ProLiant DL(?P<chassis_number>[3][68][05]) *Gen11 *8SFF *NC *Configure-to-order *Server$', product_subtitle)
    assert match, f'unhandled product subtitle : {product_subtitle}'
    chassis_number = match['chassis_number']
    chassis_id = f"hpe-proliant-dl{chassis_number}-gen11"

    # <span class="total_price">1,375.99</span>
    config_price_as_str = html_root.xpath(r".//span[@class='total_price']")[0].text
    config_price = Price(config_price_as_str.replace(',', ''))
    print(f'config_price = {config_price}')
    return chassis_id, config_price
def parse(self, hpe_configurator_html_file_path: Path, configurator: Configurator):
    '''
    Fill configurator from a saved HPE configurator web page.

    hpe_configurator_html_file_path : eg '/home/graffy/work/concho/catalogs/hpev2/20250314-cat2-conf16-hpe-dl380-gen11.html'
    configurator : receives the processor and ram modules, the chassis option and the base configuration

    The html that is actually parsed is not the given file but the
    'HybrisIntegrationLogin.html' file located in the sibling '<stem>_files'
    directory.

    NOTE(review): only the part of this method that is documented here was
    reviewed; confirm whether more statements follow the trailing comments.
    '''
    hybris_file_path = hpe_configurator_html_file_path.parent / Path(str(hpe_configurator_html_file_path.stem) + '_files') / 'HybrisIntegrationLogin.html'  # eg /home/graffy/work/concho/catalogs/hpev2/20250314-cat2-conf16-hpe-dl380-gen11_files/HybrisIntegrationLogin.html
    # print(hybris_file_path)
    html_root: HtmlElement = parse_html(str(hybris_file_path)).getroot()
    # print(type(html_root))

    # processors: register the module; exactly one selected processor is expected
    proc_table = self._parse_module_html_table(html_root, 'additional_cpus')
    proc_module, selected_procs = self._parse_proc_options(proc_table)
    configurator.add_module(proc_module)
    assert len(selected_procs) == 1

    # memory: register the module
    ram_table = self._parse_module_html_table(html_root, 'ram')
    ram_module, selected_dimms = self._parse_ram_options(ram_table)
    configurator.add_module(ram_module)

    # the chassis price is what remains of the total price once the selected
    # processors and dimms are subtracted
    chassis_id, config_price = HpeV2ConfiguratorParser._parse_base_config_info(html_root)
    selected_procs_price = sum([proc_module.options[cpu.uid].price for cpu in selected_procs])
    selected_dimms_price = sum([ram_module.options[dimm.uid].price for dimm in selected_dimms])
    chassis_price = config_price - selected_procs_price - selected_dimms_price

    base_config = Config(configurator)
    base_config.num_servers = 1
    base_config.num_cpu_per_server = 2
    print(f'selected_procs_price = {selected_procs_price}')
    print(f'selected_dimms_price = {selected_dimms_price}')
    print(f'chassis_price = {chassis_price}')
    configurator.chassis = Option(Chassis(chassis_id), chassis_price)
    base_config.set_cpu(selected_procs[0])
    # each selected dimm goes to its own memory channel of the first cpu slot
    channel_index = 0
    for dimm in selected_dimms:
        base_config.cpu_slots_mem[0].mem_channels[channel_index].dimms.append(dimm)
        channel_index += 1
    configurator.base_config = base_config
# configurator.add_module(self._parse_ram_options(html_root))
# script_elements = html_root.xpath(".//script[@type='text/javascript']")
# print('number of javascript scripts:', len(script_elements))
# # script type="text/javascript"
# db_jscript = None