398 lines
21 KiB
Python
398 lines
21 KiB
Python
from typing import List, Tuple
|
|
from concho.config import IHtmlConfiguratorParser, Configurator, Module, Option, Cpu, Price, Dimm, SdramChip, DimmCas, Config, Chassis
|
|
from pathlib import Path
|
|
from lxml.html import HtmlElement, parse as parse_html
|
|
import re
|
|
import pandas as pd
|
|
import json
|
|
|
|
|
|
def parse_price(price_as_str: str) -> Price:
|
|
# 'EUR 1,092.65'
|
|
return Price(price_as_str.replace('EUR', '').replace(',', ''))
|
|
|
|
|
|
class Quantity():
|
|
num_selected: int # selected quantity
|
|
choices: List[int] # choice of quantities
|
|
|
|
def __init__(self, quantity_details: str):
|
|
'''
|
|
quantity_details: eg '2/[0, 2, 4, 6, 8, 12, 16]'
|
|
'''
|
|
parts = quantity_details.split('/')
|
|
self.num_selected = int(parts[0])
|
|
self.choices = json.loads(parts[1])
|
|
|
|
|
|
class HpeV2ConfiguratorParser(IHtmlConfiguratorParser):
|
|
# the body of the HpeV2 html pages look like this:
|
|
# <body style="">
|
|
# <div id="dataWrap" class="lang_en_US">
|
|
# <div id="theData" data-role="page">
|
|
# <div id="eocs_config_page" class="eocs_page hybris_punchout">
|
|
# <div id="sce_toolbar" class="sce-toolbar" style="display: none;">
|
|
# <div id="sce_toolbar_left" class="pull-left">
|
|
# <ul>
|
|
# <li id="sce_toolbar_new_config_normal" class="sce-toolbar-new-config ison"><a
|
|
# id="sce_toolbar_new_config">New Configuration</a></li>
|
|
# <li id="sce_toolbar_banding_normal" class="sce_toolbar_banding"><a
|
|
# id="sce_toolbar_banding">Banding</a></li>
|
|
# <li id="sce_toolbar_localization_normal" class="sce_toolbar_localization"><a
|
|
# id="sce_toolbar_localization">Localization</a></li>
|
|
# <li id="sce_toolbar_search_normal" class="sce_toolbar_search"><a
|
|
# id="sce_toolbar_search">Search</a></li>
|
|
# </ul>
|
|
# </div>
|
|
# <div id="sce_toolbar_right" class="pull-right">
|
|
# <ul>
|
|
# <li class="sce_toolbar_save_config"><span id="sce_toolbar_save_config_icon"
|
|
# class="tool-bar-icons sce-toolbar-save-config-icon"></span><input
|
|
# id="sce_toolbar_save_config" value="Save" type="button" style="font-size: 14px;"
|
|
# class="button slim critical"></li>
|
|
# </ul>
|
|
# </div>
|
|
# <li id="sce_toolbar_support_normal"><a id="sce_toolbar_support">Support</a></li>
|
|
# <div id="email_toolbar" class="pull-right"></div>
|
|
# <hr>
|
|
# </div>
|
|
# <div class="config_header">
|
|
# <div class="left_title_container">
|
|
# <div class="prod-titles-grp">
|
|
# <span id="main_title" title="DL380 Gen11 (Cat2 Conf16)" class="product-title pull-left">DL380 Gen11 (Cat2 Conf16)</span>
|
|
# <span class="product_subtitle">HPE ProLiant DL380 Gen11 8SFF NC Configure-to-order Server</span>
|
|
# </div>
|
|
# <div id="expandAll_collapseAll_buttonsDiv"><span id="expandAll_collapseAll"
|
|
# class="expandAll_button selected">Expand All</span><span id="angle_id"
|
|
# class="angle_class icon-angle-down"></span></div>
|
|
# </div>
|
|
# <div class="right_message_btns_container pull-left">
|
|
# <ul class="product-message-btns pull-right"><label id="config_your_product"></label>
|
|
# <li class="pull-left eocs_message_div"><span class="msg-icon hpenew-notes"></span><input
|
|
# id="see_all_system_message_div_id" type="button" value="Messages"
|
|
# class="all-messages-popup-btn"></li>
|
|
# </ul>
|
|
# </div>
|
|
# </div>
|
|
# <div id="center_div" class="column">
|
|
# <where all module tables are>
|
|
# </div>
|
|
# <div id="right_div" class="column">
|
|
# ...
|
|
# <section class="eocsrightsectiondiv">
|
|
# <section class="rsection1">
|
|
# <div id="referenceid">Reference Model ID: 46951646</div>
|
|
# <div id="price_div">
|
|
# <span id="rsection1_actprice" class="rsection_price">
|
|
# <label>Your price</label>
|
|
# <span class="currency_code">EUR</span>
|
|
# <span class="total_price">1,375.99</span>
|
|
# </span>
|
|
# <span id="rsection1_maxtime" class="rsection_maxtime" style="display: block;">
|
|
# <label><i>Estimated to ship in </i></label>
|
|
# <span id="max_leadtime" class="max_leadtime">15</span>
|
|
# <i class="max_leadtime"> business days </i>
|
|
# </span>
|
|
# <span id="ecoLablels" class="ecoLablels"><img id="eStar" class="eStar"><img id="ePeat"
|
|
# class="ePeat"></span></div>
|
|
|
|
# inside the <where all module tables are>:
|
|
|
|
# <div id="ProcessorSection_AdditionalProcessorsChoice" class="choice_container choice_type_multi_select" choicename="Processors">
|
|
# <div class="section_container">
|
|
# <span id="section_title">Processors</span>
|
|
# <div class="choicecontent FE-Carbon-Footprint__choicecontent FE-Occ-Configurator__choicecontent">
|
|
# <table id="ProcessorSection_AdditionalProcessorsChoice_table" class="tablemulti no_extended">
|
|
# <thead>
|
|
# <tr>
|
|
# <th class="tableheader description" width="300">Description</th>
|
|
# <th class="tableheader partNumber" width="120">Part Number</th>
|
|
# <th class="tableheader pcf">CO2e estimé</th>
|
|
# <th class="tableheader price">Price</th>
|
|
# <th class="tableheader quantity">Qty</th>
|
|
# ...
|
|
# </tr>
|
|
# </thead>
|
|
# <tbody>
|
|
# <tr id="P49597-B21" itemid="P49597-B21" class="qtyZero P49597-B21" style="">
|
|
# <td class="tabledetail radio-btn">
|
|
# ...
|
|
# </td>
|
|
|
|
# <td class="tabledetail ">
|
|
# <span id="item_description_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="item_description pull-left">
|
|
# Intel Xeon-Gold 5415+ 2.9GHz 8-core 150W Processor for HPE
|
|
# </span>
|
|
|
|
# <span id="item_info_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="hpenew-circle-information item_info_icon pull-left eocs_item_tooltip">
|
|
# <span class="eocs_item_tooltiptext">
|
|
# <li class="eocs_msg_text">Mixing of Processors is not allowed</li>
|
|
# </span>
|
|
# </span>
|
|
# <br>
|
|
# </td>
|
|
|
|
# <td class="tabledetail ">
|
|
# P49597-B21
|
|
# </td>
|
|
|
|
# <td class="tabledetail column_hpe_preferred FE-Carbon-Footprint__hide">
|
|
# </td>
|
|
|
|
# <td class="tabledetail column_recommended FE-Carbon-Footprint__hide">
|
|
# <span class="icon-checkmark lightgraycolor greencolor">
|
|
# </span>
|
|
# <br>
|
|
# </td>
|
|
|
|
# <td id="leadtime_P49597-B21" class="tabledetail thinLeadTime ">
|
|
# <div>
|
|
# <div style="background:Green; width:15px;height:15px;float:left;">
|
|
# </div>
|
|
# <div style="padding-left: 10px;float:left;">4D
|
|
# </div>
|
|
# </div>
|
|
# </td>
|
|
|
|
# <td class="tabledetail endDate ">09/30/2025</td>
|
|
|
|
# <td class="tabledetail pcf">
|
|
# <div class="FE-Carbon-Footprint__cf-row" title="CO2e / each">133 kg CO2e</div>
|
|
# </td>
|
|
|
|
# <td id="item_price_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="tabledetail price ">
|
|
# <span>EUR 1,092.65</span>
|
|
# </td>
|
|
|
|
# <td class="tabledetail">
|
|
# <select id="item_dropdown_ProcessorSection_AdditionalProcessorsChoice_P49597-B21" class="tabledetailqtyselect" style="border:none;appearance:none;" disabled="disabled">
|
|
# <option value="0" selected="selected">0</option>
|
|
# <option value="1">1</option>
|
|
# <option value="2">2</option>
|
|
# </select>
|
|
|
|
# </td>
|
|
# </tr>
|
|
|
|
def __init__(self):
|
|
pass
|
|
|
|
def get_module_label(self, module_id):
|
|
return {
|
|
'cpu_change': 'Processeurs (Passage)',
|
|
'additional_cpus': 'ProcessorSection_AdditionalProcessorsChoice', # <div id="ProcessorSection_AdditionalProcessorsChoice" class="choice_container choice_type_multi_select" choicename="Processors">
|
|
'ram': 'memory_memorySlotsChoice',
|
|
}[module_id]
|
|
|
|
def get_xpath_filter(self, filter_id):
|
|
return {
|
|
'root_to_modules_element': ".//div[@class='choice_section_div']", # <div id="choice_section_div" class="choice_section_div">
|
|
'modules_element_to_modules': ".//div[@class='choice_container choice_type_multi_select']", # <div id="ProcessorSection_AdditionalProcessorsChoice" class="choice_container choice_type_multi_select" choicename="Processors">
|
|
'module_to_blue_title': ".//header",
|
|
'module_to_grey_title': ".//div[@class='col-md-4 module-title color-808080']",
|
|
'module_to_options': ".//div[@class='product-options-configuration-line']",
|
|
'option_to_label': ".//div[@class='option-info']",
|
|
'option_to_price': ".//div[@class='option-price']",
|
|
'base_module_to_label': ".//div[@class='product-options-configuration-block option-selected']",
|
|
}[filter_id]
|
|
|
|
@staticmethod
|
|
def _parse_html_table(table_root_element: HtmlElement) -> pd.DataFrame:
|
|
table_as_dict = {}
|
|
thead_element = table_root_element.xpath(".//thead")[0]
|
|
col_labels = []
|
|
|
|
ignored_classes = {
|
|
'tableheader',
|
|
'FE-Carbon-Footprint__hide'
|
|
}
|
|
|
|
for th_element in thead_element.xpath(".//th"):
|
|
classes = th_element.get('class')
|
|
col_label_found = False
|
|
for cl in classes.split(' '):
|
|
if cl not in ignored_classes:
|
|
col_labels.append(cl)
|
|
table_as_dict[cl] = []
|
|
col_label_found = True
|
|
break
|
|
assert col_label_found, f'failed to find a valid column label in {classes}'
|
|
print(col_labels)
|
|
|
|
tbody_element = table_root_element.xpath(".//tbody")[0]
|
|
for tr_element in tbody_element.xpath(".//tr"):
|
|
td_elements = tr_element.xpath(".//td")
|
|
assert len(td_elements) == len(col_labels)
|
|
icol = 0
|
|
for td_element in td_elements:
|
|
col_label = col_labels[icol]
|
|
if col_label == 'quantity':
|
|
# <select id="item_dropdown_memory_memorySlotsChoice_P43328-B21" class="tabledetailqtyselect">
|
|
# <option value="0">0</option>
|
|
# <option value="2" selected="selected">2</option>
|
|
# <option value="4">4</option>
|
|
# <option value="6">6</option>
|
|
# <option value="8">8</option>
|
|
# <option value="12">12</option>
|
|
# <option value="16">16</option>
|
|
# </select>
|
|
options = []
|
|
selected_quantity = None
|
|
for option_element in td_element.xpath(".//option"):
|
|
quantity_choice = int(option_element.get('value'))
|
|
options.append(quantity_choice)
|
|
selected_as_str = option_element.get('selected')
|
|
print(selected_as_str)
|
|
if selected_as_str:
|
|
selected_quantity = quantity_choice
|
|
cell_value = f'{selected_quantity}/{options}'
|
|
else:
|
|
cell_value = ''.join(td_element.itertext()).replace('\t', '').replace('\n', ' ')
|
|
table_as_dict[col_label].append(cell_value)
|
|
icol += 1
|
|
# print(table_as_dict)
|
|
table = pd.DataFrame(table_as_dict)
|
|
print(table)
|
|
return table
|
|
|
|
def _get_module(self, root_element: HtmlElement, module_id: str) -> HtmlElement:
|
|
'''
|
|
'''
|
|
modules_element = root_element.xpath(self.get_xpath_filter('root_to_modules_element'))[0]
|
|
# print(modules_element)
|
|
module_label = self.get_module_label(module_id) # eg ProcessorSection_AdditionalProcessorsChoice
|
|
print(f'module label: {module_label}')
|
|
module_root = modules_element.xpath(f".//div[@id='{module_label}']")[0]
|
|
return module_root
|
|
|
|
def _parse_module_html_table(self, html_root: HtmlElement, module_id: str) -> pd.DataFrame:
|
|
'''
|
|
module_id: eg 'additional_cpus'
|
|
'''
|
|
module_root_element = self._get_module(html_root, module_id)
|
|
assert module_root_element is not None
|
|
table_root = module_root_element.xpath(".//table")[0]
|
|
table = HpeV2ConfiguratorParser._parse_html_table(table_root)
|
|
return table
|
|
|
|
def _parse_proc_options(self, proc_module_table: pd.DataFrame) -> Tuple[Module, List[Cpu]]:
|
|
|
|
proc_options = Module('processor')
|
|
selected_procs = []
|
|
# module_root_element = self._get_module(html_root, 'Processeurs (Passage)')
|
|
for row_index, row in proc_module_table.iterrows():
|
|
print(f'row = {row}')
|
|
label = row['description']
|
|
cpu_price = parse_price(row['price'])
|
|
match = re.match(r'^ *Intel Xeon-(?P<cpu_class>Bronze|Silver|Gold|Platinum) (?P<cpu_number>[0-9][0-9][0-9][0-9][HNPQRSLUVY]?[+]?).*', label)
|
|
assert match, 'unhandled label : %s' % label
|
|
# print(match['cpu_class'], match['cpu_number'])
|
|
cpu_class = match['cpu_class'].lower()
|
|
cpu_id = "intel-xeon-%s-%s" % (cpu_class, match['cpu_number'].lower())
|
|
cpu = Cpu(cpu_id)
|
|
option = Option(cpu, cpu_price)
|
|
for selected_item in range(Quantity(row['quantity']).num_selected):
|
|
selected_procs.append(cpu)
|
|
|
|
proc_options.add_option(option)
|
|
|
|
assert len(proc_options.options) > 0
|
|
return proc_options, selected_procs
|
|
|
|
def _parse_ram_options(self, ram_module_table: pd.DataFrame) -> Tuple[Module, List[Dimm]]:
|
|
|
|
ram_options = Module('ram')
|
|
selected_dimms = []
|
|
|
|
# module_root_element = self._get_module(html_root, 'Processeurs (Passage)')
|
|
for row_index, row in ram_module_table.iterrows():
|
|
print(f'row = {row}')
|
|
label = row['description'] # eg 'HPE 32GB (1x32GB) Dual Rank x8 DDR5-4800 CAS-40-39-39 EC8 Registered Smart Memory Kit'
|
|
match = re.match(r'^ *HPE (?P<total_num_gb>[0-9]+)GB \((?P<num_dimms>[1-9]+)x(?P<num_gb_per_dimm>[0-9]+)GB\) (?P<dimm_rank>Single|Dual|Quad|Octal) +Rank +x(?P<by>[48]) +DDR(?P<ddr_generation>[0-9]+)-(?P<mega_transfers_per_sec>[0-9]+) +CAS-(?P<cas1>[0-9]+)-(?P<cas2>[0-9]+)-(?P<cas3>[0-9]+) EC8 Registered.*', label)
|
|
assert match, 'unhandled label : %s' % label
|
|
assert int(match['num_dimms']) == 1
|
|
|
|
dimm_price = parse_price(row['price'])
|
|
sdram_chip = SdramChip('ddr', int(match['ddr_generation']), int(match['mega_transfers_per_sec']))
|
|
cas = DimmCas(int(match['cas1']), int(match['cas2']), int(match['cas3']))
|
|
dimm = Dimm(num_gb=int(match['num_gb_per_dimm']), sdram_chip=sdram_chip, cas=cas, mem_type='rdimm')
|
|
option = Option(dimm, dimm_price)
|
|
for selected_item in range(Quantity(row['quantity']).num_selected):
|
|
selected_dimms.append(dimm)
|
|
|
|
ram_options.add_option(option)
|
|
|
|
assert len(ram_options.options) > 0
|
|
return ram_options, selected_dimms
|
|
|
|
@staticmethod
|
|
def _parse_base_config_info(html_root: HtmlElement) -> Tuple[str, Price]:
|
|
prod_titles_grp_el = html_root.xpath(r".//div[@class='prod-titles-grp']")[0]
|
|
assert prod_titles_grp_el
|
|
|
|
# <div class="prod-titles-grp">
|
|
# <span id="main_title" title="DL380 Gen11 (Cat2 Conf16)" class="product-title pull-left">DL380 Gen11 (Cat2 Conf16)</span>
|
|
# <span class="product_subtitle">HPE ProLiant DL380 Gen11 8SFF NC Configure-to-order Server</span>
|
|
# </div>
|
|
product_subtitle = prod_titles_grp_el.xpath(r".//span[@class='product_subtitle']")[0].text.replace('\n', '').replace('\t', '') # eg HPE ProLiant DL380 Gen11 8SFF NC Configure-to-order Server
|
|
# print(product_subtitle)
|
|
match = re.match(r'^HPE ProLiant DL(?P<chassis_number>[3][68][05]) *Gen11 *8SFF *NC *Configure-to-order *Server$', product_subtitle)
|
|
assert match
|
|
chassis_number = match['chassis_number']
|
|
# configurator.base_config = self._parse_base_config()
|
|
chassis_id = f"hpe-proliant-dl{chassis_number}-gen11"
|
|
|
|
# <span class="total_price">1,375.99</span>
|
|
config_price_as_str = html_root.xpath(r".//span[@class='total_price']")[0].text
|
|
config_price = Price(config_price_as_str.replace(',', ''))
|
|
print(f'config_price = {config_price}')
|
|
return chassis_id, config_price
|
|
|
|
def parse(self, hpe_configurator_html_file_path: Path, configurator: Configurator):
|
|
'''
|
|
hpe_configurator_html_file_path : eg '/home/graffy/work/concho/catalogs/hpev2/20250314-cat2-conf16-hpe-dl380-gen11.html'
|
|
'''
|
|
hybris_file_path = hpe_configurator_html_file_path.parent / Path(str(hpe_configurator_html_file_path.stem) + '_files') / 'HybrisIntegrationLogin.html' # eg /home/graffy/work/concho/catalogs/hpev2/20250314-cat2-conf16-hpe-dl380-gen11_files/HybrisIntegrationLogin.html
|
|
# print(hybris_file_path)
|
|
html_root: HtmlElement = parse_html(str(hybris_file_path)).getroot()
|
|
# print(type(html_root))
|
|
|
|
proc_table = self._parse_module_html_table(html_root, 'additional_cpus')
|
|
proc_module, selected_procs = self._parse_proc_options(proc_table)
|
|
configurator.add_module(proc_module)
|
|
assert len(selected_procs) == 1
|
|
|
|
ram_table = self._parse_module_html_table(html_root, 'ram')
|
|
ram_module, selected_dimms = self._parse_ram_options(ram_table)
|
|
configurator.add_module(ram_module)
|
|
|
|
chassis_id, config_price = HpeV2ConfiguratorParser._parse_base_config_info(html_root)
|
|
|
|
selected_procs_price = sum([proc_module.options[cpu.uid].price for cpu in selected_procs])
|
|
selected_dimms_price = sum([ram_module.options[dimm.uid].price for dimm in selected_dimms])
|
|
chassis_price = config_price - selected_procs_price - selected_dimms_price
|
|
base_config = Config(configurator)
|
|
base_config.num_servers = 1
|
|
base_config.num_cpu_per_server = 2
|
|
print(f'selected_procs_price = {selected_procs_price}')
|
|
print(f'selected_dimms_price = {selected_dimms_price}')
|
|
print(f'chassis_price = {chassis_price}')
|
|
configurator.chassis = Option(Chassis(chassis_id), chassis_price)
|
|
|
|
base_config.set_cpu(selected_procs[0])
|
|
|
|
channel_index = 0
|
|
for dimm in selected_dimms:
|
|
base_config.cpu_slots_mem[0].mem_channels[channel_index].dimms.append(dimm)
|
|
channel_index += 1
|
|
configurator.base_config = base_config
|
|
|
|
|
|
# configurator.add_module(self._parse_ram_options(html_root))
|
|
|
|
# script_elements = html_root.xpath(".//script[@type='text/javascript']")
|
|
# print('number of javascript scripts:', len(script_elements))
|
|
# # script type="text/javascript"
|
|
# db_jscript = None
|
|
|