"""Convert CNRS GESLAB purchase reports (types t001 and t002) into tables.

A GESLAB report exported as tab-separated text repeats its table header for
every spending entity and interleaves summary lines. The ``*_to_sheet``
functions flatten such a report into one table; the ``*_to_itorders``
functions then keep only the IT-related orders.
"""
from pathlib import Path
import re

import pandas

# Intermediate single-table sheet written by both ``*_to_itorders`` functions.
_SHEET_FILE_PATH = Path('./tmp/commandes-2019-cnrs.tsv')

# Suppliers whose t001 orders are considered IT orders.
# Other suppliers seen in the reports but not selected:
# 'AMAZON EU SARL SUCCURSALE FRANCAISE', 'INMAC', 'RETIS', 'APIXIT'
_IT_SUPPLIERS = ('DELL SAS', 'ECONOCOM PRODUCTS & SOLUTIONS')

# Values of the t002 'Matière' column that identify IT purchases.
PETIT_MATERIEL_INFORMATIQUE = '1100'
EQUIPEMENT_INFORMATIQUE = '2100'
INFORMATIQUE_ACHAT = 'D3--'
_IT_MATIERES = (
    PETIT_MATERIEL_INFORMATIQUE,
    EQUIPEMENT_INFORMATIQUE,
    INFORMATIQUE_ACHAT,
)


def _extract_table(in_tsv_file_path, out_tsv_file_path, header_pattern,
                   row_patterns, report_ignored=False):
    """Copy the first table header and every data row of a report.

    :param in_tsv_file_path: report to read.
    :param out_tsv_file_path: table to write (overwritten).
    :param header_pattern: regex matching a table header line.
    :param row_patterns: regexes; a line matching any of them is a data row.
    :param report_ignored: when true, print every line that is not copied.
    """
    header_re = re.compile(header_pattern)
    row_res = [re.compile(pattern) for pattern in row_patterns]
    header_written = False
    # The sheet is read back by pandas.read_csv, which decodes UTF-8 by
    # default, so the encoding is fixed here instead of left to the locale.
    with open(in_tsv_file_path, encoding='utf-8') as inf, \
            open(out_tsv_file_path, 'wt', encoding='utf-8') as outf:
        for line in inf:
            if not header_written and header_re.match(line):
                # Only the first header is kept; it is prefixed with '# ',
                # which becomes part of the first column label.
                outf.write('# %s' % line)
                header_written = True
            elif any(row_re.match(line) for row_re in row_res):
                outf.write(line)
            elif report_ignored:
                print('ignoring line : %s' % line)


def _load_sheet(sheet_file_path):
    """Load a sheet produced by ``_extract_table`` as a DataFrame.

    Columns labelled 'Unnamed: ...' are dropped; according to the original
    author they come from the csv export of LibreOffice.
    """
    df = pandas.read_csv(sheet_file_path, sep='\t')
    unnamed_columns = [label for label in df.columns
                       if str(label).startswith('Unnamed')]
    print(unnamed_columns)
    df = df.drop(columns=unnamed_columns)
    print(df.columns)
    print(df.keys())
    print(df)
    return df


def geslabt001_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path) -> None:
    """Convert a CNRS GESLAB type t001 report to a single table.

    Lines of the report look like::

        Entité dépensière : AESJULLIEN ...  Crédits reçus : 40,000.00
        N° commande  Souche  Libellé commande  Date commande  ...
        19,572.00  19AESMCJ  CAMERA ZYLA 5.5 sCMOS  04/11/19  ...

    The first 'N° commande' header is written once (prefixed with '# ') and
    every line starting with a numeric first field is copied. All other
    lines, including the 'Entité dépensière' summaries, are dropped.

    :param in_tsv_file_path: tab-separated t001 report.
    :param out_tsv_file_path: tab-separated table to write.
    """
    _extract_table(
        in_tsv_file_path,
        out_tsv_file_path,
        header_pattern=r'^N° commande',
        row_patterns=[r'^[0-9,./]+\t'],
    )


def geslabt002_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path) -> None:
    """Convert a CNRS GESLAB type t002 report to a single table.

    The first 'N° com. GESLAB' header is written once (prefixed with '# ').
    For some strange reason the contents of the 'N° com. GESLAB' column are
    alternatively something like '1952-12-17 12:00:00 AM' and something like
    '19,855.00', so both shapes are accepted as the start of a data row.
    Every line that is not copied is printed as 'ignoring line : ...'.

    :param in_tsv_file_path: tab-separated t002 report.
    :param out_tsv_file_path: tab-separated table to write.
    """
    _extract_table(
        in_tsv_file_path,
        out_tsv_file_path,
        header_pattern=r'^N° com. GESLAB',
        row_patterns=[
            r'^[0-9,.]+\t',
            r'^[0-9][0-9][0-9][0-9]-[0-9]+-[0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [AP]M\t',
        ],
        report_ignored=True,
    )


def geslabt001_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path) -> None:
    """Extract the IT orders of a t001 report into a tab-separated file.

    An order is kept when its 'Raison sociale fournisseur' is one of the
    known IT suppliers. The intermediate sheet is written to
    './tmp/commandes-2019-cnrs.tsv'.

    :param geslabt001_file_path: tab-separated t001 report.
    :param itorders_file_path: output file; missing parent directories are
        created.
    """
    _SHEET_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
    geslabt001_to_sheet(geslabt001_file_path, _SHEET_FILE_PATH)
    df = _load_sheet(_SHEET_FILE_PATH)

    it_df = df[df['Raison sociale fournisseur'].isin(_IT_SUPPLIERS)]
    print(it_df)
    print(it_df[['Montant facturé', 'Raison sociale fournisseur', 'Libellé commande']])

    Path(itorders_file_path).parent.mkdir(parents=True, exist_ok=True)
    it_df.to_csv(itorders_file_path, sep='\t')


def geslabt002_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path) -> None:
    """Extract the IT order lines of a t002 report into a tab-separated file.

    A line is kept when its 'Matière' code is one of the IT codes. The
    intermediate sheet is written to './tmp/commandes-2019-cnrs.tsv'.

    :param geslabt001_file_path: tab-separated t002 report (the parameter
        name is historical and kept for compatibility).
    :param itorders_file_path: output file; missing parent directories are
        created.
    """
    _SHEET_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
    geslabt002_to_sheet(geslabt001_file_path, _SHEET_FILE_PATH)
    df = _load_sheet(_SHEET_FILE_PATH)

    # Compare as text: when every code in the report is numeric, pandas
    # infers an integer column and a comparison with '1100' never matches.
    it_df = df[df['Matière'].astype(str).isin(_IT_MATIERES)]
    print(it_df)

    # to remove clutter, drop the columns that we don't need
    print(it_df.keys())
    it_df = it_df.drop(columns=[
        '# N° com. GESLAB',    # seems to contain anything but ordering number
        'N° ligne',            # meaning unknown
        'Code origine',        # meaning unknown
        'Elément analytique',  # meaning unknown
        'S',                   # meaning unknown
    ])
    print(it_df[['Facturé ligne', 'Raison sociale fournisseur', 'Libellé ligne']])

    Path(itorders_file_path).parent.mkdir(parents=True, exist_ok=True)
    it_df.to_csv(itorders_file_path, sep='\t')


def main():
    """Convert the 2019 t001 and t002 reports into IT order tables."""
    geslabt001_to_itorders(Path('./achats-ipr/2019/commandes-2019-cnrs-t001.tsv'), Path('./tmp/commandes-it-2019-cnrs-001.tsv'))
    geslabt002_to_itorders(Path('./achats-ipr/2019/commandes-2019-cnrs-t002.tsv'), Path('./tmp/commandes-it-2019-cnrs-002.tsv'))


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()