2023-01-27 12:10:41 +01:00
from pathlib import Path
import re
import pandas
2023-01-27 14:31:11 +01:00
# converts a cnrs geslab type t001 report to a single table
def geslabt001_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
    """Flatten a CNRS GESLAB type t001 report into a single table.

    The input is a tab-separated export that mixes preamble lines with
    table lines, for example::

        Entité dépensière : AESJULLIEN AES RENNES METROPOLE ... Crédits reçus : 40,000.00
        Disponible : 24,743.14

        N° commande  Souche  Libellé commande  Date commande  Raison sociale fournisseur ...
        19,572.00  19AESMCJ  CAMERA ZYLA 5.5 sCMOS  04/11/19  ANDOR TECHNOLOGY LIMITED ...

    Only two kinds of lines are copied to the output:

    * the first line starting with ``N° commande`` (the table header),
      written once with a ``# `` prefix; later occurrences are dropped;
    * every line whose first field is made only of digits, ``,``, ``.``
      and ``/`` (the data rows).

    Everything else (``Entité dépensière`` preamble, blank lines, ...) is
    silently discarded.

    Both files are handled as UTF-8 so that the accented column names do
    not depend on the platform's default encoding.

    :param in_tsv_file_path: path of the t001 report to read
    :param out_tsv_file_path: path of the flattened table to write
        (overwritten if it already exists)
    """
    header_prefix = 'N° commande'
    # compiled once instead of on every line
    data_row = re.compile(r'^[0-9,./]+\t')
    header_written = False

    with open(in_tsv_file_path, encoding='utf-8') as inf, \
            open(out_tsv_file_path, 'wt', encoding='utf-8') as outf:
        # iterate lazily rather than loading the whole report with readlines()
        for line in inf:
            if line.startswith(header_prefix):
                if not header_written:
                    outf.write('# ' + line)
                    header_written = True
                # a header line can never be a data row
                continue
            if data_row.match(line):
                outf.write(line)
2023-01-27 14:31:11 +01:00
# converts a cnrs geslab type t002 report to a single table
def geslabt002_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
    """Flatten a CNRS GESLAB type t002 report into a single table.

    Lines of the tab-separated input are handled as follows:

    * the first line starting with ``N° com. GESLAB`` is the table header;
      it is written once, prefixed with ``# ``;
    * a line whose first field is a number such as ``19,855.00`` or a
      timestamp such as ``1952-12-17 12:00:00 AM`` is a data row and is
      copied unchanged (for some strange reason the ``N° com. GESLAB``
      column alternates between these two forms);
    * any other line, including repeated headers, is dropped and reported
      on standard output.

    Both files are handled as UTF-8 so that the accented column names do
    not depend on the platform's default encoding.

    :param in_tsv_file_path: path of the t002 report to read
    :param out_tsv_file_path: path of the flattened table to write
        (overwritten if it already exists)
    """
    header_prefix = 'N° com. GESLAB'
    # compiled once instead of on every line
    amount_row = re.compile(r'^[0-9,.]+\t')
    timestamp_row = re.compile(
        r'^[0-9]{4}-[0-9]+-[0-9]+ [0-9]{2}:[0-9]{2}:[0-9]{2} [AP]M\t')
    header_written = False

    with open(in_tsv_file_path, encoding='utf-8') as inf, \
            open(out_tsv_file_path, 'wt', encoding='utf-8') as outf:
        for line in inf:
            if line.startswith(header_prefix) and not header_written:
                outf.write('# ' + line)
                header_written = True
            elif amount_row.match(line) or timestamp_row.match(line):
                outf.write(line)
            else:
                # only lines that were really dropped are reported
                print('ignoring line : %s' % line)
def geslabt001_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path):
    """Extract the IT orders of a GESLAB t001 report into a tsv file.

    The report is first flattened with ``geslabt001_to_sheet`` into the
    intermediate file ``./tmp/commandes-2019-cnrs.tsv`` (relative to the
    current working directory, created if needed), then loaded with
    pandas. Orders are considered IT orders when their
    ``Raison sociale fournisseur`` is one of the suppliers listed below.

    The intermediate and final tables are printed on standard output.

    :param geslabt001_file_path: path of the t001 report to read
    :param itorders_file_path: path of the tab-separated file receiving
        the selected orders (written with the dataframe index)
    """
    it_suppliers = ['DELL SAS', 'ECONOCOM PRODUCTS & SOLUTIONS']
    # other supplier names noted here but currently not selected:
    #   'AMAZON EU SARL SUCCURSALE FRANCAISE'
    #   'INMAC'
    #   'RETIS'
    #   'APIXIT'

    sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')
    # the intermediate directory is hard-coded, so make sure it exists
    sheet_file_path.parent.mkdir(parents=True, exist_ok=True)
    geslabt001_to_sheet(geslabt001_file_path, sheet_file_path)

    df = pandas.read_csv(sheet_file_path, sep='\t')

    # delete the columns for which the label is of the form 'Unnamed: <n>'.
    # They come from the csv export of libre office
    unnamed_columns = [label for label in df.columns if label.startswith('Unnamed')]
    print(unnamed_columns)
    df = df.drop(columns=unnamed_columns)
    print(df.columns)
    print(df)

    it_df = df[df['Raison sociale fournisseur'].isin(it_suppliers)]
    print(it_df)
    print(it_df[['Montant facturé', 'Raison sociale fournisseur', 'Libellé commande']])

    it_df.to_csv(itorders_file_path, sep='\t')
def geslabt002_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path):
    """Extract the IT order lines of a GESLAB t002 report into a tsv file.

    The report is first flattened with ``geslabt002_to_sheet`` into the
    intermediate file ``./tmp/commandes-2019-cnrs.tsv`` (relative to the
    current working directory, created if needed), then loaded with
    pandas. Lines are considered IT orders when their ``Matière`` code is
    one of the codes listed below.

    The intermediate and final tables are printed on standard output.

    :param geslabt001_file_path: path of the **t002** report to read (the
        parameter name is historical and kept for compatibility)
    :param itorders_file_path: path of the tab-separated file receiving
        the selected order lines (written with the dataframe index)
    """
    PETIT_MATERIEL_INFORMATIQUE = '1100'
    EQUIPEMENT_INFORMATIQUE = '2100'
    INFORMATIQUE_ACHAT = 'D3--'
    it_codes = [PETIT_MATERIEL_INFORMATIQUE, EQUIPEMENT_INFORMATIQUE, INFORMATIQUE_ACHAT]

    sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')
    # the intermediate directory is hard-coded, so make sure it exists
    sheet_file_path.parent.mkdir(parents=True, exist_ok=True)
    geslabt002_to_sheet(geslabt001_file_path, sheet_file_path)

    # 'Matière' is compared with string codes below, so read it as text
    # instead of relying on pandas' type inference
    df = pandas.read_csv(sheet_file_path, sep='\t', dtype={'Matière': str})

    # delete the columns for which the label is of the form 'Unnamed: <n>'.
    # They come from the csv export of libre office
    unnamed_columns = [label for label in df.columns if label.startswith('Unnamed')]
    print(unnamed_columns)
    df = df.drop(columns=unnamed_columns)
    print(df.columns)
    print(df)

    it_df = df[df['Matière'].isin(it_codes)]
    print(it_df)

    # to remove clutter, drop the columns that we don't need
    print(it_df.keys())
    it_df = it_df.drop(columns=[
        '# N° com. GESLAB',  # this column seems to contain anything but ordering number
        'N° ligne',  # I don't know the meaning of this column
        'Code origine',  # I don't know the meaning of this column
        'Elément analytique',  # I don't know the meaning of this column
        'S',  # I don't know the meaning of this column
    ])
    print(it_df[['Facturé ligne', 'Raison sociale fournisseur', 'Libellé ligne']])

    it_df.to_csv(itorders_file_path, sep='\t')
def main():
    """Extract the IT orders from the 2019 CNRS GESLAB t001 and t002 reports.

    Input and output paths are hard-coded and relative to the current
    working directory.
    """
    geslabt001_to_itorders(
        Path('./achats-ipr/2019/commandes-2019-cnrs-t001.tsv'),
        Path('./tmp/commandes-it-2019-cnrs-001.tsv'),
    )
    geslabt002_to_itorders(
        Path('./achats-ipr/2019/commandes-2019-cnrs-t002.tsv'),
        Path('./tmp/commandes-it-2019-cnrs-002.tsv'),
    )
2023-01-27 12:10:41 +01:00
# Script entry point: run both conversions.
# NOTE(review): this call is not guarded by `if __name__ == '__main__':`,
# so importing this module also runs main() — confirm that is intended.
main ( )