extraction des achats informatiques à partir de commandes-2019-cnrs-t002.tsv : c'est beaucoup plus complet
This commit is contained in:
parent
6bfc9bf300
commit
9506a106f4
|
@ -0,0 +1,5 @@
|
||||||
|
tmp
|
||||||
|
*.xls
|
||||||
|
*.xlsx
|
||||||
|
.~lock*
|
||||||
|
*.docx
|
71
src/main.py
71
src/main.py
|
@ -4,7 +4,8 @@ import re
|
||||||
import pandas
|
import pandas
|
||||||
|
|
||||||
|
|
||||||
def cnrsformat1_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
|
# converts a cnrs geslab type t001 report to a single table
|
||||||
|
def geslabt001_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
|
||||||
with open(in_tsv_file_path) as inf, open(out_tsv_file_path, 'wt') as outf:
|
with open(in_tsv_file_path) as inf, open(out_tsv_file_path, 'wt') as outf:
|
||||||
table_header_has_been_written = False
|
table_header_has_been_written = False
|
||||||
for line in inf.readlines():
|
for line in inf.readlines():
|
||||||
|
@ -25,14 +26,36 @@ def cnrsformat1_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
|
||||||
outf.write(line)
|
outf.write(line)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
# converts a cnrs geslab type t002 report to a single table
|
||||||
cnrsformat1_file_path = Path('./from-cloud.ipr/2019/commandes-2019-cnrs-t001.tsv')
|
def geslabt002_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
|
||||||
|
with open(in_tsv_file_path) as inf, open(out_tsv_file_path, 'wt') as outf:
|
||||||
|
table_header_has_been_written = False
|
||||||
|
for line in inf.readlines():
|
||||||
|
# Entité dépensière : AESJULLIEN AES RENNES METROPOLE MC JULLIEN Crédits reçus : 40,000.00
|
||||||
|
# Disponible : 24,743.14
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# N° commande Souche Libellé commande Date commande Raison sociale fournisseur Montant consommé sur exercice antérieur Montant consommé sur l'exercice Montant réservé Montant facturé Code origine Nature dépense Statut Cde groupée
|
||||||
|
is_table_header = re.match(r'^N° com. GESLAB', line) is not None
|
||||||
|
# for some strange reason, the contents of the column 'N° com. GESLAB' alternate between something like '1952-12-17 12:00:00 AM' and something like '19,855.00'
|
||||||
|
if is_table_header and not table_header_has_been_written:
|
||||||
|
outf.write('# %s' % line)
|
||||||
|
table_header_has_been_written = True
|
||||||
|
if re.match(r'^[0-9,.]+\t', line):
|
||||||
|
outf.write(line)
|
||||||
|
elif re.match(r'^[0-9][0-9][0-9][0-9]-[0-9]+-[0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [AP]M\t', line):
|
||||||
|
outf.write(line)
|
||||||
|
else:
|
||||||
|
print('ignoring line : %s' % line)
|
||||||
|
|
||||||
|
|
||||||
|
def geslabt001_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path):
|
||||||
sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')
|
sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')
|
||||||
cnrsformat1_to_sheet(cnrsformat1_file_path, sheet_file_path)
|
geslabt001_to_sheet(geslabt001_file_path, sheet_file_path)
|
||||||
|
|
||||||
df = pandas.read_csv(sheet_file_path, sep='\t')
|
df = pandas.read_csv(sheet_file_path, sep='\t')
|
||||||
|
|
||||||
# delete the colums for which the labve is of the form 'Unnamed: <n>'. They come from the csv export of libre office
|
# delete the columns for which the label is of the form 'Unnamed: <n>'. They come from the csv export of libre office
|
||||||
unnamed_columns = [column_label for column_label in df.keys() if re.match(r'^Unnamed', column_label) is not None]
|
unnamed_columns = [column_label for column_label in df.keys() if re.match(r'^Unnamed', column_label) is not None]
|
||||||
print(unnamed_columns)
|
print(unnamed_columns)
|
||||||
df = df.drop(columns=unnamed_columns)
|
df = df.drop(columns=unnamed_columns)
|
||||||
|
@ -48,6 +71,44 @@ def main():
|
||||||
print(it_df)
|
print(it_df)
|
||||||
|
|
||||||
print(it_df[['Montant facturé', 'Raison sociale fournisseur', 'Libellé commande']])
|
print(it_df[['Montant facturé', 'Raison sociale fournisseur', 'Libellé commande']])
|
||||||
|
it_df.to_csv(itorders_file_path, sep='\t')
|
||||||
|
|
||||||
|
|
||||||
|
def geslabt002_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path):
|
||||||
|
sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')
|
||||||
|
geslabt002_to_sheet(geslabt001_file_path, sheet_file_path)
|
||||||
|
|
||||||
|
df = pandas.read_csv(sheet_file_path, sep='\t')
|
||||||
|
|
||||||
|
# delete the columns for which the label is of the form 'Unnamed: <n>'. They come from the csv export of libre office
|
||||||
|
unnamed_columns = [column_label for column_label in df.keys() if re.match(r'^Unnamed', column_label) is not None]
|
||||||
|
print(unnamed_columns)
|
||||||
|
df = df.drop(columns=unnamed_columns)
|
||||||
|
|
||||||
|
print(df.columns)
|
||||||
|
print(df.keys())
|
||||||
|
print(df)
|
||||||
|
PETIT_MATERIEL_INFORMATIQUE = '1100'
|
||||||
|
EQUIPEMENT_INFORMATIQUE = '2100'
|
||||||
|
INFORMATIQUE_ACHAT = 'D3--'
|
||||||
|
it_df = df[(df['Matière'] == PETIT_MATERIEL_INFORMATIQUE) | (df['Matière'] == EQUIPEMENT_INFORMATIQUE) | (df['Matière'] == INFORMATIQUE_ACHAT)]
|
||||||
|
print(it_df)
|
||||||
|
|
||||||
|
# to remove clutter, drop the columns that we don't need
|
||||||
|
print(it_df.keys())
|
||||||
|
it_df = it_df.drop(columns=['# N° com. GESLAB']) # this column seems to contain anything but ordering number
|
||||||
|
it_df = it_df.drop(columns=['N° ligne']) # I don't know the meaning of this column
|
||||||
|
it_df = it_df.drop(columns=['Code origine']) # I don't know the meaning of this column
|
||||||
|
it_df = it_df.drop(columns=['Elément analytique']) # I don't know the meaning of this column
|
||||||
|
it_df = it_df.drop(columns=['S']) # I don't know the meaning of this column
|
||||||
|
|
||||||
|
print(it_df[['Facturé ligne', 'Raison sociale fournisseur', 'Libellé ligne']])
|
||||||
|
it_df.to_csv(itorders_file_path, sep='\t')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
geslabt001_to_itorders(Path('./achats-ipr/2019/commandes-2019-cnrs-t001.tsv'), Path('./tmp/commandes-it-2019-cnrs-001.tsv'))
|
||||||
|
geslabt002_to_itorders(Path('./achats-ipr/2019/commandes-2019-cnrs-t002.tsv'), Path('./tmp/commandes-it-2019-cnrs-002.tsv'))
|
||||||
|
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in New Issue