ddrs/src/geslabt002_to_itorders.py

#!/usr/bin/env python3
from pathlib import Path
import re
import pandas


# Line classifiers, compiled once instead of being re-evaluated for every input line.
_TABLE_HEADER_RE = re.compile(r'^N° com. GESLAB')
# data rows whose first cell looks like an amount, e.g. '19,855.00'
_AMOUNT_ROW_RE = re.compile(r'^[0-9,.]+\t')
# data rows whose first cell looks like a timestamp, e.g. '1952-12-17 12:00:00 AM'
_TIMESTAMP_ROW_RE = re.compile(r'^[0-9][0-9][0-9][0-9]-[0-9]+-[0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [AP]M\t')


# converts a cnrs geslab type t002 report to a single table
def geslabt002_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):
    """Flatten a GESLAB t002 report into one tab-separated table.

    The input is scanned line by line:

    - the first line starting with 'N° com. GESLAB' is written to the output
      prefixed with '# ' (so the first column label becomes '# N° com. GESLAB');
    - lines whose first tab-separated cell is made of digits, commas and dots,
      or looks like 'YYYY-M-D HH:MM:SS AM|PM', are copied verbatim as data rows;
    - every other line (including repeated table headers) is reported on
      stdout as ignored and left out of the output.

    Both files are handled as UTF-8: the report contains non-ASCII characters
    and the resulting sheet is meant to be read back with pandas, whose
    default encoding is UTF-8.

    :param in_tsv_file_path: path of the report to read
    :param out_tsv_file_path: path of the table to write (overwritten if it exists)
    """
    with open(in_tsv_file_path, encoding='utf-8') as inf, open(out_tsv_file_path, 'wt', encoding='utf-8') as outf:
        table_header_has_been_written = False
        # iterate lazily over the file instead of loading all the lines in memory
        for line in inf:
            # examples of lines that are not part of the table and therefore get ignored:
            # Entité dépensière : 				AESJULLIEN		AES RENNES METROPOLE MC JULLIEN									Crédits reçus : 								40,000.00
            # 															Disponible : 								24,743.14
            #
            #
            # N° commande	Souche		Libellé commande				Date commande	Raison sociale fournisseur	Montant consommé sur exercice antérieur	Montant consommé sur l'exercice			Montant réservé					Montant facturé		Code origine	Nature dépense	Statut		Cde groupée
            # NOTE(review): the sample header above does not start with 'N° com. GESLAB'; it may come from another report type - confirm
            if not table_header_has_been_written and _TABLE_HEADER_RE.match(line):
                outf.write('# %s' % line)
                table_header_has_been_written = True
                # the header has been handled: don't let it fall through and be reported as ignored
                continue
            # for some strange reason, the column 'N° com. GESLAB''s contents are alternatively something like '1952-12-17 12:00:00 AM' and something like '19,855.00'
            if _AMOUNT_ROW_RE.match(line) or _TIMESTAMP_ROW_RE.match(line):
                outf.write(line)
            else:
                print('ignoring line : %s' % line)


def geslabt002_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path):
    """Extract the IT-related order lines of a GESLAB report into a TSV file.

    The report is first flattened with geslabt002_to_sheet() into the
    intermediate file './tmp/commandes-2019-cnrs.tsv' (relative to the current
    working directory), then loaded with pandas. Only the rows whose 'Matière'
    code is one of the IT codes are kept, a few columns are dropped, and the
    result is written tab-separated (with the pandas index as first column).

    Intermediate tables are printed on stdout for inspection.

    :param geslabt001_file_path: path of the input report; despite its name
        (kept for backward compatibility), main() passes a t002 report here
    :param itorders_file_path: path of the resulting table of IT orders
    :raises KeyError: if an expected column is missing from the report
    """
    sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')
    # the intermediate file lives in a hard-coded directory: create it on demand instead of failing when it is missing
    sheet_file_path.parent.mkdir(parents=True, exist_ok=True)
    geslabt002_to_sheet(geslabt001_file_path, sheet_file_path)

    # 'Matière' is compared with textual codes below: read it as text so that the filter doesn't silently
    # match nothing if the column happens to contain only numeric codes and gets inferred as a number
    df = pandas.read_csv(sheet_file_path, sep='\t', dtype={'Matière': str})

    # delete the columns for which the label is of the form 'Unnamed: <n>'. They come from the csv export of libre office
    unnamed_columns = [column_label for column_label in df.columns if column_label.startswith('Unnamed')]
    print(unnamed_columns)
    df = df.drop(columns=unnamed_columns)

    print(df.columns)
    print(df.keys())
    print(df)
    PETIT_MATERIEL_INFORMATIQUE = '1100'
    EQUIPEMENT_INFORMATIQUE = '2100'
    INFORMATIQUE_ACHAT = 'D3--'
    it_codes = [PETIT_MATERIEL_INFORMATIQUE, EQUIPEMENT_INFORMATIQUE, INFORMATIQUE_ACHAT]
    it_df = df[df['Matière'].isin(it_codes)]
    print(it_df)

    # to remove clutter, drop the columns that we don't need
    print(it_df.keys())
    it_df = it_df.drop(columns=[
        '# N° com. GESLAB',  # this column seems to contain anything but ordering number
        'N° ligne',  # I don't know the meaning of this column
        'Code origine',  # I don't know the meaning of this column
        'Elément analytique',  # I don't know the meaning of this column
        'S',  # I don't know the meaning of this column
    ])

    print(it_df[['Facturé ligne', 'Raison sociale fournisseur', 'Libellé ligne']])
    it_df.to_csv(itorders_file_path, sep='\t')


def main():
    """Extract the IT orders from the 2019 CNRS GESLAB t002 export.

    Input and output paths are hard-coded and relative to the current working
    directory.
    """
    geslabt002_to_itorders(Path('./achats-ipr/2019/cnrs/from_ngicquiaux_20230127/commandes-2019-cnrs-t002.tsv'), Path('./tmp/commandes-it-2019-cnrs-002.tsv'))


# only run the conversion when this file is executed as a script, so that
# importing the module (e.g. to reuse its functions) has no side effect
if __name__ == '__main__':
    main()
added `it-cnrs-l1p5.tsv`, which contains the it hardware bought in 2019 in the format expected by labo1.5 - also cleaned up and added documentation 2023-03-06 14:24:38 +01:00			`#!/usr/bin/env python3`
ajout d'un programme pour extraire automatiquement la table concernant les achats informatiques à partier des extractions gestlab de ngicquiaux 2023-01-27 12:10:41 +01:00			`from pathlib import Path`
			`import re`
			`import pandas`


extraction des achats informatiques à partir de commandes-2019-cnrs-t002.tsv : c'est beaucoup plus complet 2023-01-27 14:31:11 +01:00			`# converts a cnrs geslab type t001 report to a single table`
			`def geslabt002_to_sheet(in_tsv_file_path: Path, out_tsv_file_path: Path):`
			`with open(in_tsv_file_path) as inf, open(out_tsv_file_path, 'wt') as outf:`
			`table_header_has_been_written = False`
			`for line in inf.readlines():`
fixed pep8 errors 2023-03-06 11:01:49 +01:00			`# Entité dépensière : AESJULLIEN AES RENNES METROPOLE MC JULLIEN Crédits reçus : 40,000.00`
			`# Disponible : 24,743.14`
			`#`
			`#`
			`# N° commande Souche Libellé commande Date commande Raison sociale fournisseur Montant consommé sur exercice antérieur Montant consommé sur l'exercice Montant réservé Montant facturé Code origine Nature dépense Statut Cde groupée`
extraction des achats informatiques à partir de commandes-2019-cnrs-t002.tsv : c'est beaucoup plus complet 2023-01-27 14:31:11 +01:00			`is_table_header = re.match(r'^N° com. GESLAB', line) is not None`
			`# for some strange reason, the column 'N° com. GESLAB''s contents are alternatively something like '1952-12-17 12:00:00 AM' and something like '19,855.00'`
			`if is_table_header and not table_header_has_been_written:`
			`outf.write('# %s' % line)`
			`table_header_has_been_written = True`
			`if re.match(r'^[0-9,.]+\t', line):`
			`outf.write(line)`
			`elif re.match(r'^[0-9][0-9][0-9][0-9]-[0-9]+-[0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [AP]M\t', line):`
			`outf.write(line)`
			`else:`
			`print('ignoring line : %s' % line)`


			`def geslabt002_to_itorders(geslabt001_file_path: Path, itorders_file_path: Path):`
			`sheet_file_path = Path('./tmp/commandes-2019-cnrs.tsv')`
			`geslabt002_to_sheet(geslabt001_file_path, sheet_file_path)`

			`df = pandas.read_csv(sheet_file_path, sep='\t')`

			`# delete the colums for which the labve is of the form 'Unnamed: <n>'. They come from the csv export of libre office`
			`unnamed_columns = [column_label for column_label in df.keys() if re.match(r'^Unnamed', column_label) is not None]`
			`print(unnamed_columns)`
			`df = df.drop(columns=unnamed_columns)`

			`print(df.columns)`
			`print(df.keys())`
			`print(df)`
			`PETIT_MATERIEL_INFORMATIQUE = '1100'`
			`EQUIPEMENT_INFORMATIQUE = '2100'`
			`INFORMATIQUE_ACHAT = 'D3--'`
			`it_df = df[(df['Matière'] == PETIT_MATERIEL_INFORMATIQUE) \| (df['Matière'] == EQUIPEMENT_INFORMATIQUE) \| (df['Matière'] == INFORMATIQUE_ACHAT)]`
			`print(it_df)`

			`# to remove clutter, drop the columns that we don't need`
			`print(it_df.keys())`
			`it_df = it_df.drop(columns=['# N° com. GESLAB']) # this column seems to contain anything but ordering number`
			`it_df = it_df.drop(columns=['N° ligne']) # I don't know the meaning of this column`
			`it_df = it_df.drop(columns=['Code origine']) # I don't know the meaning of this column`
			`it_df = it_df.drop(columns=['Elément analytique']) # I don't know the meaning of this column`
			`it_df = it_df.drop(columns=['S']) # I don't know the meaning of this column`

			`print(it_df[['Facturé ligne', 'Raison sociale fournisseur', 'Libellé ligne']])`
			`it_df.to_csv(itorders_file_path, sep='\t')`


			`def main():`
added `it-cnrs-l1p5.tsv`, which contains the it hardware bought in 2019 in the format expected by labo1.5 - also cleaned up and added documentation 2023-03-06 14:24:38 +01:00			`geslabt002_to_itorders(Path('./achats-ipr/2019/cnrs/from_ngicquiaux_20230127/commandes-2019-cnrs-t002.tsv'), Path('./tmp/commandes-it-2019-cnrs-002.tsv'))`
ajout d'un programme pour extraire automatiquement la table concernant les achats informatiques à partier des extractions gestlab de ngicquiaux 2023-01-27 12:10:41 +01:00

			`main()`