Upload 3 files

- app.py +41 -0
- main.py +41 -0
- reconciliate_and_upload.py +140 -0
app.py
ADDED
```python
from config import json_url_id
from main import run_main
import gdown
import gradio as gr


# Fetch the Google service-account credentials from Drive at startup.
download_url = f'https://drive.google.com/uc?id={json_url_id}'
output = 'secret_google_service_account.json'
gdown.download(download_url, output, quiet=False)


with gr.Blocks(title="Accountant automation", theme='nota-ai/theme') as demo:
    with gr.Row():
        # Left column: the six pipeline inputs.
        with gr.Column(scale=6):
            with gr.Row():
                with gr.Column(scale=3):
                    source_folder_with_reciepts = gr.Textbox(placeholder="Folder containing the invoices", lines=1)
                    link_to_csv = gr.Textbox(placeholder="Link to the bank statement CSV", lines=1)
                    folder_to_save_processed_reciepts = gr.Textbox(placeholder="Folder where processed invoices are saved", lines=1)
                    folder_to_save_reconciled_data = gr.Textbox(placeholder="Folder where the final spreadsheet is saved", lines=1)
                    name_output_file = gr.Textbox(placeholder="Name of the final spreadsheet file", lines=1)
                    transaction_csv_path = gr.Textbox(placeholder="Path to the transaction CSV", lines=1)
                    chat_submit_button = gr.Button(value="Submit ▶")

        # Right column: the Markdown link to the finished spreadsheet.
        with gr.Column(scale=6):
            chat_output = gr.Markdown("Press Submit to start processing")

    fn_chat = run_main

    chat_submit_button.click(
        fn=fn_chat,
        inputs=[source_folder_with_reciepts, link_to_csv, folder_to_save_processed_reciepts,
                folder_to_save_reconciled_data, name_output_file, transaction_csv_path],
        outputs=[chat_output],
    )

demo.launch(max_threads=40)
```
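Since `fn_chat` is just `run_main`, the pipeline can be smoke-tested without the UI. A minimal sketch, assuming the placeholder Drive URLs below are swapped for real ones:

```python
# Hypothetical smoke test: call the click handler directly, bypassing Gradio.
# All Drive URLs/IDs here are placeholders, not values from this repo.
from main import run_main

markdown_link = run_main(
    "https://drive.google.com/drive/folders/<receipts-folder-id>",
    "https://drive.google.com/file/d/<statement-csv-id>/view?usp=sharing",
    "https://drive.google.com/drive/folders/<processed-folder-id>",
    "https://drive.google.com/drive/folders/<output-folder-id>",
    "June 2024",
    "downloaded_file.csv",
)
print(markdown_link)  # "[View final spreadsheet](https://drive.google.com/file/d/...)"
```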
main.py
ADDED
```python
# USER INPUT
from preprocessing import run_preprocessing
from ai_transcriber import transcribe_all
from reconciliate_and_upload import reconciliate_and_upload
import os


def run_main(
        source_folder_with_reciepts,
        link_to_csv,
        folder_to_save_processed_reciepts,
        folder_to_save_reconciled_data,
        name_output_file="[AI generated] June 2024.xlsx",
        transaction_csv_path='downloaded_file.csv',
        data_path="trial2",
):
    # poppler-utils provides the pdftoppm/pdftocairo binaries used to
    # rasterise the PDF invoices during preprocessing.
    os.system("apt update; yes | apt-get install poppler-utils")

    # Reduce the pasted Drive URLs to bare folder/file IDs.
    source_folder_with_reciepts = source_folder_with_reciepts.split("?")[0].split("/")[-1]
    folder_to_save_processed_reciepts = folder_to_save_processed_reciepts.split("?")[0].split("/")[-1]
    folder_to_save_reconciled_data = folder_to_save_reconciled_data.split("?")[0].split("/")[-1]
    link_to_csv = link_to_csv.split("/view?")[0].split("/")[-1]
    print("Extracted link csv id: ", link_to_csv)

    # Guarantee exactly one ".xlsx" suffix on the output file name.
    name_output_file = name_output_file + ".xlsx"
    name_output_file = name_output_file.replace(".xlsx.xlsx", ".xlsx")

    run_preprocessing(data_path, source_folder_with_reciepts, link_to_csv)
    print("Done pre-processing!")
    transcribe_all(data_path)
    print("Done transcription!")
    id_output = reconciliate_and_upload(
        data_path,
        name_of_csv=transaction_csv_path,
        folder_to_save_processed_reciepts=folder_to_save_processed_reciepts,
        folder_to_save_reconciled_data=folder_to_save_reconciled_data,
        name_of_output=name_output_file)

    url_output_file = "https://drive.google.com/file/d/" + str(id_output)
    display = f"[View final spreadsheet]({url_output_file})"
    print("Done all!")
    return display
```
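The string handling at the top of `run_main` accepts either a bare ID or a full Drive URL and reduces it to the ID. A quick illustration with made-up links:

```python
# Folder links: strip the query string, keep the last path segment.
folder_url = "https://drive.google.com/drive/folders/1AbCdEfG?usp=sharing"
assert folder_url.split("?")[0].split("/")[-1] == "1AbCdEfG"

# File links: cut at "/view?", keep the last path segment.
file_url = "https://drive.google.com/file/d/9ZyXwVu/view?usp=sharing"
assert file_url.split("/view?")[0].split("/")[-1] == "9ZyXwVu"

# A bare ID passes through unchanged.
assert "1AbCdEfG".split("?")[0].split("/")[-1] == "1AbCdEfG"
```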
reconciliate_and_upload.py
ADDED
```python
from uploader import save_logs
import os
import pandas as pd
from rapidfuzz import process, fuzz
from random import randint
from time import sleep
# other imports (Google API) are pulled in via uploader


def fuzzy_match(row, choices, scorer, cutoff):
    """Best fuzzy match between a statement label and the supplier names."""
    match = process.extractOne(row["Libellé d'opération"], choices, scorer=scorer, score_cutoff=cutoff)
    if match:
        return match[0]
    return "missing receipt"


def reconciliate_and_upload(data_path,
                            name_of_csv,
                            folder_to_save_processed_reciepts,
                            folder_to_save_reconciled_data,
                            name_of_raw_transcripts="transcript_raw.txt",
                            name_of_output="[AI Generated] Output.xlsx"):

    # The transcript file holds a Python-literal list of model outputs;
    # eval() trusts its contents entirely.
    with open(f"{data_path}/{name_of_raw_transcripts}") as file:
        transcripts = eval(file.read())

    # Collect every rasterised invoice image produced by preprocessing.
    imgs = []
    path_to_pdfs = data_path
    for root, dirs, files in os.walk(path_to_pdfs):
        for file in files:
            if file.endswith('.png'):
                print(os.path.join(root, file))
                imgs.append({"path": os.path.join(root, file)})

    # Parse each transcript and upload the matching image to Drive.
    list_transcripts_evaled = []
    objects = []
    for i, t in enumerate(transcripts):
        content = eval(t["content"].replace('null', '-1'))
        try:
            obk = save_logs(imgs[i]["path"], f"P{i+1}.png", folder_to_save_processed_reciepts)
        except Exception:
            # Most likely a Drive rate limit: back off, then retry once.
            print("Upload failed, sleeping before retry")
            sleep(randint(30, 40))
            obk = save_logs(imgs[i]["path"], f"P{i+1}.png", folder_to_save_processed_reciepts)

        objects.append(obk)
        print("uploaded image!")
        list_transcripts_evaled.append({
            "path": imgs[i]["path"],
            "name_of_supplier": content["name_of_supplier"],
            "amount": content["amount"],
            "currency": content["currency"],
            "date": content["date"]})

    urls = ["https://drive.google.com/file/d/" + ob["id"] for ob in objects]

    df_app = pd.DataFrame(list_transcripts_evaled)

    # Extract the numeric part of the amount, normalise separators, cast to float.
    float_regex = r'[-+]?\d*\.\d+|\d+'
    df_app['amount'] = df_app['amount'].astype(str).str.extract(f'({float_regex})', expand=False)
    df_app['amount'] = (
        df_app['amount']
        .str.replace(" ", "", regex=False)
        .str.replace(",", ".", regex=False)
        .str.replace("N/A", "-1", regex=False)
        .astype(float)
    )

    df_app["date"] = pd.to_datetime(df_app['date'], format="%d/%m/%Y", errors='coerce')
    df_app["url"] = urls
    df_app = df_app.drop_duplicates(["name_of_supplier", "amount", "date"]).reset_index(drop=True)

    # Bank statement CSV: the first three rows are banner/header lines.
    df_opp_app = pd.read_csv(f"{data_path}/{name_of_csv}", skiprows=3)

    # Same numeric normalisation for the debit and credit columns.
    for col in ["Débit", "Crédit"]:
        df_opp_app[col] = df_opp_app[col].astype(str).str.extract(f'({float_regex})', expand=False)
        df_opp_app[col] = (
            df_opp_app[col]
            .str.replace(" ", "", regex=False)
            .str.replace(",", ".", regex=False)
            .str.replace("N/A", "-1", regex=False)
            .astype(float)
        )

    df_opp_app["Date"] = pd.to_datetime(df_opp_app['Date'], format="%d/%m/%Y", errors='coerce')

    # Merge statement lines with transcribed invoices on the amount (Débit vs amount).
    merged_df_app = pd.merge(df_opp_app, df_app, left_on='Débit', right_on='amount',
                             how='left', suffixes=('_ops', '_df'))

    # Fuzzy-match each statement label against the (uppercased) supplier names.
    choices = [r.upper() for r in df_app['name_of_supplier'].tolist()]
    merged_df_app['fuzzy_matched_supplier'] = merged_df_app.apply(
        lambda row: fuzzy_match(row, choices, fuzz.WRatio, 80), axis=1)
    merged_df_app = merged_df_app.drop_duplicates(subset=["Date", "Valeur", "Libellé d'opération", "Débit"])

    # Invoices that never matched a statement line go to a residuals sheet.
    df_residuals_app = df_app[~df_app['name_of_supplier'].isin(merged_df_app['name_of_supplier'])].copy()

    # Keep the fuzzy-matched supplier under its output column name, then drop
    # the working columns.
    merged_df_app["Invoice supplier name"] = merged_df_app["fuzzy_matched_supplier"]
    merged_df_app.drop(columns=["name_of_supplier", "currency", "date", "path", "fuzzy_matched_supplier"], inplace=True)
    df_residuals_app.drop(columns=["path"], inplace=True)

    # Turn the Drive URLs into clickable Excel hyperlinks.
    merged_df_app['url'] = merged_df_app['url'].apply(
        lambda x: f'=HYPERLINK("{x}", "View invoice")' if pd.notna(x) else '')
    df_residuals_app['url'] = df_residuals_app['url'].apply(
        lambda x: f'=HYPERLINK("{x}", "View invoice")' if pd.notna(x) else '')

    with pd.ExcelWriter(name_of_output) as writer:
        merged_df_app.to_excel(writer, sheet_name='Reconciled data', index=False)
        df_residuals_app.to_excel(writer, sheet_name='Residuals and unmatched transactions', index=False)

    # Upload the workbook and return its Drive file id.
    id_output = save_logs(name_of_output, name_of_output, folder_to_save_reconciled_data)

    return id_output
```
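The reconciliation hinges on `process.extractOne` with `fuzz.WRatio` and a cutoff of 80. A standalone sketch of its return contract, using invented supplier names:

```python
from rapidfuzz import process, fuzz

# Invented data: a statement label and transcribed supplier names.
choices = ["AMAZON EU SARL", "TOTALENERGIES", "ORANGE SA"]
label = "PAIEMENT CB AMAZON EU SARL"

# extractOne returns a (choice, score, index) tuple, or None when no choice
# scores at or above score_cutoff -- which is why fuzzy_match falls back to
# the "missing receipt" sentinel.
match = process.extractOne(label, choices, scorer=fuzz.WRatio, score_cutoff=80)
print(match[0] if match else "missing receipt")
```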