Spaces:
Runtime error
Runtime error
File size: 2,114 Bytes
411ca77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# USER INPUT: the default Drive links and file names are the keyword defaults of run_main below.
from preprocessing import run_preprocessing
from ai_transcriber import transcribe_all
from reconciliate_and_upload import reconciliate_and_upload
import os
def _extract_drive_id(link: str) -> str:
    """Return the Google Drive file/folder ID embedded in *link*.

    Handles the common share-link shapes as well as a bare ID:

    * ``.../folders/<id>[?query]``
    * ``.../d/<id>[/view|/edit|...][?query]``
    * ``...?id=<id>``
    * ``<id>``

    Raises:
        ValueError: if no ID can be found in *link*.
    """
    # Local import so the helper does not depend on the file's import block.
    from urllib.parse import parse_qs, urlsplit

    parts = urlsplit(link.strip())
    segments = [segment for segment in parts.path.split("/") if segment]

    # Canonical share links put the ID right after a "folders" or "d" segment.
    # Searching segments[:-1] guarantees that a following segment exists.
    for marker in ("folders", "d"):
        if marker in segments[:-1]:
            return segments[segments.index(marker) + 1]

    # "open?id=<id>" / "uc?id=<id>" style links carry the ID in the query.
    query_ids = parse_qs(parts.query).get("id")
    if query_ids:
        return query_ids[0]

    # Fallback: a bare ID (or an unknown URL shape) -> last path segment.
    if segments:
        return segments[-1]

    raise ValueError(f"Could not extract a Google Drive ID from {link!r}")


def run_main(
    source_folder_with_reciepts: str = "https://drive.google.com/drive/folders/1skbgiXMnAe3z2r8E9oLAxXxkDBnrk8l4?usp=sharing",
    link_to_csv: str = "https://drive.google.com/file/d/1cYoj8U5mttwQu5hNoupifHtjESCIHpsp/view?usp=sharing",
    folder_to_save_processed_reciepts: str = "https://drive.google.com/drive/folders/1zADJlZ8pvXHNdAhbrxScPynSq1m5Jo1C?usp=sharing",
    folder_to_save_reconciled_data: str = "https://drive.google.com/drive/folders/1bmrHExKt0x5AJwJsMtwW1Yk6Hof4WbCF?usp=drive_link",
    name_output_file: str = "[AI generated] June 2024.xlsx",
    transaction_csv_path: str = 'downloaded_file.csv',
    data_path: str = "trial2",
) -> str:
    """Run pre-processing, transcription, then reconciliation and upload.

    The three stages are delegated to ``run_preprocessing``,
    ``transcribe_all`` and ``reconciliate_and_upload``; this function only
    normalises the user-supplied arguments and chains the calls.

    Args:
        source_folder_with_reciepts: Google Drive folder link (or bare ID);
            only the extracted ID is passed to ``run_preprocessing``.
        link_to_csv: Google Drive file link (or bare ID); only the extracted
            ID is passed to ``run_preprocessing``.
        folder_to_save_processed_reciepts: Google Drive folder link (or bare
            ID); the extracted ID is passed to ``reconciliate_and_upload``.
        folder_to_save_reconciled_data: Google Drive folder link (or bare
            ID); the extracted ID is passed to ``reconciliate_and_upload``.
        name_output_file: Name handed to ``reconciliate_and_upload`` as
            ``name_of_output``; ``.xlsx`` is appended when missing.
        transaction_csv_path: Passed to ``reconciliate_and_upload`` as
            ``name_of_csv``.
        data_path: First argument of all three stages (presumably a local
            working directory -- confirm against those modules).

    Returns:
        ``"https://drive.google.com/file/d/"`` followed by ``str()`` of the
        value returned by ``reconciliate_and_upload``.

    Raises:
        ValueError: if one of the Drive links contains no extractable ID.
    """
    # NOTE(review): best-effort system setup that runs on every call and whose
    # exit status is ignored. poppler-utils is presumably needed by a later
    # stage -- confirm. The trailing "yes | ls" only lists the working
    # directory and looks like a debugging leftover.
    os.system("apt update; yes | apt-get install poppler-utils; yes | ls")

    # Reduce every Drive link to its ID; the downstream calls receive IDs.
    receipts_folder_id = _extract_drive_id(source_folder_with_reciepts)
    processed_folder_id = _extract_drive_id(folder_to_save_processed_reciepts)
    reconciled_folder_id = _extract_drive_id(folder_to_save_reconciled_data)
    csv_file_id = _extract_drive_id(link_to_csv)
    print("Extracted link csv id: ", csv_file_id)

    # Append the extension only when it is missing, without touching the rest
    # of the name.
    output_name = name_output_file
    if not output_name.endswith(".xlsx"):
        output_name += ".xlsx"

    run_preprocessing(data_path, receipts_folder_id, csv_file_id)
    print("Done pre-processing!")

    transcribe_all(data_path)
    print("Done transcription!")

    id_output = reconciliate_and_upload(
        data_path,
        name_of_csv=transaction_csv_path,
        folder_to_save_processed_reciepts=processed_folder_id,
        folder_to_save_reconciled_data=reconciled_folder_id,
        name_of_output=output_name,
    )
    url_output_file = "https://drive.google.com/file/d/" + str(id_output)
    print("Done all!")
    return url_output_file
|