import io

import pdfplumber
import streamlit as st
from docx import Document
from redlines import Redlines

from langchain_pipeline import model_names, pipeline


def pdf_to_word(pdf_path, word_path):
    """Extract the text of a PDF and save it as a Word document.

    Every line of extracted text becomes one paragraph in the output.

    Args:
        pdf_path: Anything ``pdfplumber.open`` accepts (the app below passes
            an in-memory binary stream rather than a filesystem path).
        word_path: Destination handed to ``Document.save``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # ``extract_text()`` may yield None for a page without extractable
        # text (observed with older pdfplumber releases); fall back to an
        # empty string so the concatenation cannot raise TypeError.
        full_text = "".join(
            (page.extract_text() or "") + "\n" for page in pdf.pages
        )

    doc = Document()
    for para in full_text.split("\n"):
        doc.add_paragraph(para)
    doc.save(word_path)


def apply_pipeline(file, model_name, balance_type, apsn_transactions,
                   max_fees_per_day, min_overdrawn_fee,
                   min_transaction_overdraft):
    """Forward all arguments, unchanged and in order, to ``pipeline``.

    Returns:
        Whatever ``pipeline`` returns.
    """
    return pipeline(
        file,
        model_name,
        balance_type,
        apsn_transactions,
        max_fees_per_day,
        min_overdrawn_fee,
        min_transaction_overdraft,
    )


def redline_changes(original_path, revised_path,
                    output_path="redlined_document.docx"):
    """Diff two Word documents and save the redline as a new document.

    The paragraph texts of each document are joined with newlines, compared
    with ``Redlines``, and the resulting markup is stored as a single
    paragraph in a fresh document.

    Args:
        original_path: Word document holding the original text.
        revised_path: Word document holding the revised text.
        output_path: Where the redlined document is saved. Defaults to
            ``"redlined_document.docx"`` in the working directory.
    """
    original_doc = Document(original_path)
    revised_doc = Document(revised_path)

    original_text = "\n".join(para.text for para in original_doc.paragraphs)
    revised_text = "\n".join(para.text for para in revised_doc.paragraphs)

    redline = Redlines(original_text, revised_text)
    # NOTE(review): the markup string is inserted verbatim, so any tags it
    # contains will show up as literal text in Word rather than as styling.
    diff_markup = redline.output_markdown()

    diff_doc = Document()
    diff_doc.add_paragraph(diff_markup)
    diff_doc.save(output_path)


# Streamlit App
st.title("Canarie AI Prototype")
st.subheader("Finding the canarie in the coal mine")

model_name = st.selectbox("Model", model_names())
balance_type = st.selectbox(
    "Do you charge on available balance or ledger balance?",
    ["available balance", "ledger balance"],
)
apsn_transactions = st.selectbox(
    "Do you charge for APSN transactions?", ["yes", "no"]
)
max_fees_per_day = st.number_input(
    "How many overdraft fees per day can be charged?",
    min_value=0,
    max_value=10,
)
min_overdrawn_fee = st.number_input(
    "What is the minimum amount overdrawn to incur a fee?",
    min_value=0,
    max_value=500,
)
min_transaction_overdraft = st.number_input(
    "What is the minimum transaction amount to trigger an overdraft?",
    min_value=0,
    max_value=500,
)

uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is not None:
    with st.spinner('Please wait ...'):
        try:
            original_word_path = "original_document.docx"
            # Give pdfplumber its own copy of the upload so that reading it
            # does not move (or close) the stream later handed to the
            # pipeline.
            pdf_to_word(
                io.BytesIO(uploaded_file.getvalue()), original_word_path
            )

            diff = apply_pipeline(
                uploaded_file,
                model_name,
                balance_type,
                apsn_transactions,
                max_fees_per_day,
                min_overdrawn_fee,
                min_transaction_overdraft,
            )

            revised_word_path = "revised_document.docx"
            revised_doc = Document()
            # The pipeline result is treated as newline-separated text; each
            # line becomes one paragraph of the revised document.
            for line in diff.split("\n"):
                revised_doc.add_paragraph(line)
            revised_doc.save(revised_word_path)

            redline_changes(original_word_path, revised_word_path)

            st.success("Redlined document created successfully!")
        except Exception as e:
            # Top-level UI boundary: show the failure in the page instead of
            # crashing the script run.
            st.exception(e)