Spaces:
Sleeping
Sleeping
import streamlit as st | |
from langchain_pipeline import pipeline, model_names | |
import pdfplumber | |
from docx import Document | |
from redlines import Redlines | |
def pdf_to_word(pdf_path, word_path):
    """Extract the text of a PDF and save it as a Word document.

    Every line of extracted text becomes one paragraph in the output
    document.

    Args:
        pdf_path: The PDF to read; passed unchanged to ``pdfplumber.open``.
        word_path: Destination passed unchanged to ``Document.save``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Depending on the pdfplumber version, extract_text() can return
        # None for a page without a text layer (e.g. a scanned image).
        # Treat that as an empty page instead of failing on None + "\n".
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Each page contributes its text followed by a newline, so the
    # resulting paragraph list is the same as before for text-bearing pages.
    full_text = "".join(f"{text}\n" for text in page_texts)

    doc = Document()
    for para in full_text.split("\n"):
        doc.add_paragraph(para)
    doc.save(word_path)
def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Forward the uploaded file and the form answers to ``pipeline``.

    The arguments are passed positionally, in the order they are declared
    here, and the result of ``pipeline`` is returned unchanged.
    """
    pipeline_args = (
        file,
        model_name,
        balance_type,
        apsn_transactions,
        max_fees_per_day,
        min_overdrawn_fee,
        min_transaction_overdraft,
    )
    return pipeline(*pipeline_args)
def redline_changes(original_path, revised_path, output_path="redlined_document.docx"):
    """Diff two Word documents and save the comparison as a Word document.

    The paragraph texts of each document are joined with newlines, compared
    with ``Redlines``, and the resulting markup is written as a single
    paragraph of a new document.

    Args:
        original_path: Word document holding the original text.
        revised_path: Word document holding the revised text.
        output_path: Where the comparison document is saved. Defaults to
            ``"redlined_document.docx"`` in the working directory.
    """
    original_text = "\n".join(
        para.text for para in Document(original_path).paragraphs
    )
    revised_text = "\n".join(
        para.text for para in Document(revised_path).paragraphs
    )

    # output_markdown is a property on Redlines, not a method; calling it
    # raised "TypeError: 'str' object is not callable".
    diff_markdown = Redlines(original_text, revised_text).output_markdown

    # NOTE(review): the markup is stored verbatim as paragraph text, so it
    # will presumably appear as literal tags in Word rather than as
    # formatted insertions/deletions -- confirm this is the intended output.
    diff_doc = Document()
    diff_doc.add_paragraph(diff_markdown)
    diff_doc.save(output_path)
# Streamlit App
# Collects the fee-policy answers and an uploaded document, runs the
# pipeline on it, and writes a redlined comparison of the original text
# against the pipeline output.
st.title("Canarie AI Prototype")
st.subheader("Finding the canarie in the coal mine")

# Form inputs forwarded to the pipeline.
model_name = st.selectbox("Model", model_names())
balance_type = st.selectbox("Do you charge on available balance or ledger balance?", ["available balance", "ledger balance"])
apsn_transactions = st.selectbox("Do you charge for APSN transactions?", ["yes", "no"])
max_fees_per_day = st.number_input("How many overdraft fees per day can be charged?", min_value=0, max_value=10)
min_overdrawn_fee = st.number_input("What is the minimum amount overdrawn to incur a fee?", min_value=0, max_value=500)
min_transaction_overdraft = st.number_input("What is the minimum transaction amount to trigger an overdraft?", min_value=0, max_value=500)

uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is not None:
    with st.spinner('Please wait ...'):
        # Top-level boundary: any failure is shown in the UI rather than
        # crashing the script run.
        try:
            # Snapshot the uploaded text as the "original" side of the diff.
            original_word_path = "original_document.docx"
            pdf_to_word(uploaded_file, original_word_path)

            # The upload is a file-like object that pdf_to_word has just
            # read from; rewind it so the pipeline starts at the first byte
            # instead of wherever the PDF parser left the position.
            uploaded_file.seek(0)

            diff = apply_pipeline(
                uploaded_file,
                model_name,
                balance_type,
                apsn_transactions,
                max_fees_per_day,
                min_overdrawn_fee,
                min_transaction_overdraft,
            )

            # Store the pipeline output as the "revised" side, one
            # paragraph per line.
            revised_word_path = "revised_document.docx"
            revised_doc = Document()
            for line in diff.split("\n"):
                revised_doc.add_paragraph(line)
            revised_doc.save(revised_word_path)

            redline_changes(original_word_path, revised_word_path)
            st.success("Redlined document created successfully!")
        except Exception as e:
            st.exception(e)