Spaces:
Sleeping
Sleeping
File size: 2,993 Bytes
3b5a8b1 c8891d8 855b4b0 3b5a8b1 855b4b0 3b5a8b1 855b4b0 3b5a8b1 855b4b0 3b5a8b1 855b4b0 3b5a8b1 855b4b0 3b5a8b1 855b4b0 3b5a8b1 855b4b0 3b5a8b1 c16c548 855b4b0 c16c548 855b4b0 dd61286 e2bb6ed 855b4b0 c16c548 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
from langchain_pipeline import pipeline, model_names
import pdfplumber
from docx import Document
from redlines import Redlines
def pdf_to_word(pdf_path, word_path):
    """Extract the text of a PDF and save it as a Word document.

    The text of every page is extracted with pdfplumber, each page's text is
    terminated by a newline, and every resulting line becomes one paragraph
    of a new document written to ``word_path``.

    Args:
        pdf_path: Source handed to ``pdfplumber.open`` (the caller in this
            file passes an uploaded file object).
        word_path: Destination handed to ``Document.save``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can return None for a page without extractable text
        # (e.g. a scanned image) in some pdfplumber versions; treat that as
        # an empty page instead of failing on `None + "\n"`.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf.pages]

    # Join once rather than growing a string page by page.
    full_text = "".join(page_texts)

    doc = Document()
    for para in full_text.split("\n"):
        doc.add_paragraph(para)
    doc.save(word_path)
def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Run the imported ``pipeline`` on ``file`` with the selected settings.

    Every argument is forwarded positionally, in the order received and
    without modification; whatever ``pipeline`` returns is returned as-is.
    """
    # Bundle the settings in call order so the forwarding is a single step.
    pipeline_args = (
        file,
        model_name,
        balance_type,
        apsn_transactions,
        max_fees_per_day,
        min_overdrawn_fee,
        min_transaction_overdraft,
    )
    return pipeline(*pipeline_args)
def redline_changes(original_path, revised_path, output_path="redlined_document.docx"):
    """Write a Word document containing the redline diff of two documents.

    The paragraph texts of each input document are joined with newlines,
    compared with ``Redlines``, and the resulting marked-up diff is stored as
    a single paragraph in a new document.

    Args:
        original_path: Word document holding the original text.
        revised_path: Word document holding the revised text.
        output_path: Where the diff document is saved. Defaults to
            ``"redlined_document.docx"``, the name this function has always
            written to.
    """
    original_text = "\n".join(
        para.text for para in Document(original_path).paragraphs
    )
    revised_text = "\n".join(
        para.text for para in Document(revised_path).paragraphs
    )

    redline = Redlines(original_text, revised_text)
    # output_markdown is a property that already holds the diff string;
    # calling it would raise "TypeError: 'str' object is not callable".
    diff_markup = redline.output_markdown

    diff_doc = Document()
    diff_doc.add_paragraph(diff_markup)
    diff_doc.save(output_path)
# Streamlit App
# Collects the fee-policy settings and an uploaded file, converts the upload
# to a Word document, runs the pipeline on it, saves the pipeline output as a
# second Word document, and writes a redline diff of the two.
st.title("Canarie AI Prototype")
st.subheader("Finding the canarie in the coal mine")

# Settings forwarded unchanged to the pipeline.
model_name = st.selectbox("Model", model_names())
balance_type = st.selectbox("Do you charge on available balance or ledger balance?", ["available balance", "ledger balance"])
apsn_transactions = st.selectbox("Do you charge for APSN transactions?", ["yes", "no"])
max_fees_per_day = st.number_input("How many overdraft fees per day can be charged?", min_value=0, max_value=10)
min_overdrawn_fee = st.number_input("What is the minimum amount overdrawn to incur a fee?", min_value=0, max_value=500)
min_transaction_overdraft = st.number_input("What is the minimum transaction amount to trigger an overdraft?", min_value=0, max_value=500)

uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is not None:
    with st.spinner('Please wait ...'):
        try:
            original_word_path = "original_document.docx"
            pdf_to_word(uploaded_file, original_word_path)

            # pdf_to_word has read from the upload's stream; rewind it so the
            # pipeline receives the file from its first byte rather than from
            # wherever the PDF reader left the position.
            uploaded_file.seek(0)
            diff = apply_pipeline(
                uploaded_file,
                model_name,
                balance_type,
                apsn_transactions,
                max_fees_per_day,
                min_overdrawn_fee,
                min_transaction_overdraft
            )

            # Store the pipeline output one line per paragraph so it can be
            # compared against the original document.
            revised_word_path = "revised_document.docx"
            revised_doc = Document()
            for line in diff.split("\n"):
                revised_doc.add_paragraph(line)
            revised_doc.save(revised_word_path)

            redline_changes(original_word_path, revised_word_path)
            st.success("Redlined document created successfully!")
        except Exception as e:
            # Top-level UI boundary: show any failure in the page.
            st.exception(e)
|