File size: 2,993 Bytes
3b5a8b1
c8891d8
855b4b0
 
 
3b5a8b1
855b4b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b5a8b1
 
 
855b4b0
3b5a8b1
855b4b0
3b5a8b1
855b4b0
3b5a8b1
855b4b0
3b5a8b1
855b4b0
3b5a8b1
855b4b0
3b5a8b1
c16c548
855b4b0
c16c548
 
 
855b4b0
 
 
 
dd61286
 
 
 
 
 
 
e2bb6ed
855b4b0
 
 
 
 
 
 
 
 
 
 
c16c548
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
from langchain_pipeline import pipeline, model_names
import pdfplumber
from docx import Document
from redlines import Redlines

def pdf_to_word(pdf_path, word_path):
    """Extract the text of a PDF and save it as a Word document.

    The text of every page is concatenated (each page followed by a
    newline), split on newlines, and each resulting line is written as its
    own paragraph in a new document.

    Args:
        pdf_path: The PDF source handed to ``pdfplumber.open``. The script
            in this file passes an uploaded file object; a filesystem path
            is presumably accepted as well.
        word_path: Destination handed to ``Document.save``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None (or an empty string, depending on
        # the pdfplumber version) for pages without a text layer, such as
        # scanned images. Treat those pages as empty instead of crashing
        # on ``None + "\n"``.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Same layout as before: every page's text is terminated by a newline.
    full_text = "".join(text + "\n" for text in page_texts)

    doc = Document()
    for para in full_text.split("\n"):
        doc.add_paragraph(para)
    doc.save(word_path)

def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Forward the document and the selected settings to ``pipeline``.

    Every argument is passed through positionally, unchanged and in the
    order it was received.

    Returns:
        Whatever ``pipeline`` returns for these arguments.
    """
    pipeline_args = (
        file,
        model_name,
        balance_type,
        apsn_transactions,
        max_fees_per_day,
        min_overdrawn_fee,
        min_transaction_overdraft,
    )
    return pipeline(*pipeline_args)

def redline_changes(original_path, revised_path, output_path="redlined_document.docx"):
    """Write a redline comparison of two Word documents to a new document.

    The paragraphs of each input document are joined with newlines, the two
    texts are compared with ``Redlines``, and the comparison's markdown
    output is stored in a new Word document.

    Args:
        original_path: Source passed to ``Document`` for the original text.
        revised_path: Source passed to ``Document`` for the revised text.
        output_path: Where the redlined document is saved. Defaults to
            ``"redlined_document.docx"``, the name this function has
            always written to.
    """

    def read_text(path):
        # Flatten a document to plain text, one line per paragraph.
        return "\n".join(para.text for para in Document(path).paragraphs)

    original_text = read_text(original_path)
    revised_text = read_text(revised_path)

    diff_markdown = Redlines(original_text, revised_text).output_markdown()

    # The diff markup is stored as the literal text of a single paragraph;
    # it is not converted into Word formatting here.
    diff_doc = Document()
    diff_doc.add_paragraph(diff_markdown)
    diff_doc.save(output_path)

# Streamlit App
# Collects the model choice and fee-policy answers, then, once a file is
# uploaded, converts it to Word, runs the pipeline on it, and writes a
# redlined comparison of the original and revised text.
st.title("Canarie AI Prototype")
st.subheader("Finding the canarie in the coal mine")

model_name = st.selectbox("Model", model_names())

balance_type = st.selectbox("Do you charge on available balance or ledger balance?", ["available balance", "ledger balance"])

apsn_transactions = st.selectbox("Do you charge for APSN transactions?", ["yes", "no"])

max_fees_per_day = st.number_input("How many overdraft fees per day can be charged?", min_value=0, max_value=10)

min_overdrawn_fee = st.number_input("What is the minimum amount overdrawn to incur a fee?", min_value=0, max_value=500)

min_transaction_overdraft = st.number_input("What is the minimum transaction amount to trigger an overdraft?", min_value=0, max_value=500)

# NOTE(review): the uploader accepts any file type, but the upload is
# always parsed as a PDF below.
uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is not None:
    with st.spinner('Please wait ...'):
        try:
            original_word_path = "original_document.docx"
            pdf_to_word(uploaded_file, original_word_path)

            # pdf_to_word has just read from the uploaded stream; rewind it
            # so the pipeline sees the file from its first byte rather than
            # from wherever the PDF parser left the read position.
            uploaded_file.seek(0)

            diff = apply_pipeline(
                uploaded_file,
                model_name,
                balance_type,
                apsn_transactions,
                max_fees_per_day,
                min_overdrawn_fee,
                min_transaction_overdraft
            )

            # The pipeline result is treated as newline-separated text and
            # saved as the revised document, one paragraph per line.
            revised_word_path = "revised_document.docx"
            revised_doc = Document()
            for line in diff.split("\n"):
                revised_doc.add_paragraph(line)
            revised_doc.save(revised_word_path)

            redline_changes(original_word_path, revised_word_path)

            st.success("Redlined document created successfully!")

        except Exception as e:
            # Top-level UI boundary: show any failure in the page.
            st.exception(e)