Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
·
855b4b0
1
Parent(s):
d1fc749
test
Browse files
- __pycache__/langchain_pipeline.cpython-310.pyc +0 -0
- app.py +62 -59
- langchain_pipeline.py +0 -0
- requirements.txt +10 -1
__pycache__/langchain_pipeline.cpython-310.pyc
CHANGED
Binary files a/__pycache__/langchain_pipeline.cpython-310.pyc and b/__pycache__/langchain_pipeline.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -1,44 +1,70 @@
|
|
1 |
import streamlit as st
|
2 |
from langchain_pipeline import pipeline, model_names
|
|
|
|
|
|
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
st.title("Canarie AI Prototype")
|
5 |
st.subheader("Finding the canarie in the coal mine")
|
6 |
|
7 |
-
model_name = st.selectbox(
|
8 |
-
"Model",
|
9 |
-
model_names())
|
10 |
|
11 |
-
balance_type = st.selectbox(
|
12 |
-
"Do you charge on available balance or ledger balance?",
|
13 |
-
["available balance", "ledger balance"]
|
14 |
-
)
|
15 |
|
16 |
-
apsn_transactions = st.selectbox(
|
17 |
-
"Do you charge for APSN transactions?",
|
18 |
-
["yes", "no"]
|
19 |
-
)
|
20 |
|
21 |
-
max_fees_per_day = st.number_input(
|
22 |
-
"How many overdraft fees per day can be charged?",
|
23 |
-
min_value=0, max_value=10,
|
24 |
-
)
|
25 |
|
26 |
-
min_overdrawn_fee = st.number_input(
|
27 |
-
"What is the minimum amount overdrawn to incur a fee?",
|
28 |
-
min_value=0, max_value=500
|
29 |
-
)
|
30 |
|
31 |
-
min_transaction_overdraft = st.number_input(
|
32 |
-
"What is the minimum transaction amount to trigger an overdraft?",
|
33 |
-
min_value=0, max_value=500
|
34 |
-
)
|
35 |
|
36 |
uploaded_file = st.file_uploader("Choose a file")
|
|
|
37 |
if uploaded_file is not None:
|
38 |
-
diff = ""
|
39 |
with st.spinner('Please wait ...'):
|
40 |
try:
|
41 |
-
|
|
|
|
|
|
|
42 |
uploaded_file,
|
43 |
model_name,
|
44 |
balance_type,
|
@@ -47,39 +73,16 @@ if uploaded_file is not None:
|
|
47 |
min_overdrawn_fee,
|
48 |
min_transaction_overdraft
|
49 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
except Exception as e:
|
51 |
st.exception(e)
|
52 |
-
|
53 |
-
diff_lines = diff.split("\n")
|
54 |
-
|
55 |
-
styled_diff = """
|
56 |
-
<style>
|
57 |
-
body {
|
58 |
-
font-family: 'Times New Roman', serif;
|
59 |
-
line-height: 1.5;
|
60 |
-
}
|
61 |
-
.diff {
|
62 |
-
margin: 10px 0;
|
63 |
-
padding: 5px;
|
64 |
-
}
|
65 |
-
.add {
|
66 |
-
color: green;
|
67 |
-
}
|
68 |
-
.remove {
|
69 |
-
color: red;
|
70 |
-
}
|
71 |
-
</style>
|
72 |
-
<div>
|
73 |
-
"""
|
74 |
-
for line in diff_lines:
|
75 |
-
if line.startswith('+'):
|
76 |
-
styled_diff += f'<div class="diff add">{line}</div>'
|
77 |
-
elif line.startswith('-'):
|
78 |
-
styled_diff += f'<div class="diff remove">{line}</div>'
|
79 |
-
else:
|
80 |
-
styled_diff += f'<div class="diff">{line}</div>'
|
81 |
-
styled_diff += "</div>"
|
82 |
-
|
83 |
-
st.markdown(styled_diff, unsafe_allow_html=True)
|
84 |
-
|
85 |
-
st.markdown("The key changes are:")
|
|
|
1 |
import streamlit as st
|
2 |
from langchain_pipeline import pipeline, model_names
|
3 |
+
import pdfplumber
|
4 |
+
from docx import Document
|
5 |
+
from redlines import Redlines
|
6 |
|
7 |
+
def pdf_to_word(pdf_path, word_path):
|
8 |
+
with pdfplumber.open(pdf_path) as pdf:
|
9 |
+
full_text = ""
|
10 |
+
for page in pdf.pages:
|
11 |
+
full_text += page.extract_text() + "\n"
|
12 |
+
|
13 |
+
doc = Document()
|
14 |
+
for para in full_text.split("\n"):
|
15 |
+
doc.add_paragraph(para)
|
16 |
+
doc.save(word_path)
|
17 |
+
|
18 |
+
def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
|
19 |
+
return pipeline(
|
20 |
+
file,
|
21 |
+
model_name,
|
22 |
+
balance_type,
|
23 |
+
apsn_transactions,
|
24 |
+
max_fees_per_day,
|
25 |
+
min_overdrawn_fee,
|
26 |
+
min_transaction_overdraft
|
27 |
+
)
|
28 |
+
|
29 |
+
def redline_changes(original_path, revised_path):
|
30 |
+
original_doc = Document(original_path)
|
31 |
+
revised_doc = Document(revised_path)
|
32 |
+
|
33 |
+
original_text = "\n".join([para.text for para in original_doc.paragraphs])
|
34 |
+
revised_text = "\n".join([para.text for para in revised_doc.paragraphs])
|
35 |
+
|
36 |
+
redline = Redlines(original_text, revised_text)
|
37 |
+
diff_html = redline.output_markdown()
|
38 |
+
|
39 |
+
diff_doc = Document()
|
40 |
+
diff_doc.add_paragraph(diff_html)
|
41 |
+
diff_doc.save("redlined_document.docx")
|
42 |
+
|
43 |
+
# Streamlit App
|
44 |
st.title("Canarie AI Prototype")
|
45 |
st.subheader("Finding the canarie in the coal mine")
|
46 |
|
47 |
+
model_name = st.selectbox("Model", model_names())
|
|
|
|
|
48 |
|
49 |
+
balance_type = st.selectbox("Do you charge on available balance or ledger balance?", ["available balance", "ledger balance"])
|
|
|
|
|
|
|
50 |
|
51 |
+
apsn_transactions = st.selectbox("Do you charge for APSN transactions?", ["yes", "no"])
|
|
|
|
|
|
|
52 |
|
53 |
+
max_fees_per_day = st.number_input("How many overdraft fees per day can be charged?", min_value=0, max_value=10)
|
|
|
|
|
|
|
54 |
|
55 |
+
min_overdrawn_fee = st.number_input("What is the minimum amount overdrawn to incur a fee?", min_value=0, max_value=500)
|
|
|
|
|
|
|
56 |
|
57 |
+
min_transaction_overdraft = st.number_input("What is the minimum transaction amount to trigger an overdraft?", min_value=0, max_value=500)
|
|
|
|
|
|
|
58 |
|
59 |
uploaded_file = st.file_uploader("Choose a file")
|
60 |
+
|
61 |
if uploaded_file is not None:
|
|
|
62 |
with st.spinner('Please wait ...'):
|
63 |
try:
|
64 |
+
original_word_path = "original_document.docx"
|
65 |
+
pdf_to_word(uploaded_file, original_word_path)
|
66 |
+
|
67 |
+
diff = apply_pipeline(
|
68 |
uploaded_file,
|
69 |
model_name,
|
70 |
balance_type,
|
|
|
73 |
min_overdrawn_fee,
|
74 |
min_transaction_overdraft
|
75 |
)
|
76 |
+
|
77 |
+
revised_word_path = "revised_document.docx"
|
78 |
+
revised_doc = Document()
|
79 |
+
for line in diff.split("\n"):
|
80 |
+
revised_doc.add_paragraph(line)
|
81 |
+
revised_doc.save(revised_word_path)
|
82 |
+
|
83 |
+
redline_changes(original_word_path, revised_word_path)
|
84 |
+
|
85 |
+
st.success("Redlined document created successfully!")
|
86 |
+
|
87 |
except Exception as e:
|
88 |
st.exception(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
langchain_pipeline.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -10,4 +10,13 @@ langchain_google_genai==1.0.5
|
|
10 |
google_generativeai
|
11 |
pdf2docx
|
12 |
pymupdf
|
13 |
-
python-docx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
google_generativeai
|
11 |
pdf2docx
|
12 |
pymupdf
|
13 |
+
python-docx
|
14 |
+
streamlit
|
15 |
+
pdfplumber
|
16 |
+
python-docx
|
17 |
+
redlines
|
18 |
+
langchain_astradb
|
19 |
+
langchain_core
|
20 |
+
langchain_openai
|
21 |
+
langchain_anthropic
|
22 |
+
langchain_google_genai
|