anmolsahai committed on
Commit
855b4b0
·
1 Parent(s): d1fc749
__pycache__/langchain_pipeline.cpython-310.pyc CHANGED
Binary files a/__pycache__/langchain_pipeline.cpython-310.pyc and b/__pycache__/langchain_pipeline.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,44 +1,70 @@
1
  import streamlit as st
2
  from langchain_pipeline import pipeline, model_names
 
 
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  st.title("Canarie AI Prototype")
5
  st.subheader("Finding the canarie in the coal mine")
6
 
7
- model_name = st.selectbox(
8
- "Model",
9
- model_names())
10
 
11
- balance_type = st.selectbox(
12
- "Do you charge on available balance or ledger balance?",
13
- ["available balance", "ledger balance"]
14
- )
15
 
16
- apsn_transactions = st.selectbox(
17
- "Do you charge for APSN transactions?",
18
- ["yes", "no"]
19
- )
20
 
21
- max_fees_per_day = st.number_input(
22
- "How many overdraft fees per day can be charged?",
23
- min_value=0, max_value=10,
24
- )
25
 
26
- min_overdrawn_fee = st.number_input(
27
- "What is the minimum amount overdrawn to incur a fee?",
28
- min_value=0, max_value=500
29
- )
30
 
31
- min_transaction_overdraft = st.number_input(
32
- "What is the minimum transaction amount to trigger an overdraft?",
33
- min_value=0, max_value=500
34
- )
35
 
36
  uploaded_file = st.file_uploader("Choose a file")
 
37
  if uploaded_file is not None:
38
- diff = ""
39
  with st.spinner('Please wait ...'):
40
  try:
41
- diff = pipeline(
 
 
 
42
  uploaded_file,
43
  model_name,
44
  balance_type,
@@ -47,39 +73,16 @@ if uploaded_file is not None:
47
  min_overdrawn_fee,
48
  min_transaction_overdraft
49
  )
 
 
 
 
 
 
 
 
 
 
 
50
  except Exception as e:
51
  st.exception(e)
52
-
53
- diff_lines = diff.split("\n")
54
-
55
- styled_diff = """
56
- <style>
57
- body {
58
- font-family: 'Times New Roman', serif;
59
- line-height: 1.5;
60
- }
61
- .diff {
62
- margin: 10px 0;
63
- padding: 5px;
64
- }
65
- .add {
66
- color: green;
67
- }
68
- .remove {
69
- color: red;
70
- }
71
- </style>
72
- <div>
73
- """
74
- for line in diff_lines:
75
- if line.startswith('+'):
76
- styled_diff += f'<div class="diff add">{line}</div>'
77
- elif line.startswith('-'):
78
- styled_diff += f'<div class="diff remove">{line}</div>'
79
- else:
80
- styled_diff += f'<div class="diff">{line}</div>'
81
- styled_diff += "</div>"
82
-
83
- st.markdown(styled_diff, unsafe_allow_html=True)
84
-
85
- st.markdown("The key changes are:")
 
1
  import streamlit as st
2
  from langchain_pipeline import pipeline, model_names
3
+ import pdfplumber
4
+ from docx import Document
5
+ from redlines import Redlines
6
 
7
+ def pdf_to_word(pdf_path, word_path):
8
+ with pdfplumber.open(pdf_path) as pdf:
9
+ full_text = ""
10
+ for page in pdf.pages:
11
+ full_text += page.extract_text() + "\n"
12
+
13
+ doc = Document()
14
+ for para in full_text.split("\n"):
15
+ doc.add_paragraph(para)
16
+ doc.save(word_path)
17
+
18
+ def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
19
+ return pipeline(
20
+ file,
21
+ model_name,
22
+ balance_type,
23
+ apsn_transactions,
24
+ max_fees_per_day,
25
+ min_overdrawn_fee,
26
+ min_transaction_overdraft
27
+ )
28
+
29
+ def redline_changes(original_path, revised_path):
30
+ original_doc = Document(original_path)
31
+ revised_doc = Document(revised_path)
32
+
33
+ original_text = "\n".join([para.text for para in original_doc.paragraphs])
34
+ revised_text = "\n".join([para.text for para in revised_doc.paragraphs])
35
+
36
+ redline = Redlines(original_text, revised_text)
37
+ diff_html = redline.output_markdown()
38
+
39
+ diff_doc = Document()
40
+ diff_doc.add_paragraph(diff_html)
41
+ diff_doc.save("redlined_document.docx")
42
+
43
+ # Streamlit App
44
  st.title("Canarie AI Prototype")
45
  st.subheader("Finding the canarie in the coal mine")
46
 
47
+ model_name = st.selectbox("Model", model_names())
 
 
48
 
49
+ balance_type = st.selectbox("Do you charge on available balance or ledger balance?", ["available balance", "ledger balance"])
 
 
 
50
 
51
+ apsn_transactions = st.selectbox("Do you charge for APSN transactions?", ["yes", "no"])
 
 
 
52
 
53
+ max_fees_per_day = st.number_input("How many overdraft fees per day can be charged?", min_value=0, max_value=10)
 
 
 
54
 
55
+ min_overdrawn_fee = st.number_input("What is the minimum amount overdrawn to incur a fee?", min_value=0, max_value=500)
 
 
 
56
 
57
+ min_transaction_overdraft = st.number_input("What is the minimum transaction amount to trigger an overdraft?", min_value=0, max_value=500)
 
 
 
58
 
59
  uploaded_file = st.file_uploader("Choose a file")
60
+
61
  if uploaded_file is not None:
 
62
  with st.spinner('Please wait ...'):
63
  try:
64
+ original_word_path = "original_document.docx"
65
+ pdf_to_word(uploaded_file, original_word_path)
66
+
67
+ diff = apply_pipeline(
68
  uploaded_file,
69
  model_name,
70
  balance_type,
 
73
  min_overdrawn_fee,
74
  min_transaction_overdraft
75
  )
76
+
77
+ revised_word_path = "revised_document.docx"
78
+ revised_doc = Document()
79
+ for line in diff.split("\n"):
80
+ revised_doc.add_paragraph(line)
81
+ revised_doc.save(revised_word_path)
82
+
83
+ redline_changes(original_word_path, revised_word_path)
84
+
85
+ st.success("Redlined document created successfully!")
86
+
87
  except Exception as e:
88
  st.exception(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
langchain_pipeline.py CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -10,4 +10,13 @@ langchain_google_genai==1.0.5
10
  google_generativeai
11
  pdf2docx
12
  pymupdf
13
- python-docx
 
 
 
 
 
 
 
 
 
 
10
  google_generativeai
11
  pdf2docx
12
  pymupdf
13
+ python-docx
14
+ streamlit
15
+ pdfplumber
16
+ python-docx
17
+ redlines
18
+ langchain_astradb
19
+ langchain_core
20
+ langchain_openai
21
+ langchain_anthropic
22
+ langchain_google_genai