anmolsahai committed on
Commit
56f5cbd
·
1 Parent(s): eeae563
__pycache__/langchain_pipeline.cpython-310.pyc CHANGED
Binary files a/__pycache__/langchain_pipeline.cpython-310.pyc and b/__pycache__/langchain_pipeline.cpython-310.pyc differ
 
app.py CHANGED
@@ -3,16 +3,15 @@ from langchain_pipeline import pipeline, model_names
3
  import pdfplumber
4
  from docx import Document
5
  from redlines import Redlines
 
6
 
7
  def pdf_to_word(pdf_path, word_path):
8
  with pdfplumber.open(pdf_path) as pdf:
9
- full_text = ""
10
  for page in pdf.pages:
11
- full_text += page.extract_text() + "\n"
12
-
13
- doc = Document()
14
- for para in full_text.split("\n"):
15
- doc.add_paragraph(para)
16
  doc.save(word_path)
17
 
18
  def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
@@ -26,7 +25,7 @@ def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_p
26
  min_transaction_overdraft
27
  )
28
 
29
- def redline_changes(original_path, revised_path):
30
  original_doc = Document(original_path)
31
  revised_doc = Document(revised_path)
32
 
@@ -34,11 +33,13 @@ def redline_changes(original_path, revised_path):
34
  revised_text = "\n".join([para.text for para in revised_doc.paragraphs])
35
 
36
  redline = Redlines(original_text, revised_text)
37
- diff_html = redline.output_markdown()
38
 
39
  diff_doc = Document()
40
- diff_doc.add_paragraph(diff_html)
41
- diff_doc.save("redlined_document.docx")
 
 
42
 
43
  # Streamlit App
44
  st.title("Canarie AI Prototype")
@@ -61,7 +62,7 @@ uploaded_file = st.file_uploader("Choose a file")
61
  if uploaded_file is not None:
62
  with st.spinner('Please wait ...'):
63
  try:
64
- original_word_path = "original_document.docx"
65
  pdf_to_word(uploaded_file, original_word_path)
66
 
67
  diff = apply_pipeline(
@@ -74,14 +75,22 @@ if uploaded_file is not None:
74
  min_transaction_overdraft
75
  )
76
 
77
- revised_word_path = "revised_document.docx"
78
  revised_doc = Document()
79
  for line in diff.split("\n"):
80
  revised_doc.add_paragraph(line)
81
  revised_doc.save(revised_word_path)
82
 
83
- redline_changes(original_word_path, revised_word_path)
 
84
 
 
 
 
 
 
 
 
85
  st.success("Redlined document created successfully!")
86
 
87
  except Exception as e:
 
3
  import pdfplumber
4
  from docx import Document
5
  from redlines import Redlines
6
+ import tempfile
7
 
8
  def pdf_to_word(pdf_path, word_path):
9
  with pdfplumber.open(pdf_path) as pdf:
10
+ doc = Document()
11
  for page in pdf.pages:
12
+ for item in page.extract_words():
13
+ doc.add_paragraph(item['text'])
14
+ doc.add_page_break()
 
 
15
  doc.save(word_path)
16
 
17
  def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
 
25
  min_transaction_overdraft
26
  )
27
 
28
+ def redline_changes(original_path, revised_path, output_path):
29
  original_doc = Document(original_path)
30
  revised_doc = Document(revised_path)
31
 
 
33
  revised_text = "\n".join([para.text for para in revised_doc.paragraphs])
34
 
35
  redline = Redlines(original_text, revised_text)
36
+ diff_html = redline.output_html()
37
 
38
  diff_doc = Document()
39
+ for line in diff_html.split("\n"):
40
+ diff_doc.add_paragraph(line)
41
+
42
+ diff_doc.save(output_path)
43
 
44
  # Streamlit App
45
  st.title("Canarie AI Prototype")
 
62
  if uploaded_file is not None:
63
  with st.spinner('Please wait ...'):
64
  try:
65
+ original_word_path = tempfile.NamedTemporaryFile(delete=False, suffix=".docx").name
66
  pdf_to_word(uploaded_file, original_word_path)
67
 
68
  diff = apply_pipeline(
 
75
  min_transaction_overdraft
76
  )
77
 
78
+ revised_word_path = tempfile.NamedTemporaryFile(delete=False, suffix=".docx").name
79
  revised_doc = Document()
80
  for line in diff.split("\n"):
81
  revised_doc.add_paragraph(line)
82
  revised_doc.save(revised_word_path)
83
 
84
+ redlined_output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".docx").name
85
+ redline_changes(original_word_path, revised_word_path, redlined_output_path)
86
 
87
+ with open(redlined_output_path, "rb") as f:
88
+ st.download_button(
89
+ label="Download Redlined Document",
90
+ data=f,
91
+ file_name="redlined_document.docx",
92
+ mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
93
+ )
94
  st.success("Redlined document created successfully!")
95
 
96
  except Exception as e: