karthikeyan-r committed on
Commit
450bd79
·
verified ·
1 Parent(s): d0fec0d

Create pdfDocumentProcessor.py

Browse files
Files changed (1) hide show
  1. pdfDocumentProcessor.py +57 -0
pdfDocumentProcessor.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ import base64
5
+ from findUpdate import FindUpdate # Import the FindUpdate class
6
+ from tempfile import NamedTemporaryFile
7
+
8
class PDFDocumentProcessor:
    """Streamlit front end for comparing an agreement PDF against a template PDF.

    The two uploads are written to temporary files, handed to
    ``FindUpdate.main_processing_function``, and the reported changes are
    shown as a table together with a CSV download link.
    """

    # Columns shown in the results table and written to the CSV, in order.
    _CHANGE_COLUMNS = (
        'section_number',
        'page_number',
        'actual',
        'changed',
        'analysis',
        'type_of_change',
    )

    def __init__(self):
        """Start with no uploads and a fresh ``FindUpdate`` instance."""
        self.uploaded_agreement = None
        self.uploaded_template = None
        self.find_update = FindUpdate()  # Does the actual document comparison

    def file_uploaders(self):
        """Render the two PDF upload widgets and remember their current values."""
        self.uploaded_agreement = st.file_uploader("Upload the PDF Agreement", type=['pdf'])
        self.uploaded_template = st.file_uploader("Upload the PDF Template", type=['pdf'])

    @classmethod
    def _changes_to_frame(cls, changes):
        """Build the display DataFrame from the ``changes`` entry of a result.

        An empty ``changes`` value yields an empty frame that still carries
        the expected columns, so the UI shows an empty table instead of
        failing on the column selection. A non-empty value that lacks one of
        the expected columns still raises ``KeyError``.
        """
        columns = list(cls._CHANGE_COLUMNS)
        frame = pd.DataFrame(changes)
        if frame.empty:
            return pd.DataFrame(columns=columns)
        return frame[columns]

    @staticmethod
    def _build_download_link(csv_text):
        """Return an HTML anchor that downloads ``csv_text`` as a CSV file."""
        # The CSV is embedded as a base64 data URI (some browsers need base64 encoding).
        b64 = base64.b64encode(csv_text.encode()).decode()
        return f'<a href="data:file/csv;base64,{b64}" download="document_changes.csv">Download CSV File</a>'

    @staticmethod
    def _remove_quietly(*paths):
        """Delete each path, ignoring files that are missing or cannot be removed.

        Every path is attempted independently so that a failure on one file
        never prevents the others from being cleaned up.
        """
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup of temporary files; nothing useful to report.
                pass

    def process_files(self, agreement_path, template_path):
        """Compare the two saved PDFs and render the outcome in the Streamlit UI.

        Args:
            agreement_path: Path of the temporary agreement PDF.
            template_path: Path of the temporary template PDF.

        Any exception raised while processing is reported with ``st.error``
        rather than propagated. Both paths are deleted before returning,
        whether or not processing succeeded.
        """
        try:
            # Use the find_update instance to call the processing function
            result = self.find_update.main_processing_function(agreement_path, template_path)
            st.success("Files successfully processed!")

            # Convert the result's 'changes' entry to a DataFrame
            df_changes = self._changes_to_frame(result['changes'])

            # Display the DataFrame in the UI
            st.dataframe(df_changes, height=600)  # You can adjust height based on your needs

            # Offer the same data as a CSV download. The link must be built
            # before it is rendered; previously `href` was never assigned.
            href = self._build_download_link(df_changes.to_csv(index=False))
            st.markdown(href, unsafe_allow_html=True)

        except Exception as e:
            # UI boundary: show the failure to the user instead of crashing the app.
            st.error(f"Error processing files: {e}")
        finally:
            # Clean up temporary files after processing
            self._remove_quietly(agreement_path, template_path)

    def save_uploaded_files(self):
        """Save the uploaded files temporarily and run the comparison on them.

        Does nothing unless both the agreement and the template have been
        uploaded. If writing either temporary file fails, any temporary file
        already created is removed and the exception is re-raised.
        """
        if not (self.uploaded_agreement and self.uploaded_template):
            return

        temp_paths = []
        try:
            for upload in (self.uploaded_agreement, self.uploaded_template):
                # delete=False: the file must outlive this `with` so that
                # process_files can read it by path; process_files removes it.
                with NamedTemporaryFile(delete=False, suffix=".pdf", mode='wb') as temp_file:
                    temp_paths.append(temp_file.name)
                    temp_file.write(upload.read())
        except Exception:
            self._remove_quietly(*temp_paths)
            raise

        agreement_path, template_path = temp_paths
        self.process_files(agreement_path, template_path)