taaha3244 committed on
Commit
8999dd1
·
verified ·
1 Parent(s): 177e0f7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import tempfile
4
+ import streamlit as st
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+
7
+ from main import summarize_pdf_document
8
+ from main import retrieve_documents
9
+ from main import embed_document_data
10
+ from main import is_document_embedded
11
+
12
+
13
+
14
+
15
+ load_dotenv()
16
+
17
def main():
    """Render the whole page: sidebar upload, summaries, embedding and Q&A.

    Uploaded PDFs are written to temporary files and handed to the summary
    section; a separate button embeds them on demand. The Q&A section is
    rendered whether or not anything has been uploaded.
    """
    st.sidebar.title("PDF Management")
    pdf_uploads = st.sidebar.file_uploader(
        "Upload PDF files",
        type=["pdf"],
        accept_multiple_files=True,
    )

    # With no uploads there are no (temp path, original name) pairs to pass on.
    files_info = save_uploaded_files(pdf_uploads) if pdf_uploads else []

    if pdf_uploads:
        process_documents(files_info)
        embed_requested = st.button('Add Uploaded Documents in Q nd A')
        if embed_requested:
            embed_documents(files_info)

    # The Q&A form does not depend on an upload being present.
    display_qna_section(files_info)
30
+
31
+
32
def save_uploaded_files(uploaded_files):
    """Copy each upload into its own temporary ``.pdf`` file.

    Args:
        uploaded_files: Iterable of objects exposing ``getvalue()`` (the raw
            bytes) and ``name`` (the original filename).

    Returns:
        A list of ``(temp_file_path, original_filename)`` tuples, in the same
        order as the input.

    The temporary files are created with ``delete=False``, so they remain on
    disk after this function returns; nothing here removes them.
    """
    saved = []
    for upload in uploaded_files:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as handle:
            handle.write(upload.getvalue())
        # The path stays valid after the handle is closed because of delete=False.
        saved.append((handle.name, upload.name))
    return saved
43
+
44
+
45
def process_documents(files_info):
    """Render the "Document Summaries" section and summarize on request.

    Args:
        files_info: Iterable of ``(temp_path, original_name)`` pairs.

    Nothing is summarized until the user presses the summarize button. A
    failure on one document is reported with ``st.error`` and the loop moves
    on to the next document.
    """
    st.header("Document Summaries")
    if not st.button('Summarize Uploaded Documents'):
        return

    # The key is the same for every document, so look it up once.
    api_key = os.getenv('OPENAI_API_KEY')

    for position, (temp_path, original_name) in enumerate(files_info):
        with st.container():  # Group each document's heading with its summary
            st.write(f"Summary for {original_name}:")
            try:
                summary = summarize_pdf_document(temp_path, api_key)
                # Widget keys must be unique per page. Two uploads can share a
                # filename, so the position is part of the key as well.
                st.text_area(
                    "",
                    value=summary,
                    height=200,
                    key=f"summary_{position}_{original_name}",
                )
            except Exception as e:
                st.error(f"Failed to summarize {original_name}: {str(e)}")
59
+
60
+
61
def embed_documents(files_info):
    """Embed every not-yet-embedded document under its original filename.

    Args:
        files_info: Iterable of ``(temp_path, original_name)`` pairs.

    Each PDF is loaded from its temporary path, the ``source`` metadata of
    every loaded document is overwritten with the original filename, and the
    result is passed to ``embed_document_data``. Outcomes are reported through
    ``st.success``, ``st.info`` (already embedded) or ``st.error``.
    """
    for temp_path, original_name in files_info:
        if is_document_embedded(original_name):
            st.info(f"{original_name} is already embedded.")
            continue

        try:
            pages = PyPDFLoader(temp_path).load()

            # Replace the temp-file path in the metadata with the real name.
            for page in pages:
                page.metadata['source'] = original_name

            # Hand over a fresh list, mirroring the original's separate list.
            embed_document_data(list(pages))
            st.success(f"Embedded {original_name}")
        except Exception as e:
            st.error(f"Failed to embed {original_name}: {str(e)}")
82
+
83
+
84
def display_qna_section(files_info):
    """Render the question form and show the answer to a submitted question.

    Args:
        files_info: Accepted for the caller's convenience; not used here.

    An empty submission produces an error message instead of a lookup.
    """
    st.header("Question and Answer")
    with st.form("qa_form"):
        question = st.text_input("Enter your question here:")
        submitted = st.form_submit_button('Get Answer')

    # Nothing to do until the form has actually been submitted.
    if not submitted:
        return

    if question:
        st.write(handle_query(question))
    else:
        st.error("Please enter a question to get an answer.")
96
+
97
def handle_query(query):
    """Return whatever ``retrieve_documents`` produces for *query*."""
    return retrieve_documents(query)
101
+
102
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()