RakeshUtekar committed
Commit 85c57d3 · verified · 1 Parent(s): bcf970d

Update app.py

Files changed (1):
  1. app.py (+73, -5)
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import time
 
-import openai
 import streamlit as st
 from dotenv import load_dotenv
 
@@ -10,12 +9,9 @@ from generate import generate_response
 from preprocess import preprocess_text
 from retrieve import create_vectorizer, retrieve
 
-# Load environment variables from .env file
+# Load environment variables from .env file (if needed)
 load_dotenv()
 
-# Set OpenAI API key
-openai.api_key = os.getenv('api_key')
-
 # Initialize session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
@@ -32,8 +28,80 @@ st.title("RAG-based PDF Query System")
 uploaded_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
 
 if uploaded_files:
+    # Check if new files were uploaded (clear old data if new ones are uploaded)
     if "uploaded_files" not in st.session_state or uploaded_files != st.session_state.uploaded_files:
         st.session_state.uploaded_files = uploaded_files
+        st.session_state.messages = []
+        st.session_state.pdf_files = []
+        st.session_state.processed_texts = []
+
+        # Initialize status container
+        with st.status("Processing the uploaded PDFs...", state="running") as status:
+            # Save uploaded files to disk
+            for uploaded_file in uploaded_files:
+                with open(uploaded_file.name, "wb") as f:
+                    f.write(uploaded_file.getbuffer())
+                st.session_state.pdf_files.append(uploaded_file.name)
+
+            # Extract text from PDFs
+            num_files = len(st.session_state.pdf_files)
+            texts = []
+            for i, pdf_file in enumerate(st.session_state.pdf_files):
+                st.write(f"Extracting text from file {i + 1} of {num_files}...")
+                text = extract_text_from_pdfs([pdf_file])
+                texts.extend(text)
+                time.sleep(0.1)
+
+            # Preprocess text
+            st.write("Preprocessing text...")
+            st.session_state.processed_texts = preprocess_text(texts)
+            time.sleep(0.1)
+
+            # Create vectorizer and transform texts
+            st.write("Creating vectorizer and transforming texts...")
+            st.session_state.vectorizer, st.session_state.X = create_vectorizer(st.session_state.processed_texts)
+            time.sleep(0.1)
+
+            # Update status to complete
+            status.update(label="Processing complete!", state="complete")
+
+else:
+    st.stop()
+
+# Chat interface
+st.write("### Ask a question about the uploaded PDFs")
+
+# Display chat messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.write(message["content"])
+
+# Chat input
+prompt = st.chat_input("Ask something about the uploaded PDFs")
+if prompt:
+    # Add user message to session state
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Retrieve relevant texts
+    top_indices = retrieve(prompt, st.session_state.X, st.session_state.vectorizer)
+    retrieved_texts = [" ".join(st.session_state.processed_texts[i]) for i in top_indices]
+
+    # Generate response using Qwen2.5-7B-Instruct-1M
+    response = generate_response(retrieved_texts, prompt)
+    st.session_state.messages.append({"role": "assistant", "content": response})
+
+    # Display user message
+    with st.chat_message("user"):
+        st.write(prompt)
+
+    # Display assistant message
+    with st.chat_message("assistant"):
+        st.write(response)
+
+    # Clean up uploaded files
+    for pdf_file in st.session_state.pdf_files:
+        if os.path.exists(pdf_file):
+            os.remove(pdf_file)
         st.session_state.messages = []  # Clear previous messages
         st.session_state.pdf_files = []
         st.session_state.processed_texts = []
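
The processing loop calls extract_text_from_pdfs and preprocess_text, which live in modules this commit does not touch. A minimal sketch of implementations that would satisfy the call shapes in the diff (a list of file paths in, one text string per PDF out; preprocessing returns token lists that app.py later re-joins with " ".join(...)), assuming pypdf for extraction and plain whitespace tokenization; only the function names and signatures come from the diff, the bodies are illustrative:

from pypdf import PdfReader


def extract_text_from_pdfs(pdf_paths):
    """Return one text string per PDF path (assumption: pypdf is available)."""
    texts = []
    for path in pdf_paths:
        reader = PdfReader(path)
        # Some pages return None from extract_text(); treat those as empty.
        texts.append("\n".join(page.extract_text() or "" for page in reader.pages))
    return texts


def preprocess_text(texts):
    """Lowercase and whitespace-tokenize each document.

    Returns a list of token lists, matching how app.py rebuilds documents
    with " ".join(processed_texts[i]) before passing them to the generator.
    """
    return [text.lower().split() for text in texts]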
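
create_vectorizer and retrieve come from retrieve.py, which is also outside this diff. Given that create_vectorizer returns a (vectorizer, X) pair and retrieve(prompt, st.session_state.X, st.session_state.vectorizer) returns indices into processed_texts, a plausible TF-IDF sketch with scikit-learn could look like the following; TfidfVectorizer, cosine similarity, and the top_k default are assumptions, not something the commit specifies:

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def create_vectorizer(processed_texts):
    """Fit a TF-IDF vectorizer over the preprocessed documents."""
    # processed_texts is assumed to be a list of token lists (see preprocess_text),
    # so re-join each document into a single string before vectorizing.
    docs = [" ".join(tokens) for tokens in processed_texts]
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(docs)
    return vectorizer, X


def retrieve(query, X, vectorizer, top_k=3):
    """Return indices of the top_k documents most similar to the query."""
    query_vec = vectorizer.transform([query])
    scores = cosine_similarity(query_vec, X).flatten()
    return np.argsort(scores)[::-1][:top_k]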
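
Finally, the diff's comment says responses are generated with Qwen2.5-7B-Instruct-1M through generate_response in generate.py. Whether that module runs the model locally or calls a hosted endpoint is not visible here; the sketch below uses the Hugging Face InferenceClient chat API purely as one possible implementation, with the HF_TOKEN environment variable and the prompt wording as assumptions:

import os

from huggingface_hub import InferenceClient


def generate_response(retrieved_texts, prompt):
    """Answer the user's question using the retrieved passages as context."""
    # Assumption: the model is reachable through the hosted inference API and an
    # access token is provided via the HF_TOKEN environment variable.
    client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct-1M", token=os.getenv("HF_TOKEN"))
    context = "\n\n".join(retrieved_texts)
    messages = [
        {"role": "system", "content": "Answer the question using only the provided context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {prompt}"},
    ]
    completion = client.chat_completion(messages=messages, max_tokens=512)
    return completion.choices[0].message.content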