bmv2021 committed
Commit cf15415 · 1 Parent(s): 2b87ca1

added image feature

Files changed (1)
  1. new_streamlit.py +188 -0
new_streamlit.py ADDED
@@ -0,0 +1,188 @@
+ import streamlit as st
+ import os
+ from typing import List, Tuple, Optional
+ from pinecone import Pinecone
+ from langchain_pinecone import PineconeVectorStore
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import PromptTemplate
+ from dotenv import load_dotenv
+ from RAG import RAG
+ from bpl_scraper import DigitalCommonwealthScraper
+ import logging
+ import json
+ import shutil
+ from PIL import Image
+ import io
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Page configuration
+ st.set_page_config(
+     page_title="Boston Public Library Chatbot",
+     page_icon="🤖",
+     layout="wide"
+ )
+
+ def initialize_models() -> Tuple[Optional[ChatOpenAI], Optional[HuggingFaceEmbeddings]]:
+     """Initialize the language model and embeddings."""
+     try:
+         load_dotenv()
+
+         # Initialize OpenAI model
+         llm = ChatOpenAI(
+             model="gpt-4",  # switched from gpt-4o-mini
+             temperature=0,
+             timeout=60,  # request timeout in seconds
+             max_retries=2
+         )
+
+         # Initialize embeddings
+         embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2"
+         )
+
+         return llm, embeddings
+
+     except Exception as e:
+         logger.error(f"Error initializing models: {str(e)}")
+         st.error(f"Failed to initialize models: {str(e)}")
+         return None, None
+
+ def process_message(
+     query: str,
+     llm: ChatOpenAI,
+     index_name: str,
+     embeddings: HuggingFaceEmbeddings
+ ) -> Tuple[str, List]:
+     """Process the user message using the RAG system."""
+     try:
+         response, sources = RAG(
+             query=query,
+             llm=llm,
+             index_name=index_name,
+             embeddings=embeddings
+         )
+         return response, sources
+     except Exception as e:
+         logger.error(f"Error in process_message: {str(e)}")
+         return f"Error processing message: {str(e)}", []
+
+ def display_sources(sources: List) -> None:
+     """Display sources in expandable sections with proper formatting."""
+     if not sources:
+         st.info("No sources available for this response.")
+         return
+
+     st.subheader("Sources")
+     for i, doc in enumerate(sources, 1):
+         try:
+             with st.expander(f"Source {i}"):
+                 if hasattr(doc, 'page_content'):
+                     st.markdown(f"**Content:** {doc.page_content[:100]} ...")
+                     if hasattr(doc, 'metadata'):
+                         for key, value in doc.metadata.items():
+                             st.markdown(f"**{key.title()}:** {value}")
+
+                     # Web scraper to display images of sources.
+                     # Especially helpful if the sources are images themselves
+                     # or are OCR'd text files.
+                     scraper = DigitalCommonwealthScraper()
+                     images = scraper.extract_images(doc.metadata["URL"])
+                     images = images[:1]
+
+                     # If there are no images, skip image display for this
+                     # source but keep listing the remaining sources.
+                     if not images:
+                         st.warning("No images found on the page.")
+                         continue
+
+                     # Delete the download directory if it already exists to
+                     # clear the cached images from the previous source.
+                     output_dir = 'downloaded_images'
+                     if os.path.exists(output_dir):
+                         shutil.rmtree(output_dir)
+
+                     # Download the main image to a local directory
+                     downloaded_files = scraper.download_images(images)
+
+                     # Display the image, using its alt text as the caption
+                     st.image(downloaded_files, width=400, caption=[
+                         img.get('alt', f'Image {idx + 1}') for idx, img in enumerate(images)
+                     ])
+
+                 else:
+                     st.markdown(f"**Content:** {str(doc)}")
+
+         except Exception as e:
+             logger.error(f"Error displaying source {i}: {str(e)}")
+             st.error(f"Error displaying source {i}")
+
+
+ def main():
+     st.title("Boston Public Library RAG Chatbot")
+
+     # Initialize session state
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     # Initialize models
+     llm, embeddings = initialize_models()
+     if not llm or not embeddings:
+         st.error("Failed to initialize the application. Please check the logs.")
+         return
+
+     # Constants
+     INDEX_NAME = 'bpl-rag'
+
+     # Display chat history
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # Chat input
+     user_input = st.chat_input("Type your message here...")
+
+     if user_input:
+         # Display user message
+         with st.chat_message("user"):
+             st.markdown(user_input)
+         st.session_state.messages.append({"role": "user", "content": user_input})
+
+         # Process and display assistant response
+         with st.chat_message("assistant"):
+             with st.spinner("Thinking..."):
+                 response, sources = process_message(
+                     query=user_input,
+                     llm=llm,
+                     index_name=INDEX_NAME,
+                     embeddings=embeddings
+                 )
+
+                 if isinstance(response, str):
+                     st.markdown(response)
+                     st.session_state.messages.append({
+                         "role": "assistant",
+                         "content": response
+                     })
+
+                     # Display sources
+                     display_sources(sources)
+
+                 else:
+                     st.error("Received an invalid response format")
+
+     # Footer
+     st.markdown("---")
+     st.markdown(
+         "Built with ❤️ using Streamlit + LangChain + OpenAI",
+         help="An AI-powered chatbot with RAG capabilities"
+     )
+
+ if __name__ == "__main__":
+     main()
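
For anyone trying the new image feature locally, below is a minimal pre-flight sketch (the file name preflight_check.py is hypothetical and not part of this commit). It assumes the app reads OPENAI_API_KEY (required by ChatOpenAI) from a local .env file, and that the RAG call against the 'bpl-rag' index also needs PINECONE_API_KEY; the latter is an assumption, since RAG's internals are not part of this diff.

# preflight_check.py: hypothetical helper, not part of this commit.
# Checks that the .env file carries the keys the app appears to rely on:
# OPENAI_API_KEY (ChatOpenAI) and PINECONE_API_KEY (assumed, for the
# 'bpl-rag' Pinecone index queried inside RAG).
import os
from dotenv import load_dotenv

REQUIRED_KEYS = ["OPENAI_API_KEY", "PINECONE_API_KEY"]

def check_env() -> list:
    """Return the required keys that are missing from the environment."""
    load_dotenv()
    return [key for key in REQUIRED_KEYS if not os.getenv(key)]

if __name__ == "__main__":
    missing = check_env()
    if missing:
        print("Missing environment variables: " + ", ".join(missing))
    else:
        print("Environment looks complete.")

Once the keys are in place, the app itself starts with: streamlit run new_streamlit.py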