import streamlit as st
import os
from typing import List, Tuple, Optional
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
from RAG import RAG
from bpl_scraper import DigitalCommonwealthScraper
import logging
import json
import shutil
from PIL import Image
import io
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Page configuration
st.set_page_config(
    page_title="Boston Public Library Chatbot",
    page_icon="🤖",
    layout="wide"
)
def initialize_models() -> Tuple[Optional[ChatOpenAI], Optional[HuggingFaceEmbeddings]]:
    """Initialize the language model and embeddings."""
    try:
        load_dotenv()

        # Initialize OpenAI chat model (note: "gpt-4o-mini" is a real,
        # cheaper model name, not a typo; swap it in here if cost matters)
        llm = ChatOpenAI(
            model="gpt-4",
            temperature=0,
            timeout=60,  # reasonable timeout for chat completions
            max_retries=2
        )

        # Initialize embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        return llm, embeddings
    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        st.error(f"Failed to initialize models: {str(e)}")
        return None, None
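
# A minimal sketch of a startup sanity check (assumption: OPENAI_API_KEY and
# PINECONE_API_KEY are the secrets this app ultimately depends on; adjust the
# list to match your deployment). Not wired into main() — call it before
# initialize_models() if you want an explicit error message for missing keys.
def missing_env_vars(required: Tuple[str, ...] = ("OPENAI_API_KEY", "PINECONE_API_KEY")) -> List[str]:
    """Return the names of required environment variables that are unset."""
    return [name for name in required if not os.getenv(name)]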
def process_message(
    query: str,
    llm: ChatOpenAI,
    index_name: str,
    embeddings: HuggingFaceEmbeddings
) -> Tuple[str, List]:
    """Process the user message using the RAG system."""
    try:
        response, sources = RAG(
            query=query,
            llm=llm,
            index_name=index_name,
            embeddings=embeddings
        )
        return response, sources
    except Exception as e:
        logger.error(f"Error in process_message: {str(e)}")
        return f"Error processing message: {str(e)}", []
def display_sources(sources: List) -> None:
    """Display sources in expandable sections with proper formatting."""
    if not sources:
        st.info("No sources available for this response.")
        return

    st.subheader("Sources")
    for i, doc in enumerate(sources, 1):
        try:
            with st.expander(f"Source {i}"):
                if hasattr(doc, 'page_content'):
                    st.markdown(f"**Content:** {doc.page_content[:100]} ...")
                    if hasattr(doc, 'metadata'):
                        for key, value in doc.metadata.items():
                            st.markdown(f"**{key.title()}:** {value}")

                        # Web scraper to display images of sources.
                        # Especially helpful if the sources are images
                        # themselves or are OCR'd text files.
                        scraper = DigitalCommonwealthScraper()
                        images = scraper.extract_images(doc.metadata["URL"])
                        images = images[:1]

                        # If there are no images, move on to the next source
                        # (a bare `return` here would stop rendering all
                        # remaining sources)
                        if not images:
                            st.warning("No images found on the page.")
                            continue

                        # Delete the download directory if it already exists
                        # to clear the cached images for each listed source
                        output_dir = 'downloaded_images'
                        if os.path.exists(output_dir):
                            shutil.rmtree(output_dir)

                        # Download the main image to a local directory and
                        # display it with its alt text as the caption
                        downloaded_files = scraper.download_images(images)
                        st.image(downloaded_files, width=400, caption=[
                            img.get('alt', f'Image {j+1}') for j, img in enumerate(images)
                        ])
                else:
                    st.markdown(f"**Content:** {str(doc)}")
        except Exception as e:
            logger.error(f"Error displaying source {i}: {str(e)}")
            st.error(f"Error displaying source {i}")
def main():
    st.title("Boston Public Library RAG Chatbot")

    # Initialize session state
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Initialize models
    llm, embeddings = initialize_models()
    if not llm or not embeddings:
        st.error("Failed to initialize the application. Please check the logs.")
        return

    # Constants
    INDEX_NAME = 'bpl-rag'

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    user_input = st.chat_input("Type your message here...")
    if user_input:
        # Display user message
        with st.chat_message("user"):
            st.markdown(user_input)
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Process and display assistant response
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response, sources = process_message(
                    query=user_input,
                    llm=llm,
                    index_name=INDEX_NAME,
                    embeddings=embeddings
                )
                if isinstance(response, str):
                    st.markdown(response)
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": response
                    })

                    # Display sources
                    display_sources(sources)
                else:
                    st.error("Received an invalid response format")

    # Footer
    st.markdown("---")
    st.markdown(
        "Built with ❤️ using Streamlit + LangChain + OpenAI",
        help="An AI-powered chatbot with RAG capabilities"
    )


if __name__ == "__main__":
    main()