added image feature
new_streamlit.py ADDED (+188 -0)
@@ -0,0 +1,188 @@
import streamlit as st
import os
from typing import List, Tuple, Optional
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
from RAG import RAG
from bpl_scraper import DigitalCommonwealthScraper
import logging
import json
import shutil
from PIL import Image
import io

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Page configuration
st.set_page_config(
    page_title="Boston Public Library Chatbot",
    page_icon="🤖",
    layout="wide"
)

def initialize_models() -> Tuple[Optional[ChatOpenAI], Optional[HuggingFaceEmbeddings]]:
    """Initialize the language model and embeddings."""
    try:
        load_dotenv()

        # Initialize OpenAI model
        llm = ChatOpenAI(
            model="gpt-4",  # Changed from gpt-4o-mini which appears to be a typo
            temperature=0,
            timeout=60,  # Added reasonable timeout
            max_retries=2
        )

        # Initialize embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        return llm, embeddings

    except Exception as e:
        logger.error(f"Error initializing models: {str(e)}")
        st.error(f"Failed to initialize models: {str(e)}")
        return None, None

def process_message(
    query: str,
    llm: ChatOpenAI,
    index_name: str,
    embeddings: HuggingFaceEmbeddings
) -> Tuple[str, List]:
    """Process the user message using the RAG system."""
    try:
        response, sources = RAG(
            query=query,
            llm=llm,
            index_name=index_name,
            embeddings=embeddings
        )
        return response, sources
    except Exception as e:
        logger.error(f"Error in process_message: {str(e)}")
        return f"Error processing message: {str(e)}", []

def display_sources(sources: List) -> None:
    """Display sources in expandable sections with proper formatting."""
    if not sources:
        st.info("No sources available for this response.")
        return

    st.subheader("Sources")
    for i, doc in enumerate(sources, 1):
        try:
            with st.expander(f"Source {i}"):
                if hasattr(doc, 'page_content'):
                    st.markdown(f"**Content:** {doc.page_content[0:100] + ' ...'}")
                    if hasattr(doc, 'metadata'):
                        for key, value in doc.metadata.items():
                            st.markdown(f"**{key.title()}:** {value}")

                    # Web scraper to display images of the sources.
                    # Especially helpful if the sources are images themselves
                    # or are OCR'd text files.
                    scraper = DigitalCommonwealthScraper()
                    images = scraper.extract_images(doc.metadata["URL"])
                    images = images[:1]

                    # If there are no images, skip to the next source
                    # instead of aborting the whole listing.
                    if not images:
                        st.warning("No images found on the page.")
                        continue

                    # Delete the download directory if it already exists
                    # to clear the existing cache of images for each listed source
                    output_dir = 'downloaded_images'
                    if os.path.exists(output_dir):
                        shutil.rmtree(output_dir)

                    # Download the main image to a local directory
                    downloaded_files = scraper.download_images(images)

                    # Display the image with its alt text (if any) as the caption
                    st.image(downloaded_files, width=400, caption=[
                        img.get('alt', f'Image {i+1}') for i, img in enumerate(images)
                    ])

                else:
                    st.markdown(f"**Content:** {str(doc)}")

        except Exception as e:
            logger.error(f"Error displaying source {i}: {str(e)}")
            st.error(f"Error displaying source {i}")


def main():
    st.title("Boston Public Library RAG Chatbot")

    # Initialize session state
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Initialize models
    llm, embeddings = initialize_models()
    if not llm or not embeddings:
        st.error("Failed to initialize the application. Please check the logs.")
        return

    # Constants
    INDEX_NAME = 'bpl-rag'

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    user_input = st.chat_input("Type your message here...")

    if user_input:
        # Display user message
        with st.chat_message("user"):
            st.markdown(user_input)
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Process and display assistant response
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response, sources = process_message(
                    query=user_input,
                    llm=llm,
                    index_name=INDEX_NAME,
                    embeddings=embeddings
                )

                if isinstance(response, str):
                    st.markdown(response)
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": response
                    })

                    # Display sources
                    display_sources(sources)

                else:
                    st.error("Received an invalid response format")

    # Footer
    st.markdown("---")
    st.markdown(
        "Built with ❤️ using Streamlit + LangChain + OpenAI",
        help="An AI-powered chatbot with RAG capabilities"
    )


if __name__ == "__main__":
    main()
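
The RAG helper imported from RAG.py is not part of this commit. As context, here is a minimal sketch of the interface new_streamlit.py assumes (query, llm, index_name, and embeddings in; a (response, sources) tuple out), built from the PineconeVectorStore and PromptTemplate imports the script already carries but never uses directly. The retriever depth, prompt wording, and answer handling below are illustrative assumptions, not the actual contents of RAG.py.

# Sketch only: an assumed implementation of the RAG() call used above.
from typing import List, Tuple

from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore


def RAG(query: str, llm: ChatOpenAI, index_name: str,
        embeddings: HuggingFaceEmbeddings) -> Tuple[str, List[Document]]:
    # Retrieve the most relevant chunks from the existing Pinecone index.
    store = PineconeVectorStore(index_name=index_name, embedding=embeddings)
    sources = store.as_retriever(search_kwargs={"k": 4}).invoke(query)

    # Stuff the retrieved text into a prompt and ask the chat model.
    prompt = PromptTemplate.from_template(
        "Answer the question using only the context below.\n\n"
        "Context:\n{context}\n\nQuestion: {question}"
    )
    context = "\n\n".join(doc.page_content for doc in sources)
    answer = llm.invoke(prompt.format(context=context, question=query))
    return answer.content, sources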
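
bpl_scraper.DigitalCommonwealthScraper is likewise outside this diff. From the way display_sources() uses it, extract_images(url) should return a list of dicts with at least an 'alt' key, and download_images(...) should return local file paths that st.image() can render. A rough sketch under those assumptions follows; the 'src' key and the requests/BeautifulSoup approach are guesses, not the real bpl_scraper.py.

# Sketch only: the scraper surface display_sources() relies on.
import os
from typing import Dict, List

import requests
from bs4 import BeautifulSoup


class DigitalCommonwealthScraper:
    def extract_images(self, url: str) -> List[Dict[str, str]]:
        # One dict per <img> tag on the source record's page.
        soup = BeautifulSoup(requests.get(url, timeout=30).text, "html.parser")
        return [{"src": img.get("src", ""), "alt": img.get("alt", "")}
                for img in soup.find_all("img")]

    def download_images(self, images: List[Dict[str, str]],
                        output_dir: str = "downloaded_images") -> List[str]:
        # Save each image locally and return the paths for st.image().
        os.makedirs(output_dir, exist_ok=True)
        paths = []
        for i, img in enumerate(images):
            path = os.path.join(output_dir, f"image_{i}.jpg")
            with open(path, "wb") as f:
                f.write(requests.get(img["src"], timeout=30).content)
            paths.append(path)
        return paths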
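
To run the app locally with streamlit run new_streamlit.py, load_dotenv() is expected to supply credentials that the underlying clients read from the environment: OPENAI_API_KEY for ChatOpenAI and PINECONE_API_KEY for the Pinecone-backed index. The .env file is not part of this commit; the snippet below only verifies that those standard variable names are set.

# Sanity check for the environment variables the dependencies expect.
import os
from dotenv import load_dotenv

load_dotenv()
for key in ("OPENAI_API_KEY", "PINECONE_API_KEY"):
    print(f"{key}: {'set' if os.getenv(key) else 'MISSING'}")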