Huzaifa367 commited on
Commit
bc68b42
·
verified ·
1 Parent(s): cf7e13d

Upload 12 files

Browse files
Files changed (12) hide show
  1. BrainBot.py +181 -0
  2. Dockerfile +66 -0
  3. README.Docker.md +22 -0
  4. README.md +6 -4
  5. brainbot.png +0 -0
  6. compose.yaml +49 -0
  7. dockerignore +34 -0
  8. gitattributes +35 -0
  9. main.py +348 -0
  10. requirements.txt +19 -0
  11. styles.css +11 -0
  12. utils.py +33 -0
BrainBot.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import tempfile
4
+ import validators
5
+ import os
6
+
7
# Custom CSS: inject styles.css into the Streamlit page.
# The file is read relative to the working directory (the app root in Docker).
with open('styles.css') as f:
    css = f.read()

# unsafe_allow_html is required for a raw <style> tag to be rendered.
st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
12
+
13
+ ## FUNCTIONS
14
+ ## -------------------------------------------------------------------------------------------
15
+ # Function to save the uploaded file as a temporary file and return its path.
16
def save_uploaded_file(uploaded_file, data_dir="/data"):
    """Persist an uploaded file to *data_dir* and return the temp file's path.

    Args:
        uploaded_file: File-like object (e.g. Streamlit UploadedFile) exposing
            a ``read()`` method.
        data_dir: Directory for the temporary copy. Defaults to "/data",
            which the Dockerfile pre-creates and chowns to the app user.

    Returns:
        str: Path of the temporary file holding the upload's bytes.
    """
    file_content = uploaded_file.read()  # Load the document into memory

    # Create the directory if it doesn't exist. (The original had this
    # commented out, which crashes outside the container where /data is
    # not pre-provisioned; exist_ok makes it a no-op in Docker.)
    os.makedirs(data_dir, exist_ok=True)

    # delete=False keeps the file on disk after the handle closes so the
    # path can be handed to the FastAPI backend.
    with tempfile.NamedTemporaryFile(delete=False, dir=data_dir) as temp_file:
        temp_file.write(file_content)  # Write the uploaded content
        temp_file_path = temp_file.name  # Path of the temporary file
    return temp_file_path
28
+
29
+ # Function to save the uploaded image as a temporary file and return its path.
30
def save_uploaded_image(uploaded_image, images_dir="/images"):
    """Persist an uploaded image as a .png temp file inside *images_dir*.

    Args:
        uploaded_image: File-like object (e.g. Streamlit UploadedFile)
            exposing a ``read()`` method.
        images_dir: Target directory. Defaults to "/images", which the
            Dockerfile pre-creates and chowns to the app user.

    Returns:
        str: Path of the temporary .png file inside *images_dir*.
    """
    # Create the directory if it doesn't exist (no-op in the container).
    os.makedirs(images_dir, exist_ok=True)

    # BUG FIX: the original did
    #   os.path.join(images_dir, tempfile.NamedTemporaryFile(suffix=".png").name)
    # but NamedTemporaryFile().name is an ABSOLUTE path, and os.path.join
    # discards everything before an absolute component — so the file was
    # written to the default temp dir, not images_dir (and the throwaway
    # NamedTemporaryFile was a race against its own deletion).
    # mkstemp atomically creates a unique file directly in images_dir.
    fd, temp_file_path = tempfile.mkstemp(suffix=".png", dir=images_dir)

    # Write the uploaded image content to the temporary file.
    with os.fdopen(fd, "wb") as temp_file:
        temp_file.write(uploaded_image.read())
    return temp_file_path
42
+
43
## LOGO and TITLE
## -------------------------------------------------------------------------------------------
# Show the logo and title side by side
col1, col2 = st.columns([1, 4])
with col1:
    st.image("brainbot.png", use_column_width=True,)
with col2:
    st.title("Hi, I am BrainBot - Your AI Learning Assistant!")

# Main content
st.header("Upload any 📄 file, 🖼️ image, or 🔗 webpage link and ask me anything from it!")
st.subheader("Supported file formats: PDF, DOCX, TXT, PPTX, HTML")
st.subheader("Supported image formats: PNG, JPG, JPEG")

col3, col4 = st.columns([2, 3])
with col3:
    ## LLM OPTIONS
    # Select the LLM to use (either GPT-4 or GROQ); index=1 makes GROQ the default
    llm = st.radio(
        "Choose the LLM", ["GPT-4", "GROQ"],
        index=1
    )

    # Remember the selection so the chat pages can reuse it
    st.session_state["llm"] = llm

## CHAT OPTIONS - FILE, IMAGE, WEBSITE
## -------------------------------------------------------------------------------------------
# User Inputs (reset on every Streamlit rerun; the widgets below repopulate them)
uploaded_file = None
uploaded_image = None
website_link = None
question = None  # NOTE(review): assigned but never read in this script

if llm == "GPT-4" and "api_key_flag" not in st.session_state:
    st.warning("Please enter your OpenAI API key.")
    # Get OpenAI API Key from user
    openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")
    # Send POST request to a FastAPI endpoint to set the OpenAI API key as an environment
    # variable
    # NOTE(review): the Dockerfile starts uvicorn on port 7860, but every URL
    # here targets localhost:8000 — confirm which port the API serves on.
    with st.spinner("Activating OpenAI API..."):
        try:
            FASTAPI_URL = "http://localhost:8000/set_api_key"
            data = {"api_key": openai_api_key}
            if openai_api_key:
                response = requests.post(FASTAPI_URL, json=data)
                st.sidebar.success(response.text)
                # Flag prevents re-prompting for the key on later reruns
                st.session_state['api_key_flag'] = True
                st.experimental_rerun()
        except Exception as e:
            st.switch_page("pages/error.py")
with col4:
    if llm == "GROQ" or "api_key_flag" in st.session_state:
        # Select to upload file, image, or link to chat with them
        upload_option = st.radio(
            "Select an option", ["📄 Upload File", "🖼️ Upload Image", "🔗 Upload Link"]
        )
        # Select an option to show the appropriate file_uploader
        if upload_option == "📄 Upload File":
            uploaded_file = st.file_uploader("Choose a file",
                                             type=["txt", "pdf", "docx", "pptx", "html"])
        elif upload_option == "🖼️ Upload Image":
            uploaded_image = st.file_uploader("Choose an image", type=["png", "jpg", "jpeg"])
        elif upload_option == "🔗 Upload Link":
            website_link = st.text_input("Enter a website URL")

## CHAT HISTORY
## -------------------------------------------------------------------------------------------
# Initialize an empty list to store chat messages with files
if 'file_chat_history' not in st.session_state:
    st.session_state['file_chat_history'] = []
# Initialize an empty list to store image interpretations
if 'image_chat_history' not in st.session_state:
    st.session_state['image_chat_history'] = []
# Initialize an empty list to store chat messages with websites
if 'web_chat_history' not in st.session_state:
    st.session_state['web_chat_history'] = []

## FILE
## -------------------------------------------------------------------------------------------
# Load the uploaded file, then save it into a vector store, and enable the input field to ask
# a question
st.session_state['uploaded_file'] = False
if uploaded_file is not None:
    with st.spinner("Loading file..."):
        # Save the uploaded file to a temporary path
        temp_file_path = save_uploaded_file(uploaded_file)

        try:
            # Send POST request to a FastAPI endpoint to load the file into a vectorstore
            data = {"file_path": temp_file_path, "file_type": uploaded_file.type}
            FASTAPI_URL = f"http://localhost:8000/load_file/{llm}"
            response = requests.post(FASTAPI_URL, json=data)
            st.success(response.text)
            st.session_state['current_file'] = uploaded_file.name
            st.session_state['uploaded_file'] = True
            st.switch_page("pages/File-chat.py")
        except Exception as e:
            st.switch_page("pages/error.py")

## IMAGE
## -------------------------------------------------------------------------------------------
# Load the uploaded image if user uploads an image, then interpret the image
st.session_state['uploaded_image'] = False
if uploaded_image is not None:
    try:
        # Save uploaded image to a temporary file
        temp_img_path = save_uploaded_image(uploaded_image)
    except Exception as e:
        st.switch_page("pages/error.py")

    st.session_state['temp_img_path'] = temp_img_path
    st.session_state['current_image'] = uploaded_image.name
    st.session_state['uploaded_image'] = True
    st.switch_page("pages/Image-scan.py")

## WEBSITE LINK
## -------------------------------------------------------------------------------------------
# Load the website content, then save it into a vector store, and enable the input field to
# ask a question
st.session_state['uploaded_link'] = False
if website_link is not None:
    if website_link:
        # Ensure that the user has entered a correct URL
        if validators.url(website_link):
            try:
                # Send POST request to a FastAPI endpoint to scrape the webpage and load its text
                # into a vector store
                FASTAPI_URL = f"http://localhost:8000/load_link/{llm}"
                data = {"website_link": website_link}
                with st.spinner("Loading website..."):
                    response = requests.post(FASTAPI_URL, json=data)
                    st.success(response.text)
                    st.session_state['current_website'] = website_link
                    st.session_state['uploaded_link'] = True
                    st.switch_page("pages/Web-chat.py")
            except Exception as e:
                st.switch_page("pages/error.py")
        else:
            st.error("Invalid URL. Please enter a valid URL.")
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1
2
+
3
+ # Comments are provided throughout this file to help you get started.
4
+ # If you need more help, visit the Dockerfile reference guide at
5
+ # https://docs.docker.com/go/dockerfile-reference/
6
+
7
+ # Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7
8
+
9
+ ARG PYTHON_VERSION=3.11.9
10
+ FROM python:${PYTHON_VERSION}-slim as base
11
+
12
+ # Prevents Python from writing pyc files.
13
+ ENV PYTHONDONTWRITEBYTECODE=1
14
+
15
+ # Keeps Python from buffering stdout and stderr to avoid situations where
16
+ # the application crashes without emitting any logs due to buffering.
17
+ ENV PYTHONUNBUFFERED=1
18
+
19
+ WORKDIR /app
20
+
21
+ # Create a non-privileged user that the app will run under.
22
+ # See https://docs.docker.com/go/dockerfile-user-best-practices/
23
+ ARG UID=10001
24
+ RUN adduser \
25
+ --disabled-password \
26
+ --gecos "" \
27
+ --home "/nonexistent" \
28
+ --shell "/sbin/nologin" \
29
+ --no-create-home \
30
+ --uid "${UID}" \
31
+ appuser
32
+
33
+ # Download dependencies as a separate step to take advantage of Docker's caching.
34
+ # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
35
+ # Leverage a bind mount to requirements.txt to avoid having to copy it into
36
+ # this layer.
37
+ RUN --mount=type=cache,target=/root/.cache/pip \
38
+ --mount=type=bind,source=requirements.txt,target=requirements.txt \
39
+ python -m pip install -r requirements.txt
40
+
41
+ # Create a directory named 'data' and assign its ownership to appuser
42
+ RUN mkdir -p /data
43
+ RUN chown appuser /data
44
+
45
+ # Create a directory named 'images' and assign its ownership to appuser
46
+ RUN mkdir -p /images
47
+ RUN chown appuser /images
48
+
49
+ # Switch to the non-privileged user to run the application.
50
+ USER appuser
51
+
52
+ # Set the TRANSFORMERS_CACHE environment variable
53
+ ENV TRANSFORMERS_CACHE=/tmp/.cache/huggingface
54
+
55
+ # Create the cache folder with appropriate permissions
56
+ RUN mkdir -p $TRANSFORMERS_CACHE && chmod -R 777 $TRANSFORMERS_CACHE
57
+
58
+ # Copy the source code into the container.
59
+ COPY . .
60
+
61
+ # Expose the port that the application listens on.
62
+ EXPOSE 7860
63
+ EXPOSE 8501
64
+
65
+ # Run the application.
66
+ CMD ["bash", "-c", "uvicorn main:app --host 0.0.0.0 --port 7860 & streamlit run BrainBot.py --server.port 8501"]
README.Docker.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Building and running your application
2
+
3
+ When you're ready, start your application by running:
4
+ `docker compose up --build`.
5
+
6
+ Your application will be available at http://localhost:8000.
7
+
8
+ ### Deploying your application to the cloud
9
+
10
+ First, build your image, e.g.: `docker build -t myapp .`.
11
+ If your cloud uses a different CPU architecture than your development
12
+ machine (e.g., you are on a Mac M1 and your cloud provider is amd64),
13
+ you'll want to build the image for that platform, e.g.:
14
+ `docker build --platform=linux/amd64 -t myapp .`.
15
+
16
+ Then, push it to your registry, e.g. `docker push myregistry.com/myapp`.
17
+
18
+ Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/)
19
+ docs for more detail on building and pushing.
20
+
21
+ ### References
22
+ * [Docker's Python guide](https://docs.docker.com/language/python/)
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Gpt 4
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: pink
6
  sdk: docker
 
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: BrainBot
3
+ emoji: 🐢
4
+ colorFrom: pink
5
+ colorTo: yellow
6
  sdk: docker
7
+ app_port: 8501
8
  pinned: false
9
+ license: apache-2.0
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
brainbot.png ADDED
compose.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Comments are provided throughout this file to help you get started.
2
+ # If you need more help, visit the Docker Compose reference guide at
3
+ # https://docs.docker.com/go/compose-spec-reference/
4
+
5
+ # Here the instructions define your application as a service called "server".
6
+ # This service is built from the Dockerfile in the current directory.
7
+ # You can add other services your application may depend on here, such as a
8
+ # database or a cache. For examples, see the Awesome Compose repository:
9
+ # https://github.com/docker/awesome-compose
10
+ services:
11
+ server:
12
+ build:
13
+ context: .
14
+ ports:
15
+ - 8000:8000
16
+
17
+ # The commented out section below is an example of how to define a PostgreSQL
18
+ # database that your application can use. `depends_on` tells Docker Compose to
19
+ # start the database before your application. The `db-data` volume persists the
20
+ # database data between container restarts. The `db-password` secret is used
21
+ # to set the database password. You must create `db/password.txt` and add
22
+ # a password of your choosing to it before running `docker compose up`.
23
+ # depends_on:
24
+ # db:
25
+ # condition: service_healthy
26
+ # db:
27
+ # image: postgres
28
+ # restart: always
29
+ # user: postgres
30
+ # secrets:
31
+ # - db-password
32
+ # volumes:
33
+ # - db-data:/var/lib/postgresql/data
34
+ # environment:
35
+ # - POSTGRES_DB=example
36
+ # - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
37
+ # expose:
38
+ # - 5432
39
+ # healthcheck:
40
+ # test: [ "CMD", "pg_isready" ]
41
+ # interval: 10s
42
+ # timeout: 5s
43
+ # retries: 5
44
+ # volumes:
45
+ # db-data:
46
+ # secrets:
47
+ # db-password:
48
+ # file: db/password.txt
49
+
dockerignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Include any files or directories that you don't want to be copied to your
2
+ # container here (e.g., local build artifacts, temporary files, etc.).
3
+ #
4
+ # For more help, visit the .dockerignore file reference guide at
5
+ # https://docs.docker.com/go/build-context-dockerignore/
6
+
7
+ **/.DS_Store
8
+ **/__pycache__
9
+ **/.venv
10
+ **/.classpath
11
+ **/.dockerignore
12
+ **/.env
13
+ **/.git
14
+ **/.gitignore
15
+ **/.project
16
+ **/.settings
17
+ **/.toolstarget
18
+ **/.vs
19
+ **/.vscode
20
+ **/*.*proj.user
21
+ **/*.dbmdl
22
+ **/*.jfm
23
+ **/bin
24
+ **/charts
25
+ **/docker-compose*
26
+ **/compose*
27
+ **/Dockerfile*
28
+ **/node_modules
29
+ **/npm-debug.log
30
+ **/obj
31
+ **/secrets.dev.yaml
32
+ **/values.dev.yaml
33
+ LICENSE
34
+ README.md
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from contextlib import asynccontextmanager
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_community.document_loaders import WebBaseLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_openai import OpenAIEmbeddings
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_openai import ChatOpenAI
11
+ from langchain_groq import ChatGroq
12
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
13
+ from langchain.chains.combine_documents import create_stuff_documents_chain
14
+ from langchain_community.chat_message_histories import ChatMessageHistory
15
+ from langchain_core.chat_history import BaseChatMessageHistory
16
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
17
+ from langchain_core.runnables.history import RunnableWithMessageHistory
18
+ from transformers import pipeline
19
+ from bs4 import BeautifulSoup
20
+ from dotenv import load_dotenv
21
+ from PIL import Image
22
+ import base64
23
+ import requests
24
+ import docx2txt
25
+ import pptx
26
+ import os
27
+ import utils
28
+ from fastapi.middleware.cors import CORSMiddleware
29
+
30
+ ## APPLICATION LIFESPAN
31
+ # Load the environment variables using FastAPI lifespan event so that they are available throughout the application
32
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler.

    Startup: load environment variables and the BLIP image-captioning
    pipeline (exposed as the module-global ``image_to_text``).
    Shutdown: best-effort deletion of temporary images under /images.
    """
    # Load the environment variables
    load_dotenv()
    #os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
    ## Langsmith tracking
    os.environ["LANGCHAIN_TRACING_V2"] = "true" # Enable tracing to capture all the monitoring results
    # NOTE(review): os.getenv() returns None for an unset variable, and
    # assigning None into os.environ raises TypeError — confirm these vars
    # are always present in the deployment environment.
    os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
    ## load the Groq API key
    os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
    os.environ['HF_TOKEN'] = os.getenv("HF_TOKEN")
    # Shared module-global so the /image endpoint reuses one loaded model
    global image_to_text
    image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    yield
    # Delete all the temporary images created during the session
    utils.unlink_images("/images")
48
+
49
## FASTAPI APP
# Initialize the FastAPI app; docs_url="/" serves the Swagger UI at the root
app = FastAPI(lifespan=lifespan, docs_url="/")

# Allow requests from all origins (replace * with specific origins if needed)
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is a
# permissive CORS posture — confirm this is acceptable for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["*"],
)
61
+
62
## PYDANTIC MODELS
# Define an APIKey Pydantic model for the request body
class APIKey(BaseModel):
    # OpenAI API key forwarded by the Streamlit front end (/set_api_key)
    api_key: str

# Define a FileInfo Pydantic model for the request body
class FileInfo(BaseModel):
    # Path of the temporary file previously saved by the front end
    file_path: str
    # MIME type of the upload; drives loader selection in /load_file
    file_type: str

# Define an Image Pydantic model for the request body
# NOTE(review): this class shadows PIL.Image imported at the top of the
# file — any later use of PIL's Image in this module would resolve here.
class Image(BaseModel):
    # Path of the temporary image previously saved by the front end
    image_path: str

# Define a Website Pydantic model for the request body
class Website(BaseModel):
    # URL to scrape and index in /load_link
    website_link: str

# Define a Question Pydantic model for the request body
class Question(BaseModel):
    # The user's question text
    question: str
    # Which indexed source to query: "file" or "web"
    resource: str
84
+
85
+ ## FUNCTIONS
86
+ # Function to combine all documents
87
def format_docs(docs):
    """Join the page content of every document, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
89
+
90
+ # Function to encode the image
91
def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded UTF-8 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
94
+
95
+ ## FASTAPI ENDPOINTS
96
+ ## GET - /
97
+ @app.get("/")
98
+ async def welcome():
99
+ return "Welcome to Brainbot!"
100
+
101
+ ## POST - /set_api_key
102
+ @app.post("/set_api_key")
103
+ async def set_api_key(api_key: APIKey):
104
+ os.environ["OPENAI_API_KEY"] = api_key.api_key
105
+ return "API key set successfully!"
106
+
107
+ ## POST - /load_file
108
+ # Load the file, split it into document chunks, and upload the document embeddings into a vectorstore
109
+ @app.post("/load_file/{llm}")
110
+ async def load_file(llm: str, file_info: FileInfo):
111
+ file_path = file_info.file_path
112
+ file_type = file_info.file_type
113
+
114
+ # Read the file and split it into document chunks
115
+ try:
116
+ # Initialize the text splitter
117
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
118
+
119
+ # Check the file type and load each file according to its type
120
+ if file_type == "application/pdf":
121
+ # Read pdf file
122
+ loader = PyPDFLoader(file_path)
123
+ docs = loader.load()
124
+ elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
125
+ # Read docx file
126
+ text = docx2txt.process(file_path)
127
+ docs = text_splitter.create_documents([text])
128
+ elif file_type == "text/plain":
129
+ # Read txt file
130
+ with open(file_path, 'r') as file:
131
+ text = file.read()
132
+ docs = text_splitter.create_documents([text])
133
+ elif file_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
134
+ # Read pptx file
135
+ presentation = pptx.Presentation(file_path)
136
+ # Initialize an empty list to store slide texts
137
+ slide_texts = []
138
+
139
+ # Iterate through slides and extract text
140
+ for slide in presentation.slides:
141
+ # Initialize an empty string to store text for each slide
142
+ slide_text = ""
143
+
144
+ # Iterate through shapes in the slide
145
+ for shape in slide.shapes:
146
+ if hasattr(shape, "text"):
147
+ slide_text += shape.text + "\n" # Add shape text to slide text
148
+ # Append slide text to the list
149
+ slide_texts.append(slide_text.strip())
150
+
151
+ docs = text_splitter.create_documents(slide_texts)
152
+ elif file_type == "text/html":
153
+ # Read html file
154
+ with open(file_path, 'r') as file:
155
+ soup = BeautifulSoup(file, 'html.parser')
156
+ text = soup.get_text()
157
+ docs = text_splitter.create_documents([text])
158
+
159
+ # Delete the temporary file
160
+ os.unlink(file_path)
161
+
162
+ # Split the document into chunks
163
+ documents = text_splitter.split_documents(docs)
164
+
165
+ if llm == "GPT-4":
166
+ embeddings = OpenAIEmbeddings()
167
+ elif llm == "GROQ":
168
+ embeddings = HuggingFaceEmbeddings()
169
+
170
+ # Save document embeddings into the FAISS vectorstore
171
+ global file_vectorstore
172
+ file_vectorstore = FAISS.from_documents(documents, embeddings)
173
+ except Exception as e:
174
+ # Handle errors
175
+ raise HTTPException(status_code=500, detail=str(e.with_traceback))
176
+ return "File uploaded successfully!"
177
+
178
+ ## POST - /image
179
+ # Interpret the image using the LLM - OpenAI Vision
180
+ @app.post("/image/{llm}")
181
+ async def interpret_image(llm: str, image: Image):
182
+ try:
183
+ # Get the base64 string
184
+ base64_image = encode_image(image.image_path)
185
+
186
+ if llm == "GPT-4":
187
+ headers = {
188
+ "Content-Type": "application/json",
189
+ "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
190
+ }
191
+
192
+ payload = {
193
+ "model": "gpt-4-turbo",
194
+ "messages": [
195
+ {
196
+ "role": "user",
197
+ "content": [
198
+ {
199
+ "type": "text",
200
+ "text": "What's in this image?"
201
+ },
202
+ {
203
+ "type": "image_url",
204
+ "image_url": {
205
+ "url": f"data:image/jpeg;base64,{base64_image}"
206
+ }
207
+ }
208
+ ]
209
+ }
210
+ ],
211
+ "max_tokens": 300
212
+ }
213
+
214
+ response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
215
+ response = response.json()
216
+ # Extract description about the image
217
+ description = response["choices"][0]["message"]["content"]
218
+ elif llm == "GROQ":
219
+ # Use image-to-text model from Hugging Face
220
+ response = image_to_text(image.image_path)
221
+ # Extract description about the image
222
+ description = response[0]["generated_text"]
223
+ chat = ChatGroq(temperature=0, groq_api_key=os.environ["GROQ_API_KEY"], model_name="Llama3-8b-8192")
224
+ system = "You are an assistant to understand and interpret images."
225
+ human = "{text}"
226
+ prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
227
+
228
+ chain = prompt | chat
229
+ text = f"Explain the following image description in a small paragraph. {description}"
230
+ response = chain.invoke({"text": text})
231
+ description = str.capitalize(description) + ". " + response.content
232
+ except Exception as e:
233
+ # Handle errors
234
+ raise HTTPException(status_code=500, detail=str(e))
235
+
236
+ return description
237
+
238
+ ## POST - load_link
239
+ # Load the website content through scraping, split it into document chunks, and upload the document
240
+ # embeddings into a vectorstore
241
+ @app.post("/load_link/{llm}")
242
+ async def website_info(llm: str, link: Website):
243
+ try:
244
+ # load, chunk, and index the content of the html page
245
+ loader = WebBaseLoader(web_paths=(link.website_link,),)
246
+
247
+ global web_documents
248
+ web_documents = loader.load()
249
+
250
+ # split the document into chunks
251
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
252
+ documents = text_splitter.split_documents(web_documents)
253
+
254
+ if llm == "GPT-4":
255
+ embeddings = OpenAIEmbeddings()
256
+ elif llm == "GROQ":
257
+ embeddings = HuggingFaceEmbeddings()
258
+
259
+ # Save document embeddings into the FAISS vectorstore
260
+ global website_vectorstore
261
+ website_vectorstore = FAISS.from_documents(documents, embeddings)
262
+ except Exception as e:
263
+ # Handle errors
264
+ raise HTTPException(status_code=500, detail=str(e))
265
+
266
+ return "Website loaded successfully!"
267
+
268
+ ## POST - /answer_with_chat_history
269
+ # Retrieve the answer to the question using LLM and the RAG chain maintaining the chat history
270
+ @app.post("/answer_with_chat_history/{llm}")
271
+ async def get_answer_with_chat_history(llm: str, question: Question):
272
+ user_question = question.question
273
+ resource = question.resource
274
+ selected_llm = llm
275
+
276
+ try:
277
+ # Initialize the LLM
278
+ if selected_llm == "GPT-4":
279
+ llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
280
+ elif selected_llm == "GROQ":
281
+ llm = ChatGroq(groq_api_key=os.environ["GROQ_API_KEY"], model_name="Llama3-8b-8192")
282
+
283
+ # extract relevant context from the document using the retriever with similarity search
284
+ if resource == "file":
285
+ retriever = file_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
286
+ elif resource == "web":
287
+ retriever = website_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
288
+
289
+ ### Contextualize question ###
290
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
291
+ which might reference context in the chat history, formulate a standalone question \
292
+ which can be understood without the chat history. Do NOT answer the question, \
293
+ just reformulate it if needed and otherwise return it as is."""
294
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
295
+ [
296
+ ("system", contextualize_q_system_prompt),
297
+ MessagesPlaceholder("chat_history"),
298
+ ("human", "{input}"),
299
+ ]
300
+ )
301
+ history_aware_retriever = create_history_aware_retriever(
302
+ llm, retriever, contextualize_q_prompt
303
+ )
304
+
305
+ ### Answer question ###
306
+ qa_system_prompt = """You are an assistant for question-answering tasks. \
307
+ Use the following pieces of retrieved context to answer the question. \
308
+ If you don't know the answer, just say that you don't know. \
309
+ Use three sentences maximum and keep the answer concise.\
310
+ {context}"""
311
+ qa_prompt = ChatPromptTemplate.from_messages(
312
+ [
313
+ ("system", qa_system_prompt),
314
+ MessagesPlaceholder("chat_history"),
315
+ ("human", "{input}"),
316
+ ]
317
+ )
318
+
319
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
320
+
321
+ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
322
+
323
+ ### Statefully manage chat history ###
324
+ store = {}
325
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
326
+ if session_id not in store:
327
+ store[session_id] = ChatMessageHistory()
328
+ return store[session_id]
329
+
330
+ conversational_rag_chain = RunnableWithMessageHistory(
331
+ rag_chain,
332
+ get_session_history,
333
+ input_messages_key="input",
334
+ history_messages_key="chat_history",
335
+ output_messages_key="answer",
336
+ )
337
+
338
+ response = conversational_rag_chain.invoke(
339
+ {"input": user_question},
340
+ config={
341
+ "configurable": {"session_id": "abc123"}
342
+ }, # constructs a key "abc123" in `store`.
343
+ )["answer"]
344
+ except Exception as e:
345
+ # Handle errors
346
+ raise HTTPException(status_code=500, detail=str(e))
347
+
348
+ return response
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bs4
2
+ docx2txt
3
+ faiss-cpu
4
+ fastapi
5
+ langchain
6
+ langchain-community
7
+ langchain-core
8
+ langchain-groq
9
+ langchain-openai
10
+ Pillow
11
+ pydantic
12
+ pypdf
13
+ python-dotenv
14
+ python-pptx
15
+ requests
16
+ sentence-transformers
17
+ streamlit
18
+ validators
19
+ uvicorn
styles.css ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ img {
2
+ border-radius: 10px;
3
+ }
4
+
5
+ .stApp {
6
+ background: linear-gradient(to bottom, rgba(247,251,252,1) 0%,rgba(217,237,242,1) 40%,rgba(173,217,228,1) 100%); /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */
7
+ }
8
+
9
+ ul li:nth-child(2) {
10
+ display: none;
11
+ }
utils.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+
4
+ ## HELPER FUNCTIONS
5
+ ## ------------------------------------------------------------------------------------------
6
+ # Function to format response received from a FastAPI endpoint
7
def format_response(response_text):
    """Normalize a FastAPI response string into display-ready markdown.

    Converts literal "\\n" escapes to real newlines, then rewrites bullet
    points, numbered-list items, and headings into markdown syntax.
    """
    # Literal backslash-n sequences become real newlines first.
    response_text = re.sub(r'\\n', '\n', response_text)

    # Per-line rewrites applied in order: bullets, numbered lists, headings.
    line_rules = (
        (r'^\s*-\s+(.*)$', r'* \1'),
        (r'^\s*\d+\.\s+(.*)$', r'1. \1'),
        (r'^\s*(#+)\s+(.*)$', r'\1 \2'),
    )
    for pattern, replacement in line_rules:
        response_text = re.sub(pattern, replacement, response_text, flags=re.MULTILINE)

    return response_text
21
+
22
+ # Function to unlink all images when the application closes
23
def unlink_images(folder_path):
    """Best-effort removal of every entry in *folder_path*.

    Each deletion failure is reported and skipped so one bad entry does not
    stop the cleanup of the rest.
    """
    # Walk the directory listing and unlink each entry individually.
    for image_file in os.listdir(folder_path):
        target = os.path.join(folder_path, image_file)
        try:
            os.unlink(target)
        except Exception as e:
            print(f"Error deleting {image_file}: {e}")
        else:
            print(f"Deleted: {image_file}")