Huzaifa367 commited on
Commit
bc68b42
·
verified ·
1 Parent(s): cf7e13d

Upload 12 files

Browse files
Files changed (12) hide show
  1. BrainBot.py +181 -0
  2. Dockerfile +66 -0
  3. README.Docker.md +22 -0
  4. README.md +6 -4
  5. brainbot.png +0 -0
  6. compose.yaml +49 -0
  7. dockerignore +34 -0
  8. gitattributes +35 -0
  9. main.py +348 -0
  10. requirements.txt +19 -0
  11. styles.css +11 -0
  12. utils.py +33 -0
BrainBot.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import tempfile
4
+ import validators
5
+ import os
6
+
7
# Custom CSS: inject styles.css into the Streamlit page.
# The file is read relative to the working directory (the app root in Docker).
with open('styles.css') as f:
    css = f.read()

# unsafe_allow_html is required for a raw <style> tag to be rendered.
st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
12
+
13
+ ## FUNCTIONS
14
+ ## -------------------------------------------------------------------------------------------
15
+ # Function to save the uploaded file as a temporary file and return its path.
16
def save_uploaded_file(uploaded_file, data_dir="/data"):
    """Persist an uploaded file to *data_dir* and return the temp file's path.

    Args:
        uploaded_file: File-like object (e.g. Streamlit UploadedFile) exposing
            a ``read()`` method.
        data_dir: Directory for the temporary copy. Defaults to "/data",
            which the Dockerfile pre-creates and chowns to the app user.

    Returns:
        str: Path of the temporary file holding the upload's bytes.
    """
    file_content = uploaded_file.read()  # Load the document into memory

    # Create the directory if it doesn't exist. (The original had this
    # commented out, which crashes outside the container where /data is
    # not pre-provisioned; exist_ok makes it a no-op in Docker.)
    os.makedirs(data_dir, exist_ok=True)

    # delete=False keeps the file on disk after the handle closes so the
    # path can be handed to the FastAPI backend.
    with tempfile.NamedTemporaryFile(delete=False, dir=data_dir) as temp_file:
        temp_file.write(file_content)  # Write the uploaded content
        temp_file_path = temp_file.name  # Path of the temporary file
    return temp_file_path
28
+
29
+ # Function to save the uploaded image as a temporary file and return its path.
30
def save_uploaded_image(uploaded_image, images_dir="/images"):
    """Persist an uploaded image as a .png temp file inside *images_dir*.

    Args:
        uploaded_image: File-like object (e.g. Streamlit UploadedFile)
            exposing a ``read()`` method.
        images_dir: Target directory. Defaults to "/images", which the
            Dockerfile pre-creates and chowns to the app user.

    Returns:
        str: Path of the temporary .png file inside *images_dir*.
    """
    # Create the directory if it doesn't exist (no-op in the container).
    os.makedirs(images_dir, exist_ok=True)

    # BUG FIX: the original did
    #   os.path.join(images_dir, tempfile.NamedTemporaryFile(suffix=".png").name)
    # but NamedTemporaryFile().name is an ABSOLUTE path, and os.path.join
    # discards everything before an absolute component — so the file was
    # written to the default temp dir, not images_dir (and the throwaway
    # NamedTemporaryFile was a race against its own deletion).
    # mkstemp atomically creates a unique file directly in images_dir.
    fd, temp_file_path = tempfile.mkstemp(suffix=".png", dir=images_dir)

    # Write the uploaded image content to the temporary file.
    with os.fdopen(fd, "wb") as temp_file:
        temp_file.write(uploaded_image.read())
    return temp_file_path
42
+
43
## LOGO and TITLE
## -------------------------------------------------------------------------------------------
# Show the logo and title side by side
col1, col2 = st.columns([1, 4])
with col1:
    st.image("brainbot.png", use_column_width=True,)
with col2:
    st.title("Hi, I am BrainBot - Your AI Learning Assistant!")

# Main content
st.header("Upload any 📄 file, 🖼️ image, or 🔗 webpage link and ask me anything from it!")
st.subheader("Supported file formats: PDF, DOCX, TXT, PPTX, HTML")
st.subheader("Supported image formats: PNG, JPG, JPEG")

col3, col4 = st.columns([2, 3])
with col3:
    ## LLM OPTIONS
    # Select the LLM to use (either GPT-4 or GROQ); index=1 makes GROQ the default
    llm = st.radio(
        "Choose the LLM", ["GPT-4", "GROQ"],
        index=1
    )

    # Remember the selection so the chat pages can reuse it
    st.session_state["llm"] = llm

## CHAT OPTIONS - FILE, IMAGE, WEBSITE
## -------------------------------------------------------------------------------------------
# User Inputs (reset on every Streamlit rerun; the widgets below repopulate them)
uploaded_file = None
uploaded_image = None
website_link = None
question = None  # NOTE(review): assigned but never read in this script

if llm == "GPT-4" and "api_key_flag" not in st.session_state:
    st.warning("Please enter your OpenAI API key.")
    # Get OpenAI API Key from user
    openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")
    # Send POST request to a FastAPI endpoint to set the OpenAI API key as an environment
    # variable
    # NOTE(review): the Dockerfile starts uvicorn on port 7860, but every URL
    # here targets localhost:8000 — confirm which port the API serves on.
    with st.spinner("Activating OpenAI API..."):
        try:
            FASTAPI_URL = "http://localhost:8000/set_api_key"
            data = {"api_key": openai_api_key}
            if openai_api_key:
                response = requests.post(FASTAPI_URL, json=data)
                st.sidebar.success(response.text)
                # Flag prevents re-prompting for the key on later reruns
                st.session_state['api_key_flag'] = True
                st.experimental_rerun()
        except Exception as e:
            st.switch_page("pages/error.py")
with col4:
    if llm == "GROQ" or "api_key_flag" in st.session_state:
        # Select to upload file, image, or link to chat with them
        upload_option = st.radio(
            "Select an option", ["📄 Upload File", "🖼️ Upload Image", "🔗 Upload Link"]
        )
        # Select an option to show the appropriate file_uploader
        if upload_option == "📄 Upload File":
            uploaded_file = st.file_uploader("Choose a file",
                                             type=["txt", "pdf", "docx", "pptx", "html"])
        elif upload_option == "🖼️ Upload Image":
            uploaded_image = st.file_uploader("Choose an image", type=["png", "jpg", "jpeg"])
        elif upload_option == "🔗 Upload Link":
            website_link = st.text_input("Enter a website URL")

## CHAT HISTORY
## -------------------------------------------------------------------------------------------
# Initialize an empty list to store chat messages with files
if 'file_chat_history' not in st.session_state:
    st.session_state['file_chat_history'] = []
# Initialize an empty list to store image interpretations
if 'image_chat_history' not in st.session_state:
    st.session_state['image_chat_history'] = []
# Initialize an empty list to store chat messages with websites
if 'web_chat_history' not in st.session_state:
    st.session_state['web_chat_history'] = []

## FILE
## -------------------------------------------------------------------------------------------
# Load the uploaded file, then save it into a vector store, and enable the input field to ask
# a question
st.session_state['uploaded_file'] = False
if uploaded_file is not None:
    with st.spinner("Loading file..."):
        # Save the uploaded file to a temporary path
        temp_file_path = save_uploaded_file(uploaded_file)

        try:
            # Send POST request to a FastAPI endpoint to load the file into a vectorstore
            data = {"file_path": temp_file_path, "file_type": uploaded_file.type}
            FASTAPI_URL = f"http://localhost:8000/load_file/{llm}"
            response = requests.post(FASTAPI_URL, json=data)
            st.success(response.text)
            st.session_state['current_file'] = uploaded_file.name
            st.session_state['uploaded_file'] = True
            st.switch_page("pages/File-chat.py")
        except Exception as e:
            st.switch_page("pages/error.py")

## IMAGE
## -------------------------------------------------------------------------------------------
# Load the uploaded image if user uploads an image, then interpret the image
st.session_state['uploaded_image'] = False
if uploaded_image is not None:
    try:
        # Save uploaded image to a temporary file
        temp_img_path = save_uploaded_image(uploaded_image)
    except Exception as e:
        st.switch_page("pages/error.py")

    st.session_state['temp_img_path'] = temp_img_path
    st.session_state['current_image'] = uploaded_image.name
    st.session_state['uploaded_image'] = True
    st.switch_page("pages/Image-scan.py")

## WEBSITE LINK
## -------------------------------------------------------------------------------------------
# Load the website content, then save it into a vector store, and enable the input field to
# ask a question
st.session_state['uploaded_link'] = False
if website_link is not None:
    if website_link:
        # Ensure that the user has entered a correct URL
        if validators.url(website_link):
            try:
                # Send POST request to a FastAPI endpoint to scrape the webpage and load its text
                # into a vector store
                FASTAPI_URL = f"http://localhost:8000/load_link/{llm}"
                data = {"website_link": website_link}
                with st.spinner("Loading website..."):
                    response = requests.post(FASTAPI_URL, json=data)
                    st.success(response.text)
                    st.session_state['current_website'] = website_link
                    st.session_state['uploaded_link'] = True
                    st.switch_page("pages/Web-chat.py")
            except Exception as e:
                st.switch_page("pages/error.py")
        else:
            st.error("Invalid URL. Please enter a valid URL.")
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1
2
+
3
+ # Comments are provided throughout this file to help you get started.
4
+ # If you need more help, visit the Dockerfile reference guide at
5
+ # https://docs.docker.com/go/dockerfile-reference/
6
+
7
+ # Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7
8
+
9
+ ARG PYTHON_VERSION=3.11.9
10
+ FROM python:${PYTHON_VERSION}-slim as base
11
+
12
+ # Prevents Python from writing pyc files.
13
+ ENV PYTHONDONTWRITEBYTECODE=1
14
+
15
+ # Keeps Python from buffering stdout and stderr to avoid situations where
16
+ # the application crashes without emitting any logs due to buffering.
17
+ ENV PYTHONUNBUFFERED=1
18
+
19
+ WORKDIR /app
20
+
21
+ # Create a non-privileged user that the app will run under.
22
+ # See https://docs.docker.com/go/dockerfile-user-best-practices/
23
+ ARG UID=10001
24
+ RUN adduser \
25
+ --disabled-password \
26
+ --gecos "" \
27
+ --home "/nonexistent" \
28
+ --shell "/sbin/nologin" \
29
+ --no-create-home \
30
+ --uid "${UID}" \
31
+ appuser
32
+
33
+ # Download dependencies as a separate step to take advantage of Docker's caching.
34
+ # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
35
+ # Leverage a bind mount to requirements.txt to avoid having to copy it into
36
+ # this layer.
37
+ RUN --mount=type=cache,target=/root/.cache/pip \
38
+ --mount=type=bind,source=requirements.txt,target=requirements.txt \
39
+ python -m pip install -r requirements.txt
40
+
41
+ # Create a directory named 'data' and assign its ownership to appuser
42
+ RUN mkdir -p /data
43
+ RUN chown appuser /data
44
+
45
+ # Create a directory named 'images' and assign its ownership to appuser
46
+ RUN mkdir -p /images
47
+ RUN chown appuser /images
48
+
49
+ # Switch to the non-privileged user to run the application.
50
+ USER appuser
51
+
52
+ # Set the TRANSFORMERS_CACHE environment variable
53
+ ENV TRANSFORMERS_CACHE=/tmp/.cache/huggingface
54
+
55
+ # Create the cache folder with appropriate permissions
56
+ RUN mkdir -p $TRANSFORMERS_CACHE && chmod -R 777 $TRANSFORMERS_CACHE
57
+
58
+ # Copy the source code into the container.
59
+ COPY . .
60
+
61
+ # Expose the port that the application listens on.
62
+ EXPOSE 7860
63
+ EXPOSE 8501
64
+
65
+ # Run the application.
66
+ CMD ["bash", "-c", "uvicorn main:app --host 0.0.0.0 --port 7860 & streamlit run BrainBot.py --server.port 8501"]
README.Docker.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Building and running your application
2
+
3
+ When you're ready, start your application by running:
4
+ `docker compose up --build`.
5
+
6
+ Your application will be available at http://localhost:8000.
7
+
8
+ ### Deploying your application to the cloud
9
+
10
+ First, build your image, e.g.: `docker build -t myapp .`.
11
+ If your cloud uses a different CPU architecture than your development
12
+ machine (e.g., you are on a Mac M1 and your cloud provider is amd64),
13
+ you'll want to build the image for that platform, e.g.:
14
+ `docker build --platform=linux/amd64 -t myapp .`.
15
+
16
+ Then, push it to your registry, e.g. `docker push myregistry.com/myapp`.
17
+
18
+ Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/)
19
+ docs for more detail on building and pushing.
20
+
21
+ ### References
22
+ * [Docker's Python guide](https://docs.docker.com/language/python/)
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Gpt 4
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: pink
6
  sdk: docker
 
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: BrainBot
3
+ emoji: 🐢
4
+ colorFrom: pink
5
+ colorTo: yellow
6
  sdk: docker
7
+ app_port: 8501
8
  pinned: false
9
+ license: apache-2.0
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
brainbot.png ADDED
compose.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Comments are provided throughout this file to help you get started.
2
+ # If you need more help, visit the Docker Compose reference guide at
3
+ # https://docs.docker.com/go/compose-spec-reference/
4
+
5
+ # Here the instructions define your application as a service called "server".
6
+ # This service is built from the Dockerfile in the current directory.
7
+ # You can add other services your application may depend on here, such as a
8
+ # database or a cache. For examples, see the Awesome Compose repository:
9
+ # https://github.com/docker/awesome-compose
10
+ services:
11
+ server:
12
+ build:
13
+ context: .
14
+ ports:
15
+ - 8000:8000
16
+
17
+ # The commented out section below is an example of how to define a PostgreSQL
18
+ # database that your application can use. `depends_on` tells Docker Compose to
19
+ # start the database before your application. The `db-data` volume persists the
20
+ # database data between container restarts. The `db-password` secret is used
21
+ # to set the database password. You must create `db/password.txt` and add
22
+ # a password of your choosing to it before running `docker compose up`.
23
+ # depends_on:
24
+ # db:
25
+ # condition: service_healthy
26
+ # db:
27
+ # image: postgres
28
+ # restart: always
29
+ # user: postgres
30
+ # secrets:
31
+ # - db-password
32
+ # volumes:
33
+ # - db-data:/var/lib/postgresql/data
34
+ # environment:
35
+ # - POSTGRES_DB=example
36
+ # - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
37
+ # expose:
38
+ # - 5432
39
+ # healthcheck:
40
+ # test: [ "CMD", "pg_isready" ]
41
+ # interval: 10s
42
+ # timeout: 5s
43
+ # retries: 5
44
+ # volumes:
45
+ # db-data:
46
+ # secrets:
47
+ # db-password:
48
+ # file: db/password.txt
49
+
dockerignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Include any files or directories that you don't want to be copied to your
2
+ # container here (e.g., local build artifacts, temporary files, etc.).
3
+ #
4
+ # For more help, visit the .dockerignore file reference guide at
5
+ # https://docs.docker.com/go/build-context-dockerignore/
6
+
7
+ **/.DS_Store
8
+ **/__pycache__
9
+ **/.venv
10
+ **/.classpath
11
+ **/.dockerignore
12
+ **/.env
13
+ **/.git
14
+ **/.gitignore
15
+ **/.project
16
+ **/.settings
17
+ **/.toolstarget
18
+ **/.vs
19
+ **/.vscode
20
+ **/*.*proj.user
21
+ **/*.dbmdl
22
+ **/*.jfm
23
+ **/bin
24
+ **/charts
25
+ **/docker-compose*
26
+ **/compose*
27
+ **/Dockerfile*
28
+ **/node_modules
29
+ **/npm-debug.log
30
+ **/obj
31
+ **/secrets.dev.yaml
32
+ **/values.dev.yaml
33
+ LICENSE
34
+ README.md
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from contextlib import asynccontextmanager
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_community.document_loaders import WebBaseLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_openai import OpenAIEmbeddings
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_openai import ChatOpenAI
11
+ from langchain_groq import ChatGroq
12
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
13
+ from langchain.chains.combine_documents import create_stuff_documents_chain
14
+ from langchain_community.chat_message_histories import ChatMessageHistory
15
+ from langchain_core.chat_history import BaseChatMessageHistory
16
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
17
+ from langchain_core.runnables.history import RunnableWithMessageHistory
18
+ from transformers import pipeline
19
+ from bs4 import BeautifulSoup
20
+ from dotenv import load_dotenv
21
+ from PIL import Image
22
+ import base64
23
+ import requests
24
+ import docx2txt
25
+ import pptx
26
+ import os
27
+ import utils
28
+ from fastapi.middleware.cors import CORSMiddleware
29
+
30
+ ## APPLICATION LIFESPAN
31
+ # Load the environment variables using FastAPI lifespan event so that they are available throughout the application
32
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler.

    Startup: load environment variables and the BLIP image-captioning
    pipeline (exposed as the module-global ``image_to_text``).
    Shutdown: best-effort deletion of temporary images under /images.
    """
    # Load the environment variables
    load_dotenv()
    #os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
    ## Langsmith tracking
    os.environ["LANGCHAIN_TRACING_V2"] = "true" # Enable tracing to capture all the monitoring results
    # NOTE(review): os.getenv() returns None for an unset variable, and
    # assigning None into os.environ raises TypeError — confirm these vars
    # are always present in the deployment environment.
    os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
    ## load the Groq API key
    os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
    os.environ['HF_TOKEN'] = os.getenv("HF_TOKEN")
    # Shared module-global so the /image endpoint reuses one loaded model
    global image_to_text
    image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    yield
    # Delete all the temporary images created during the session
    utils.unlink_images("/images")
48
+
49
## FASTAPI APP
# Initialize the FastAPI app; docs_url="/" serves the Swagger UI at the root
app = FastAPI(lifespan=lifespan, docs_url="/")

# Allow requests from all origins (replace * with specific origins if needed)
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is a
# permissive CORS posture — confirm this is acceptable for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["*"],
)
61
+
62
## PYDANTIC MODELS
# Define an APIKey Pydantic model for the request body
class APIKey(BaseModel):
    # OpenAI API key forwarded by the Streamlit front end (/set_api_key)
    api_key: str

# Define a FileInfo Pydantic model for the request body
class FileInfo(BaseModel):
    # Path of the temporary file previously saved by the front end
    file_path: str
    # MIME type of the upload; drives loader selection in /load_file
    file_type: str

# Define an Image Pydantic model for the request body
# NOTE(review): this class shadows PIL.Image imported at the top of the
# file — any later use of PIL's Image in this module would resolve here.
class Image(BaseModel):
    # Path of the temporary image previously saved by the front end
    image_path: str

# Define a Website Pydantic model for the request body
class Website(BaseModel):
    # URL to scrape and index in /load_link
    website_link: str

# Define a Question Pydantic model for the request body
class Question(BaseModel):
    # The user's question text
    question: str
    # Which indexed source to query: "file" or "web"
    resource: str
84
+
85
+ ## FUNCTIONS
86
+ # Function to combine all documents
87
def format_docs(docs):
    """Join the page content of every document, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
89
+
90
+ # Function to encode the image
91
def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded UTF-8 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
94
+
95
+ ## FASTAPI ENDPOINTS
96
+ ## GET - /
97
+ @app.get("/")
98
+ async def welcome():
99
+ return "Welcome to Brainbot!"
100
+
101
+ ## POST - /set_api_key
102
+ @app.post("/set_api_key")
103
+ async def set_api_key(api_key: APIKey):
104
+ os.environ["OPENAI_API_KEY"] = api_key.api_key
105
+ return "API key set successfully!"
106
+
107
+ ## POST - /load_file
108
+ # Load the file, split it into document chunks, and upload the document embeddings into a vectorstore
109
+ @app.post("/load_file/{llm}")
110
+ async def load_file(llm: str, file_info: FileInfo):
111
+ file_path = file_info.file_path
112
+ file_type = file_info.file_type
113
+
114
+ # Read the file and split it into document chunks
115
+ try:
116
+ # Initialize the text splitter
117
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
118
+
119
+ # Check the file type and load each file according to its type
120
+ if file_type == "application/pdf":
121
+ # Read pdf file
122
+ loader = PyPDFLoader(file_path)
123
+ docs = loader.load()
124
+ elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
125
+ # Read docx file
126
+ text = docx2txt.process(file_path)
127
+ docs = text_splitter.create_documents([text])
128
+ elif file_type == "text/plain":
129
+ # Read txt file
130
+ with open(file_path, 'r') as file:
131
+ text = file.read()
132
+ docs = text_splitter.create_documents([text])
133
+ elif file_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
134
+ # Read pptx file
135
+ presentation = pptx.Presentation(file_path)
136
+ # Initialize an empty list to store slide texts
137
+ slide_texts = []
138
+
139
+ # Iterate through slides and extract text
140
+ for slide in presentation.slides:
141
+ # Initialize an empty string to store text for each slide
142
+ slide_text = ""
143
+
144
+ # Iterate through shapes in the slide
145
+ for shape in slide.shapes:
146
+ if hasattr(shape, "text"):
147
+ slide_text += shape.text + "\n" # Add shape text to slide text
148
+ # Append slide text to the list
149
+ slide_texts.append(slide_text.strip())
150
+
151
+ docs = text_splitter.create_documents(slide_texts)
152
+ elif file_type == "text/html":
153
+ # Read html file
154
+ with open(file_path, 'r') as file:
155
+ soup = BeautifulSoup(file, 'html.parser')
156
+ text = soup.get_text()
157
+ docs = text_splitter.create_documents([text])
158
+
159
+ # Delete the temporary file
160
+ os.unlink(file_path)
161
+
162
+ # Split the document into chunks
163
+ documents = text_splitter.split_documents(docs)
164
+
165
+ if llm == "GPT-4":
166
+ embeddings = OpenAIEmbeddings()
167
+ elif llm == "GROQ":
168
+ embeddings = HuggingFaceEmbeddings()
169
+
170
+ # Save document embeddings into the FAISS vectorstore
171
+ global file_vectorstore
172
+ file_vectorstore = FAISS.from_documents(documents, embeddings)
173
+ except Exception as e:
174
+ # Handle errors
175
+ raise HTTPException(status_code=500, detail=str(e.with_traceback))
176
+ return "File uploaded successfully!"
177
+
178
+ ## POST - /image
179
+ # Interpret the image using the LLM - OpenAI Vision
180
+ @app.post("/image/{llm}")
181
+ async def interpret_image(llm: str, image: Image):
182
+ try:
183
+ # Get the base64 string
184
+ base64_image = encode_image(image.image_path)
185
+
186
+ if llm == "GPT-4":
187
+ headers = {
188
+ "Content-Type": "application/json",
189
+ "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
190
+ }
191
+
192
+ payload = {
193
+ "model": "gpt-4-turbo",
194
+ "messages": [
195
+ {
196
+ "role": "user",
197
+ "content": [
198
+ {
199
+ "type": "text",
200
+ "text": "What's in this image?"
201
+ },
202
+ {
203
+ "type": "image_url",
204
+ "image_url": {
205
+ "url": f"data:image/jpeg;base64,{base64_image}"
206
+ }
207
+ }
208
+ ]
209
+ }
210
+ ],
211
+ "max_tokens": 300
212
+ }
213
+
214
+ response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
215
+ response = response.json()
216
+ # Extract description about the image
217
+ description = response["choices"][0]["message"]["content"]
218
+ elif llm == "GROQ":
219
+ # Use image-to-text model from Hugging Face
220
+ response = image_to_text(image.image_path)
221
+ # Extract description about the image
222
+ description = response[0]["generated_text"]
223
+ chat = ChatGroq(temperature=0, groq_api_key=os.environ["GROQ_API_KEY"], model_name="Llama3-8b-8192")
224
+ system = "You are an assistant to understand and interpret images."
225
+ human = "{text}"
226
+ prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
227
+
228
+ chain = prompt | chat
229
+ text = f"Explain the following image description in a small paragraph. {description}"
230
+ response = chain.invoke({"text": text})
231
+ description = str.capitalize(description) + ". " + response.content
232
+ except Exception as e:
233
+ # Handle errors
234
+ raise HTTPException(status_code=500, detail=str(e))
235
+
236
+ return description
237
+
238
+ ## POST - load_link
239
+ # Load the website content through scraping, split it into document chunks, and upload the document
240
+ # embeddings into a vectorstore
241
+ @app.post("/load_link/{llm}")
242
+ async def website_info(llm: str, link: Website):
243
+ try:
244
+ # load, chunk, and index the content of the html page
245
+ loader = WebBaseLoader(web_paths=(link.website_link,),)
246
+
247
+ global web_documents
248
+ web_documents = loader.load()
249
+
250
+ # split the document into chunks
251
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
252
+ documents = text_splitter.split_documents(web_documents)
253
+
254
+ if llm == "GPT-4":
255
+ embeddings = OpenAIEmbeddings()
256
+ elif llm == "GROQ":
257
+ embeddings = HuggingFaceEmbeddings()
258
+
259
+ # Save document embeddings into the FAISS vectorstore
260
+ global website_vectorstore
261
+ website_vectorstore = FAISS.from_documents(documents, embeddings)
262
+ except Exception as e:
263
+ # Handle errors
264
+ raise HTTPException(status_code=500, detail=str(e))
265
+
266
+ return "Website loaded successfully!"
267
+
268
+ ## POST - /answer_with_chat_history
269
+ # Retrieve the answer to the question using LLM and the RAG chain maintaining the chat history
270
+ @app.post("/answer_with_chat_history/{llm}")
271
+ async def get_answer_with_chat_history(llm: str, question: Question):
272
+ user_question = question.question
273
+ resource = question.resource
274
+ selected_llm = llm
275
+
276
+ try:
277
+ # Initialize the LLM
278
+ if selected_llm == "GPT-4":
279
+ llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
280
+ elif selected_llm == "GROQ":
281
+ llm = ChatGroq(groq_api_key=os.environ["GROQ_API_KEY"], model_name="Llama3-8b-8192")
282
+
283
+ # extract relevant context from the document using the retriever with similarity search
284
+ if resource == "file":
285
+ retriever = file_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
286
+ elif resource == "web":
287
+ retriever = website_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
288
+
289
+ ### Contextualize question ###
290
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
291
+ which might reference context in the chat history, formulate a standalone question \
292
+ which can be understood without the chat history. Do NOT answer the question, \
293
+ just reformulate it if needed and otherwise return it as is."""
294
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
295
+ [
296
+ ("system", contextualize_q_system_prompt),
297
+ MessagesPlaceholder("chat_history"),
298
+ ("human", "{input}"),
299
+ ]
300
+ )
301
+ history_aware_retriever = create_history_aware_retriever(
302
+ llm, retriever, contextualize_q_prompt
303
+ )
304
+
305
+ ### Answer question ###
306
+ qa_system_prompt = """You are an assistant for question-answering tasks. \
307
+ Use the following pieces of retrieved context to answer the question. \
308
+ If you don't know the answer, just say that you don't know. \
309
+ Use three sentences maximum and keep the answer concise.\
310
+ {context}"""
311
+ qa_prompt = ChatPromptTemplate.from_messages(
312
+ [
313
+ ("system", qa_system_prompt),
314
+ MessagesPlaceholder("chat_history"),
315
+ ("human", "{input}"),
316
+ ]
317
+ )
318
+
319
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
320
+
321
+ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
322
+
323
+ ### Statefully manage chat history ###
324
+ store = {}
325
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
326
+ if session_id not in store:
327
+ store[session_id] = ChatMessageHistory()
328
+ return store[session_id]
329
+
330
+ conversational_rag_chain = RunnableWithMessageHistory(
331
+ rag_chain,
332
+ get_session_history,
333
+ input_messages_key="input",
334
+ history_messages_key="chat_history",
335
+ output_messages_key="answer",
336
+ )
337
+
338
+ response = conversational_rag_chain.invoke(
339
+ {"input": user_question},
340
+ config={
341
+ "configurable": {"session_id": "abc123"}
342
+ }, # constructs a key "abc123" in `store`.
343
+ )["answer"]
344
+ except Exception as e:
345
+ # Handle errors
346
+ raise HTTPException(status_code=500, detail=str(e))
347
+
348
+ return response
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bs4
2
+ docx2txt
3
+ faiss-cpu
4
+ fastapi
5
+ langchain
6
+ langchain-community
7
+ langchain-core
8
+ langchain-groq
9
+ langchain-openai
10
+ Pillow
11
+ pydantic
12
+ pypdf
13
+ python-dotenv
14
+ python-pptx
15
+ requests
16
+ sentence-transformers
17
+ streamlit
18
+ validators
19
+ uvicorn
styles.css ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ img {
2
+ border-radius: 10px;
3
+ }
4
+
5
+ .stApp {
6
+ background: linear-gradient(to bottom, rgba(247,251,252,1) 0%,rgba(217,237,242,1) 40%,rgba(173,217,228,1) 100%); /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */
7
+ }
8
+
9
+ ul li:nth-child(2) {
10
+ display: none;
11
+ }
utils.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+
4
+ ## HELPER FUNCTIONS
5
+ ## ------------------------------------------------------------------------------------------
6
+ # Function to format response received from a FastAPI endpoint
7
def format_response(response_text):
    """Normalize a FastAPI response string into display-ready markdown.

    Converts literal "\\n" escapes to real newlines, then rewrites bullet
    points, numbered-list items, and headings into markdown syntax.
    """
    # Literal backslash-n sequences become real newlines first.
    response_text = re.sub(r'\\n', '\n', response_text)

    # Per-line rewrites applied in order: bullets, numbered lists, headings.
    line_rules = (
        (r'^\s*-\s+(.*)$', r'* \1'),
        (r'^\s*\d+\.\s+(.*)$', r'1. \1'),
        (r'^\s*(#+)\s+(.*)$', r'\1 \2'),
    )
    for pattern, replacement in line_rules:
        response_text = re.sub(pattern, replacement, response_text, flags=re.MULTILINE)

    return response_text
21
+
22
+ # Function to unlink all images when the application closes
23
def unlink_images(folder_path):
    """Best-effort removal of every entry in *folder_path*.

    Each deletion failure is reported and skipped so one bad entry does not
    stop the cleanup of the rest.
    """
    # Walk the directory listing and unlink each entry individually.
    for image_file in os.listdir(folder_path):
        target = os.path.join(folder_path, image_file)
        try:
            os.unlink(target)
        except Exception as e:
            print(f"Error deleting {image_file}: {e}")
        else:
            print(f"Deleted: {image_file}")