aminaj committed on
Commit
979518f
1 Parent(s): c5249db

Add new files and folders

Browse files
Files changed (14) hide show
  1. .dockerignore +34 -0
  2. BrainBot.py +189 -0
  3. Dockerfile +78 -0
  4. README.Docker.md +22 -0
  5. brainbot.png +0 -0
  6. compose.yaml +49 -0
  7. error.py +18 -0
  8. main.py +337 -0
  9. pages/File-chat.py +79 -0
  10. pages/Image-scan.py +72 -0
  11. pages/Web-chat.py +77 -0
  12. requirements.txt +19 -0
  13. styles.css +7 -0
  14. utils.py +43 -0
.dockerignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Include any files or directories that you don't want to be copied to your
2
+ # container here (e.g., local build artifacts, temporary files, etc.).
3
+ #
4
+ # For more help, visit the .dockerignore file reference guide at
5
+ # https://docs.docker.com/go/build-context-dockerignore/
6
+
7
+ **/.DS_Store
8
+ **/__pycache__
9
+ **/.venv
10
+ **/.classpath
11
+ **/.dockerignore
12
+ **/.env
13
+ **/.git
14
+ **/.gitignore
15
+ **/.project
16
+ **/.settings
17
+ **/.toolstarget
18
+ **/.vs
19
+ **/.vscode
20
+ **/*.*proj.user
21
+ **/*.dbmdl
22
+ **/*.jfm
23
+ **/bin
24
+ **/charts
25
+ **/docker-compose*
26
+ **/compose*
27
+ **/Dockerfile*
28
+ **/node_modules
29
+ **/npm-debug.log
30
+ **/obj
31
+ **/secrets.dev.yaml
32
+ **/values.dev.yaml
33
+ LICENSE
34
+ README.md
BrainBot.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import tempfile
4
+ import validators
5
+ import os
6
+ from utils import setup_logging, log_error
7
+
8
+ # Custom CSS
9
+ with open('styles.css') as f:
10
+ css = f.read()
11
+
12
+ st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
13
+
14
+ # Setup Logging
15
+ setup_logging()
16
+
17
+ ## FUNCTIONS
18
+ ## -------------------------------------------------------------------------------------------
19
+ # Function to save the uploaded file as a temporary file and return its path.
20
def save_uploaded_file(uploaded_file, data_dir="/data"):
    """Persist an uploaded file into *data_dir* and return its path.

    Parameters:
        uploaded_file: file-like object (e.g. a Streamlit UploadedFile) whose
            entire content is read via ``.read()``.
        data_dir: target directory for the temporary file. Defaults to the
            ``/data`` directory the Dockerfile creates; parameterized so the
            function also works outside the container (and in tests) —
            backward compatible with the previous hard-coded path.

    Returns:
        str: path of the temporary file now holding the content. The file is
        created with ``delete=False``; the caller owns cleanup (the FastAPI
        ``/load_file`` endpoint unlinks it after indexing).
    """
    file_content = uploaded_file.read()  # Load the document

    # The container pre-creates /data, but make sure the target exists when
    # running outside Docker; exist_ok avoids failing on a pre-existing dir.
    os.makedirs(data_dir, exist_ok=True)

    # Create a temporary file in the data directory
    with tempfile.NamedTemporaryFile(delete=False, dir=data_dir) as temp_file:
        temp_file.write(file_content)
        return temp_file.name
32
+
33
+ # Function to save the uploaded image as a temporary file and return its path.
34
def save_uploaded_image(uploaded_image, images_dir="/images"):
    """Persist an uploaded image into *images_dir* as a temporary .png file.

    Bug fixed: the previous implementation computed
    ``os.path.join(images_dir, tempfile.NamedTemporaryFile(suffix=".png").name)``,
    but ``.name`` is an *absolute* path (e.g. ``/tmp/tmpXXXX.png``) and
    ``os.path.join`` discards every component before an absolute second
    argument — so images silently landed in /tmp instead of images_dir, and an
    extra empty temp file leaked in /tmp. Creating the temp file directly
    inside images_dir fixes both problems.

    Parameters:
        uploaded_image: file-like object read via ``.read()``.
        images_dir: target directory; defaults to the ``/images`` directory
            the Dockerfile creates (parameter added for use outside Docker).

    Returns:
        str: path of the saved .png file; cleanup happens at application
        shutdown via utils.unlink_images (see main.py's lifespan).
    """
    # Ensure the target directory exists when running outside the container.
    os.makedirs(images_dir, exist_ok=True)

    # Create the temporary .png file directly inside images_dir and write the
    # uploaded image content into it.
    with tempfile.NamedTemporaryFile(dir=images_dir, suffix=".png", delete=False) as temp_file:
        temp_file.write(uploaded_image.read())
        return temp_file.name
46
+
47
+ ## LOGO and TITLE
48
+ ## -------------------------------------------------------------------------------------------
49
+ # Show the logo and title side by side
50
+ col1, col2 = st.columns([1, 4])
51
+ with col1:
52
+ st.image("brainbot.png", use_column_width=True,)
53
+ with col2:
54
+ st.title("Hi, I am BrainBot - Your AI Learning Assistant!")
55
+
56
+ # Main content
57
+ st.header("Upload any 📄 file, 🖼️ image, or 🔗 webpage link and ask me anything from it!")
58
+ st.subheader("Supported file formats: PDF, DOCX, TXT, PPTX, HTML")
59
+ st.subheader("Supported image formats: PNG, JPG, JPEG")
60
+
61
+ col3, col4 = st.columns([2, 3])
62
+ with col3:
63
+ ## LLM OPTIONS
64
+ # Select the LLM to use (either GPT-4 or GROQ)
65
+ llm = st.radio(
66
+ "Choose the LLM", ["GPT-4", "GROQ"],
67
+ index=1
68
+ )
69
+
70
+ st.session_state["llm"] = llm
71
+
72
+ ## CHAT OPTIONS - FILE, IMAGE, WEBSITE
73
+ ## -------------------------------------------------------------------------------------------
74
+ # User Inputs
75
+ uploaded_file = None
76
+ uploaded_image = None
77
+ website_link = None
78
+ question = None
79
+
80
+ if llm == "GPT-4" and "api_key_flag" not in st.session_state:
81
+ st.warning("Please enter your OpenAI API key.")
82
+ # Get OpenAI API Key from user
83
+ openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")
84
+ # Send POST request to a FastAPI endpoint to set the OpenAI API key as an environment
85
+ # variable
86
+ with st.spinner("Activating OpenAI API..."):
87
+ try:
88
+ FASTAPI_URL = "http://localhost:8000/set_api_key"
89
+ data = {"api_key": openai_api_key}
90
+ if openai_api_key:
91
+ response = requests.post(FASTAPI_URL, json=data)
92
+ st.sidebar.success(response.text)
93
+ st.session_state['api_key_flag'] = True
94
+ st.experimental_rerun()
95
+ except Exception as e:
96
+ log_error(str(e))
97
+ st.switch_page("error.py")
98
+ with col4:
99
+ if llm == "GROQ" or "api_key_flag" in st.session_state:
100
+ # Select to upload file, image, or link to chat with them
101
+ upload_option = st.radio(
102
+ "Select an option", ["📄 Upload File", "🖼️ Upload Image", "🔗 Upload Link"]
103
+ )
104
+ # Select an option to show the appropriate file_uploader
105
+ if upload_option == "📄 Upload File":
106
+ uploaded_file = st.file_uploader("Choose a file",
107
+ type=["txt", "pdf", "docx", "pptx", "html"])
108
+ elif upload_option == "🖼️ Upload Image":
109
+ uploaded_image = st.file_uploader("Choose an image", type=["png", "jpg", "jpeg"])
110
+ elif upload_option == "🔗 Upload Link":
111
+ website_link = st.text_input("Enter a website URL")
112
+
113
+ ## CHAT HISTORY
114
+ ## -------------------------------------------------------------------------------------------
115
+ # Initialize an empty list to store chat messages with files
116
+ if 'file_chat_history' not in st.session_state:
117
+ st.session_state['file_chat_history'] = []
118
+ # Initialize an empty list to store image interpretations
119
+ if 'image_chat_history' not in st.session_state:
120
+ st.session_state['image_chat_history'] = []
121
+ # Initialize an empty list to store chat messages with websites
122
+ if 'web_chat_history' not in st.session_state:
123
+ st.session_state['web_chat_history'] = []
124
+
125
+ ## FILE
126
+ ## -------------------------------------------------------------------------------------------
127
+ # Load the uploaded file, then save it into a vector store, and enable the input field to ask
128
+ # a question
129
+ st.session_state['uploaded_file'] = False
130
+ if uploaded_file is not None:
131
+ with st.spinner("Loading file..."):
132
+ # Save the uploaded file to a temporary path
133
+ temp_file_path = save_uploaded_file(uploaded_file)
134
+
135
+ try:
136
+ # Send POST request to a FastAPI endpoint to load the file into a vectorstore
137
+ data = {"file_path": temp_file_path, "file_type": uploaded_file.type}
138
+ FASTAPI_URL = f"http://localhost:8000/load_file/{llm}"
139
+ response = requests.post(FASTAPI_URL, json=data)
140
+ st.success(response.text)
141
+ st.session_state['current_file'] = uploaded_file.name
142
+ st.session_state['uploaded_file'] = True
143
+ st.switch_page("pages/File-chat.py")
144
+ except Exception as e:
145
+ log_error(str(e))
146
+ st.switch_page("error.py")
147
+
148
+ ## IMAGE
149
+ ## -------------------------------------------------------------------------------------------
150
+ # Load the uploaded image if user uploads an image, then interpret the image
151
+ st.session_state['uploaded_image'] = False
152
+ if uploaded_image is not None:
153
+ try:
154
+ # Save uploaded image to a temporary file
155
+ temp_img_path = save_uploaded_image(uploaded_image)
156
+ except Exception as e:
157
+ log_error(str(e))
158
+ st.switch_page("error.py")
159
+
160
+ st.session_state['temp_img_path'] = temp_img_path
161
+ st.session_state['current_image'] = uploaded_image.name
162
+ st.session_state['uploaded_image'] = True
163
+ st.switch_page("pages/Image-scan.py")
164
+
165
+ ## WEBSITE LINK
166
+ ## -------------------------------------------------------------------------------------------
167
+ # Load the website content, then save it into a vector store, and enable the input field to
168
+ # ask a question
169
+ st.session_state['uploaded_link'] = False
170
+ if website_link is not None:
171
+ if website_link:
172
+ # Ensure that the user has entered a correct URL
173
+ if validators.url(website_link):
174
+ try:
175
+ # Send POST request to a FastAPI endpoint to scrape the webpage and load its text
176
+ # into a vector store
177
+ FASTAPI_URL = f"http://localhost:8000/load_link/{llm}"
178
+ data = {"website_link": website_link}
179
+ with st.spinner("Loading website..."):
180
+ response = requests.post(FASTAPI_URL, json=data)
181
+ st.success(response.text)
182
+ st.session_state['current_website'] = website_link
183
+ st.session_state['uploaded_link'] = True
184
+ st.switch_page("pages/Web-chat.py")
185
+ except Exception as e:
186
+ log_error(str(e))
187
+ st.switch_page("error.py")
188
+ else:
189
+ st.error("Invalid URL. Please enter a valid URL.")
Dockerfile ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1

# Comments are provided throughout this file to help you get started.
# If you need more help, visit the Dockerfile reference guide at
# https://docs.docker.com/go/dockerfile-reference/

ARG PYTHON_VERSION=3.11.9
FROM python:${PYTHON_VERSION}-slim as base

# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1

# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1

# SECURITY FIX: LANGCHAIN_API_KEY and GROQ_API_KEY were previously hard-coded
# here, baking live secrets into every image layer and into version control
# (both keys must now be considered compromised and rotated). Supply them at
# runtime instead, e.g.:
#   docker run -e LANGCHAIN_API_KEY=... -e GROQ_API_KEY=... <image>
# main.py reads both via os.getenv at startup, so they MUST be provided.

WORKDIR /app

# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/go/dockerfile-user-best-practices/
ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser

# Download dependencies as a separate step to take advantage of Docker's caching.
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
# Leverage a bind mount to requirements.txt to avoid having to copy them into
# this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements.txt,target=requirements.txt \
    python -m pip install -r requirements.txt

# Writable directory for uploaded documents (used by BrainBot.py).
RUN mkdir -p /data && chown appuser /data

# Writable directory for uploaded images (used by BrainBot.py).
RUN mkdir -p /images && chown appuser /images

# Log file used by the application's logging setup.
RUN touch app.log && chown appuser app.log

# Switch to the non-privileged user to run the application.
USER appuser

# Cache directory for the Hugging Face image-to-text model downloaded at startup.
ENV TRANSFORMERS_CACHE=/tmp/.cache/huggingface

# Create the cache folder with appropriate permissions
RUN mkdir -p $TRANSFORMERS_CACHE && chmod -R 777 $TRANSFORMERS_CACHE

# Copy the source code into the container.
COPY . .

# Expose the ports the CMD actually listens on: 7860 (uvicorn/FastAPI) and
# 8501 (Streamlit). The previous EXPOSE 8000 matched neither.
# NOTE(review): the Streamlit pages POST to http://localhost:8000 while
# uvicorn is started on 7860 — confirm which port is intended and align them.
EXPOSE 7860 8501

# Run the FastAPI backend and the Streamlit UI in the same container.
CMD ["bash", "-c", "uvicorn main:app --host 0.0.0.0 --port 7860 & streamlit run BrainBot.py --server.port 8501"]
README.Docker.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Building and running your application
2
+
3
+ When you're ready, start your application by running:
4
+ `docker compose up --build`.
5
+
6
+ The Streamlit UI listens on port 8501 and the FastAPI backend on port 7860
+ (see the `CMD` in the Dockerfile); make sure those ports are published in
+ `compose.yaml`, then open http://localhost:8501.
7
+
8
+ ### Deploying your application to the cloud
9
+
10
+ First, build your image, e.g.: `docker build -t myapp .`.
11
+ If your cloud uses a different CPU architecture than your development
12
+ machine (e.g., you are on a Mac M1 and your cloud provider is amd64),
13
+ you'll want to build the image for that platform, e.g.:
14
+ `docker build --platform=linux/amd64 -t myapp .`.
15
+
16
+ Then, push it to your registry, e.g. `docker push myregistry.com/myapp`.
17
+
18
+ Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/)
19
+ docs for more detail on building and pushing.
20
+
21
+ ### References
22
+ * [Docker's Python guide](https://docs.docker.com/language/python/)
brainbot.png ADDED
compose.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Comments are provided throughout this file to help you get started.
# If you need more help, visit the Docker Compose reference guide at
# https://docs.docker.com/go/compose-spec-reference/

# Here the instructions define your application as a service called "server".
# This service is built from the Dockerfile in the current directory.
# You can add other services your application may depend on here, such as a
# database or a cache. For examples, see the Awesome Compose repository:
# https://github.com/docker/awesome-compose
services:
  server:
    build:
      context: .
    # Publish the ports the container actually listens on (see the Dockerfile
    # CMD): uvicorn/FastAPI on 7860 and the Streamlit UI on 8501. The previous
    # mapping (8000:8000) pointed at a container port nothing listens on.
    ports:
      - 7860:7860
      - 8501:8501
    # Pass API keys through from the host environment instead of baking them
    # into the image (see the security note in the Dockerfile).
    environment:
      - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
      - GROQ_API_KEY=${GROQ_API_KEY}

# The commented out section below is an example of how to define a PostgreSQL
# database that your application can use. `depends_on` tells Docker Compose to
# start the database before your application. The `db-data` volume persists the
# database data between container restarts. The `db-password` secret is used
# to set the database password. You must create `db/password.txt` and add
# a password of your choosing to it before running `docker compose up`.
#     depends_on:
#       db:
#         condition: service_healthy
#   db:
#     image: postgres
#     restart: always
#     user: postgres
#     secrets:
#       - db-password
#     volumes:
#       - db-data:/var/lib/postgresql/data
#     environment:
#       - POSTGRES_DB=example
#       - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
#     expose:
#       - 5432
#     healthcheck:
#       test: [ "CMD", "pg_isready" ]
#       interval: 10s
#       timeout: 5s
#       retries: 5
# volumes:
#   db-data:
# secrets:
#   db-password:
#     file: db/password.txt
error.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Custom CSS
4
+ with open('styles.css') as f:
5
+ css = f.read()
6
+
7
+ st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
8
+
9
+ ## LOGO and TITLE
10
+ ## -------------------------------------------------------------------------------------------
11
+ # Show the logo and title side by side
12
+ col1, col2 = st.columns([1, 4])
13
+ with col1:
14
+ st.image("brainbot.png", width=100)
15
+ with col2:
16
+ st.title("Error")
17
+
18
+ st.error("Oops - Something went wrong! Please try again.")
main.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from contextlib import asynccontextmanager
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_community.document_loaders import WebBaseLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_openai import OpenAIEmbeddings
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from langchain_openai import ChatOpenAI
11
+ from langchain_groq import ChatGroq
12
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
13
+ from langchain.chains.combine_documents import create_stuff_documents_chain
14
+ from langchain_community.chat_message_histories import ChatMessageHistory
15
+ from langchain_core.chat_history import BaseChatMessageHistory
16
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
17
+ from langchain_core.runnables.history import RunnableWithMessageHistory
18
+ from transformers import pipeline
19
+ from bs4 import BeautifulSoup
20
+ from dotenv import load_dotenv
21
+ from PIL import Image
22
+ import base64
23
+ import requests
24
+ import docx2txt
25
+ import pptx
26
+ import os
27
+ import utils
28
+
29
+ ## APPLICATION LIFESPAN
30
+ # Load the environment variables using FastAPI lifespan event so that they are available throughout the application
31
+ @asynccontextmanager
32
+ async def lifespan(app: FastAPI):
33
+ # Load the environment variables
34
+ load_dotenv()
35
+ #os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
36
+ ## Langsmith tracking
37
+ os.environ["LANGCHAIN_TRACING_V2"] = "true" # Enable tracing to capture all the monitoring results
38
+ os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
39
+ ## load the Groq API key
40
+ os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
41
+ global image_to_text
42
+ image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
43
+ yield
44
+ # Delete all the temporary images
45
+ utils.unlink_images("/images")
46
+
47
+ ## FASTAPI APP
48
+ # Initialize the FastAPI app
49
+ app = FastAPI(lifespan=lifespan)
50
+
51
+ ## PYDANTIC MODELS
52
+ # Define an APIKey Pydantic model for the request body
53
+ class APIKey(BaseModel):
54
+ api_key: str
55
+
56
+ # Define a FileInfo Pydantic model for the request body
57
+ class FileInfo(BaseModel):
58
+ file_path: str
59
+ file_type: str
60
+
61
+ # Define an Image Pydantic model for the request body
62
+ class Image(BaseModel):
63
+ image_path: str
64
+
65
+ # Define a Website Pydantic model for the request body
66
+ class Website(BaseModel):
67
+ website_link: str
68
+
69
+ # Define a Question Pydantic model for the request body
70
+ class Question(BaseModel):
71
+ question: str
72
+ resource: str
73
+
74
## FUNCTIONS
# Helper to merge retrieved documents into a single prompt-ready string.
def format_docs(docs):
    """Join the page_content of every document, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
78
+
79
# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 ASCII string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
83
+
84
+ ## FASTAPI ENDPOINTS
85
+ ## GET - /
86
+ @app.get("/")
87
+ async def welcome():
88
+ return "Welcome to Brainbot!"
89
+
90
+ ## POST - /set_api_key
91
+ @app.post("/set_api_key")
92
+ async def set_api_key(api_key: APIKey):
93
+ os.environ["OPENAI_API_KEY"] = api_key.api_key
94
+ return "API key set successfully!"
95
+
96
## POST - /load_file
# Load the file, split it into document chunks, and upload the document embeddings into a vectorstore
@app.post("/load_file/{llm}")
async def load_file(llm: str, file_info: FileInfo):
    """Parse the uploaded file, chunk it, and index it in a FAISS vectorstore.

    Parameters:
        llm: "GPT-4" (OpenAI embeddings) or "GROQ" (HuggingFace embeddings).
        file_info: path of the temporary upload plus its MIME type.

    Side effects:
        Publishes the index via the module-level global ``file_vectorstore``
        (read by /answer_with_chat_history when resource == "file") and
        deletes the temporary file.

    Raises:
        HTTPException(400) for an unsupported MIME type, HTTPException(500)
        for any other failure.
    """
    file_path = file_info.file_path
    file_type = file_info.file_type

    try:
        # Initialize the text splitter
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

        try:
            # Check the file type and load each file according to its type
            if file_type == "application/pdf":
                # Read pdf file
                loader = PyPDFLoader(file_path)
                docs = loader.load()
            elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                # Read docx file
                text = docx2txt.process(file_path)
                docs = text_splitter.create_documents([text])
            elif file_type == "text/plain":
                # Read txt file
                with open(file_path, 'r') as file:
                    text = file.read()
                docs = text_splitter.create_documents([text])
            elif file_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
                # Read pptx file
                presentation = pptx.Presentation(file_path)
                # Collect the text of every shape, one entry per slide
                slide_texts = []
                for slide in presentation.slides:
                    slide_text = ""
                    for shape in slide.shapes:
                        if hasattr(shape, "text"):
                            slide_text += shape.text + "\n"
                    slide_texts.append(slide_text.strip())
                docs = text_splitter.create_documents(slide_texts)
            elif file_type == "text/html":
                # Read html file
                with open(file_path, 'r') as file:
                    soup = BeautifulSoup(file, 'html.parser')
                text = soup.get_text()
                docs = text_splitter.create_documents([text])
            else:
                # BUG FIX: an unknown type previously fell through and crashed
                # later with an unrelated NameError on `docs`; reject it
                # explicitly with a client error instead.
                raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_type}")
        finally:
            # BUG FIX: delete the temporary file even when parsing fails, so
            # /data does not accumulate orphaned uploads.
            if os.path.exists(file_path):
                os.unlink(file_path)

        # Split the document into chunks
        documents = text_splitter.split_documents(docs)

        # Pick the embedding backend matching the selected LLM
        if llm == "GPT-4":
            embeddings = OpenAIEmbeddings()
        elif llm == "GROQ":
            embeddings = HuggingFaceEmbeddings()

        # Save document embeddings into the FAISS vectorstore
        global file_vectorstore
        file_vectorstore = FAISS.from_documents(documents, embeddings)
    except HTTPException:
        # Let the deliberate 400 above pass through unchanged.
        raise
    except Exception as e:
        # BUG FIX: was str(e.with_traceback) — the repr of a bound method, not
        # the error message. str(e) is what the client should see.
        raise HTTPException(status_code=500, detail=str(e))
    return "File uploaded successfully!"
166
+
167
+ ## POST - /image
168
+ # Interpret the image using the LLM - OpenAI Vision
169
+ @app.post("/image/{llm}")
170
+ async def interpret_image(llm: str, image: Image):
171
+ try:
172
+ # Get the base64 string
173
+ base64_image = encode_image(image.image_path)
174
+
175
+ if llm == "GPT-4":
176
+ headers = {
177
+ "Content-Type": "application/json",
178
+ "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
179
+ }
180
+
181
+ payload = {
182
+ "model": "gpt-4-turbo",
183
+ "messages": [
184
+ {
185
+ "role": "user",
186
+ "content": [
187
+ {
188
+ "type": "text",
189
+ "text": "What's in this image?"
190
+ },
191
+ {
192
+ "type": "image_url",
193
+ "image_url": {
194
+ "url": f"data:image/jpeg;base64,{base64_image}"
195
+ }
196
+ }
197
+ ]
198
+ }
199
+ ],
200
+ "max_tokens": 300
201
+ }
202
+
203
+ response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
204
+ response = response.json()
205
+ # Extract description about the image
206
+ description = response["choices"][0]["message"]["content"]
207
+ elif llm == "GROQ":
208
+ # Use image-to-text model from Hugging Face
209
+ response = image_to_text(image.image_path)
210
+ # Extract description about the image
211
+ description = response[0]["generated_text"]
212
+ chat = ChatGroq(temperature=0, groq_api_key=os.environ["GROQ_API_KEY"], model_name="Llama3-8b-8192")
213
+ system = "You are an assistant to understand and interpret images."
214
+ human = "{text}"
215
+ prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
216
+
217
+ chain = prompt | chat
218
+ text = f"Explain the following image description in a small paragraph. {description}"
219
+ response = chain.invoke({"text": text})
220
+ description = str.capitalize(description) + ". " + response.content
221
+ except Exception as e:
222
+ # Handle errors
223
+ raise HTTPException(status_code=500, detail=str(e))
224
+
225
+ return description
226
+
227
+ ## POST - load_link
228
+ # Load the website content through scraping, split it into document chunks, and upload the document
229
+ # embeddings into a vectorstore
230
+ @app.post("/load_link/{llm}")
231
+ async def website_info(llm: str, link: Website):
232
+ try:
233
+ # load, chunk, and index the content of the html page
234
+ loader = WebBaseLoader(web_paths=(link.website_link,),)
235
+
236
+ global web_documents
237
+ web_documents = loader.load()
238
+
239
+ # split the document into chunks
240
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
241
+ documents = text_splitter.split_documents(web_documents)
242
+
243
+ if llm == "GPT-4":
244
+ embeddings = OpenAIEmbeddings()
245
+ elif llm == "GROQ":
246
+ embeddings = HuggingFaceEmbeddings()
247
+
248
+ # Save document embeddings into the FAISS vectorstore
249
+ global website_vectorstore
250
+ website_vectorstore = FAISS.from_documents(documents, embeddings)
251
+ except Exception as e:
252
+ # Handle errors
253
+ raise HTTPException(status_code=500, detail=str(e))
254
+
255
+ return "Website loaded successfully!"
256
+
257
+ ## POST - /answer_with_chat_history
258
+ # Retrieve the answer to the question using LLM and the RAG chain maintaining the chat history
259
+ @app.post("/answer_with_chat_history/{llm}")
260
+ async def get_answer_with_chat_history(llm: str, question: Question):
261
+ user_question = question.question
262
+ resource = question.resource
263
+ selected_llm = llm
264
+
265
+ try:
266
+ # Initialize the LLM
267
+ if selected_llm == "GPT-4":
268
+ llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
269
+ elif selected_llm == "GROQ":
270
+ llm = ChatGroq(groq_api_key=os.environ["GROQ_API_KEY"], model_name="Llama3-8b-8192")
271
+
272
+ # extract relevant context from the document using the retriever with similarity search
273
+ if resource == "file":
274
+ retriever = file_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
275
+ elif resource == "web":
276
+ retriever = website_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
277
+
278
+ ### Contextualize question ###
279
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
280
+ which might reference context in the chat history, formulate a standalone question \
281
+ which can be understood without the chat history. Do NOT answer the question, \
282
+ just reformulate it if needed and otherwise return it as is."""
283
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
284
+ [
285
+ ("system", contextualize_q_system_prompt),
286
+ MessagesPlaceholder("chat_history"),
287
+ ("human", "{input}"),
288
+ ]
289
+ )
290
+ history_aware_retriever = create_history_aware_retriever(
291
+ llm, retriever, contextualize_q_prompt
292
+ )
293
+
294
+ ### Answer question ###
295
+ qa_system_prompt = """You are an assistant for question-answering tasks. \
296
+ Use the following pieces of retrieved context to answer the question. \
297
+ If you don't know the answer, just say that you don't know. \
298
+ Use three sentences maximum and keep the answer concise.\
299
+ {context}"""
300
+ qa_prompt = ChatPromptTemplate.from_messages(
301
+ [
302
+ ("system", qa_system_prompt),
303
+ MessagesPlaceholder("chat_history"),
304
+ ("human", "{input}"),
305
+ ]
306
+ )
307
+
308
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
309
+
310
+ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
311
+
312
+ ### Statefully manage chat history ###
313
+ store = {}
314
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
315
+ if session_id not in store:
316
+ store[session_id] = ChatMessageHistory()
317
+ return store[session_id]
318
+
319
+ conversational_rag_chain = RunnableWithMessageHistory(
320
+ rag_chain,
321
+ get_session_history,
322
+ input_messages_key="input",
323
+ history_messages_key="chat_history",
324
+ output_messages_key="answer",
325
+ )
326
+
327
+ response = conversational_rag_chain.invoke(
328
+ {"input": user_question},
329
+ config={
330
+ "configurable": {"session_id": "abc123"}
331
+ }, # constructs a key "abc123" in `store`.
332
+ )["answer"]
333
+ except Exception as e:
334
+ # Handle errors
335
+ raise HTTPException(status_code=500, detail=str(e))
336
+
337
+ return response
pages/File-chat.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import utils
4
+ from utils import setup_logging, log_error
5
+
6
+ # Custom CSS
7
+ with open('styles.css') as f:
8
+ css = f.read()
9
+
10
+ st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
11
+
12
+ # Setup Logging
13
+ setup_logging()
14
+
15
+ ## LOGO and TITLE
16
+ ## -------------------------------------------------------------------------------------------
17
+ # Show the logo and title side by side
18
+ col1, col2 = st.columns([1, 4])
19
+ with col1:
20
+ st.image("brainbot.png", width=100)
21
+ with col2:
22
+ st.title("File-Chat")
23
+
24
+ question = None
25
+ llm = st.session_state["llm"]
26
+
27
+ if "current_file" in st.session_state:
28
+ current_file = st.session_state['current_file']
29
+ if st.sidebar.button("Upload New File"):
30
+ st.switch_page("BrainBot.py")
31
+ st.subheader("Your file has been uploaded successfully. You can now chat with it.")
32
+ st.success(current_file)
33
+ question = st.chat_input("Type your question here...")
34
+
35
+ else:
36
+ st.warning("Upload a file to begin chat with it.")
37
+ if st.button("Upload File"):
38
+ st.switch_page("BrainBot.py")
39
+
40
+ ## CHAT
41
+ # Clear the file chat history if user has uploaded a new file
42
+ if st.session_state['uploaded_file'] == True:
43
+ st.session_state['file_chat_history'] = []
44
+
45
+ # Display the file chat history
46
+ for message in st.session_state['file_chat_history']:
47
+ with st.chat_message("user"):
48
+ st.write(message["Human"])
49
+ with st.chat_message("ai"):
50
+ st.markdown(utils.format_response(message["AI"]))
51
+
52
+ ## QUESTION - WITH CHAT HISTORY
53
+ ## -------------------------------------------------------------------------------------------
54
+ # Retrieve the answer to the question asked by the user
55
+ if question is not None:
56
+ # Display the question entered by the user in chat
57
+ with st.chat_message("user"):
58
+ st.write(question)
59
+
60
+ resource = "file"
61
+
62
+ try:
63
+ # Send POST request to a FastAPI endpoint to retrieve an answer for the question
64
+ data = {"question": question, "resource": resource}
65
+ FASTAPI_URL = f"http://localhost:8000/answer_with_chat_history/{llm}"
66
+
67
+ with st.spinner("Generating response..."):
68
+ response = requests.post(FASTAPI_URL, json=data)
69
+ # Append the response to the chat history
70
+ st.session_state['file_chat_history'].append({"Human": question, "AI": response.text})
71
+ st.session_state['uploaded_file'] = False
72
+ # Display the AI's response to the question in chat
73
+ with st.chat_message("ai"):
74
+ # Format the response
75
+ formatted_response = utils.format_response(response.text)
76
+ st.markdown(formatted_response)
77
+ except Exception as e:
78
+ log_error(str(e))
79
+ st.switch_page("error.py")
pages/Image-scan.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import requests
import utils
from utils import setup_logging, log_error

# Custom CSS
with open('styles.css') as f:
    css = f.read()

st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)

# Setup Logging
setup_logging()

## LOGO and TITLE
## -------------------------------------------------------------------------------------------
# Show the logo and title side by side
col1, col2 = st.columns([1, 4])
with col1:
    st.image("brainbot.png", width=100)
with col2:
    st.title("Image-Scan")

# An LLM must have been selected on the main page; redirect instead of
# crashing with a KeyError when this page is opened directly.
if "llm" not in st.session_state:
    st.switch_page("BrainBot.py")
llm = st.session_state["llm"]

# None when no image has been uploaded yet (e.g. direct navigation)
current_image = st.session_state.get("current_image")
if current_image is not None:
    if st.sidebar.button("Upload New Image"):
        st.switch_page("BrainBot.py")
    st.subheader("Your image has been uploaded successfully.")
    st.success(current_image)
else:
    st.warning("Upload an image to interpret it.")
    if st.button("Upload Image"):
        st.switch_page("BrainBot.py")

## CHAT
# Clear the image chat history if user has uploaded a new image
if st.session_state.get('uploaded_image'):
    st.session_state['image_chat_history'] = []
# Guarantee the history list exists so iteration/append below cannot KeyError
st.session_state.setdefault('image_chat_history', [])

# Display the image chat history (each entry pairs the image with its AI description)
for image in st.session_state['image_chat_history']:
    with st.chat_message("user"):
        st.image(image["path"], caption=current_image)
    with st.chat_message("ai"):
        st.markdown(utils.format_response(image["Description"]))

## IMAGE
# Display the image uploaded by the user and request its interpretation
if "temp_img_path" in st.session_state and st.session_state.get('uploaded_image'):
    temp_img_path = st.session_state['temp_img_path']
    with st.chat_message("human"):
        st.image(temp_img_path, width=300, caption=current_image)

    try:
        # Send POST request to a FastAPI endpoint with temporary image path
        FASTAPI_URL = f"http://localhost:8000/image/{llm}"
        with st.spinner("Interpreting image..."):
            response = requests.post(FASTAPI_URL, json={"image_path": temp_img_path})
            # Append the image and response to the chat history
            st.session_state['image_chat_history'].append({"path": temp_img_path, "Description": response.text})
            # Mark the upload as consumed so a rerun does not re-interpret it
            st.session_state['uploaded_image'] = False

        # Display the AI's interpretation of the image in chat
        with st.chat_message("assistant"):
            # Format the response
            formatted_response = utils.format_response(response.text)
            st.markdown(formatted_response)
    except Exception as e:
        # Log and route to the shared error page rather than showing a raw traceback
        log_error(str(e))
        st.switch_page("error.py")
pages/Web-chat.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import requests
import utils
from utils import setup_logging, log_error

# Custom CSS
with open('styles.css') as f:
    css = f.read()

st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)

# Setup Logging
setup_logging()

## LOGO and TITLE
## -------------------------------------------------------------------------------------------
# Show the logo and title side by side
col1, col2 = st.columns([1, 4])
with col1:
    st.image("brainbot.png", width=100)
with col2:
    st.title("Web-Chat")

question = None
# An LLM must have been selected on the main page; redirect instead of
# crashing with a KeyError when this page is opened directly.
if "llm" not in st.session_state:
    st.switch_page("BrainBot.py")
llm = st.session_state["llm"]

# None when no webpage link has been uploaded yet (e.g. direct navigation)
current_website = st.session_state.get("current_website")
if current_website is not None:
    if st.sidebar.button("Upload New Webpage Link"):
        st.switch_page("BrainBot.py")
    st.subheader("Your website content has been uploaded successfully. You can now chat with it.")
    st.success(current_website)
    question = st.chat_input("Type your question here...")
else:
    st.warning("Upload a webpage link to begin chat with it.")
    if st.button("Upload Webpage Link"):
        st.switch_page("BrainBot.py")

## CHAT
# Clear the web chat history if user has uploaded a new webpage link
if st.session_state.get('uploaded_link'):
    st.session_state['web_chat_history'] = []
# Guarantee the history list exists so iteration/append below cannot KeyError
st.session_state.setdefault('web_chat_history', [])

# Display the web chat history
for message in st.session_state['web_chat_history']:
    with st.chat_message("user"):
        st.write(message["Human"])
    with st.chat_message("ai"):
        st.markdown(utils.format_response(message["AI"]))

## QUESTION - WITH CHAT HISTORY
## -------------------------------------------------------------------------------------------
# Retrieve the answer to the question asked by the user
if question is not None:
    # Display the question entered by the user in chat
    with st.chat_message("user"):
        st.write(question)

    resource = "web"

    try:
        # Send POST request to a FastAPI endpoint to retrieve an answer for the question
        data = {"question": question, "resource": resource}
        FASTAPI_URL = f"http://localhost:8000/answer_with_chat_history/{llm}"
        with st.spinner("Generating response..."):
            response = requests.post(FASTAPI_URL, json=data)
            # Append the response to the chat history
            st.session_state['web_chat_history'].append({"Human": question, "AI": response.text})
            # Mark the upload as consumed so a rerun keeps the accumulated history
            st.session_state['uploaded_link'] = False
        # Display the AI's response to the question in chat
        with st.chat_message("ai"):
            # Format the response
            formatted_response = utils.format_response(response.text)
            st.markdown(formatted_response)
    except Exception as e:
        # str(e), not e — consistent with the error logging on the other pages
        log_error(str(e))
        st.switch_page("error.py")
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bs4
2
+ docx2txt
3
+ faiss-cpu
4
+ fastapi
5
+ langchain
6
+ langchain-community
7
+ langchain-core
8
+ langchain-groq
9
+ langchain-openai
10
+ Pillow
11
+ pydantic
12
+ pypdf
13
+ python-dotenv
14
+ python-pptx
15
+ requests
16
+ sentence-transformers
17
+ streamlit
18
+ validators
19
+ uvicorn
styles.css ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
/* Round the corners of every image (logo and uploaded pictures). */
img {
    border-radius: 10px;
}

/* Soft blue top-to-bottom gradient background for the whole Streamlit app. */
.stApp {
    background: linear-gradient(to bottom, rgba(247,251,252,1) 0%,rgba(217,237,242,1) 40%,rgba(173,217,228,1) 100%); /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */
}
utils.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ import logging
4
+
5
+ ## LOGGING CONFIGURATION
6
+ ## -------------------------------------------------------------------------------------------
7
+ # Configure logging to write to a file
8
def setup_logging():
    """Configure the root logger to append ERROR-level records to app.log."""
    logging.basicConfig(filename='app.log', level=logging.ERROR)
10
+
11
def log_error(message):
    """Record *message* at ERROR level via the logger set up by setup_logging()."""
    logging.error(message)
13
+
14
+ ## HELPER FUNCTIONS
15
+ ## ------------------------------------------------------------------------------------------
16
+ # Function to format response received from a FastAPI endpoint
17
def format_response(response_text):
    """Normalize raw endpoint text into renderable markdown.

    The FastAPI endpoints return the model's answer as a JSON-encoded string,
    so real newlines arrive as the literal two-character sequence ``\\n``;
    these are expanded first, then list items and headings are normalized.

    Args:
        response_text: Raw response body text from the endpoint.

    Returns:
        str: Text with literal ``\\n`` expanded and bullet points, numbered
        lists, and headings rewritten as clean markdown.
    """
    # Replace the literal two-character sequence \n with a real newline
    response_text = re.sub(r'\\n', '\n', response_text)

    # Normalize dash bullets ("- item") to markdown asterisk bullets
    response_text = re.sub(r'^\s*-\s+(.*)$', r'* \1', response_text, flags=re.MULTILINE)

    # Strip leading whitespace from numbered items, keeping the original
    # number (previously every item was collapsed to "1.", which renders
    # the same in markdown but loses the numbering in the raw text)
    response_text = re.sub(r'^\s*(\d+)\.\s+(.*)$', r'\1. \2', response_text, flags=re.MULTILINE)

    # Strip leading whitespace before headings so markdown recognizes them
    response_text = re.sub(r'^\s*(#+)\s+(.*)$', r'\1 \2', response_text, flags=re.MULTILINE)

    return response_text
31
+
32
+ # Function to unlink all images when the application closes
33
def unlink_images(folder_path):
    """Delete every file inside *folder_path*, continuing past failures.

    Args:
        folder_path: Directory holding the temporary uploaded images to
            remove. The directory itself is kept.
    """
    # List all entries in the folder (assumed to be regular files)
    image_files = os.listdir(folder_path)

    # Iterate over image files and unlink them; a failure on one file
    # (e.g. permissions) must not stop the sweep over the rest
    for image_file in image_files:
        try:
            os.unlink(os.path.join(folder_path, image_file))
            print(f"Deleted: {image_file}")
        except OSError as e:
            # OSError covers realistic unlink failures (missing file,
            # permissions, entry is a directory) without masking real
            # bugs the way a blanket `except Exception` would
            print(f"Error deleting {image_file}: {e}")