Update app.py
app.py CHANGED
@@ -1,11 +1,25 @@
-
-from
-from
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.storage import LocalFileStore
-from langchain_qdrant import QdrantVectorStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.globals import set_llm_cache

@@ -14,29 +28,12 @@ from langchain_core.caches import InMemoryCache
from operator import itemgetter
from langchain_core.runnables.passthrough import RunnablePassthrough
from chainlit.types import AskFileResponse
import uuid
import chainlit as cl

-### Global Section ###
-
-## check this helper readme: https://github.com/AI-Maker-Space/AIE4-DeployPythonicRAG/blob/main/BuildingAChainlitApp.md
-
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-
-Loader = PyMuPDFLoader
-
set_llm_cache(InMemoryCache())

-# Typical QDrant Client Set-up
-collection_name = f"pdf_to_parse_{uuid.uuid4()}"
-client = QdrantClient(":memory:")
-client.create_collection(
-    collection_name=collection_name,
-    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
-)
-
-# Typical Embedding Model
-core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

rag_system_prompt_template = """\
You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.

@@ -58,22 +55,25 @@ chat_prompt = ChatPromptTemplate.from_messages([
    ("human", rag_user_prompt_template)
])

-

def process_text_file(file: AskFileResponse):
    import tempfile
-
    with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
        with open(temp_file.name, "wb") as f:
            f.write(file.content)

-    ## chris' chainlit https://github.com/AI-Maker-Space/AIE4-DeployPythonicRAG/blob/main/app.py
-    #with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
-    #    temp_file_path = temp_file.name
-
-    #with open(temp_file_path, "wb") as f:
-    #    f.write(file.content)
-
    Loader = PyMuPDFLoader

    loader = Loader(temp_file.name)

@@ -83,8 +83,6 @@ def process_text_file(file: AskFileResponse):
        doc.metadata["source"] = f"source_{i}"
    return docs

-
-### On Chat Start (Session Start) Section ###
@cl.on_chat_start
async def on_chat_start():
    files = None

@@ -92,8 +90,8 @@ async def on_chat_start():
    # Wait for the user to upload a file
    while files == None:
        files = await cl.AskFileMessage(
-            content="Please upload a
-            accept=["
            max_size_mb=2,
            timeout=180,
        ).send()

@@ -106,57 +104,61 @@ async def on_chat_start():
    await msg.send()

    # load the file
-

-    print(f"Processing {len(

-    #
    store = LocalFileStore("./cache/")
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        core_embeddings, store, namespace=core_embeddings.model
    )
-
    # Typical QDrant Vector Store Set-up
    vectorstore = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=cached_embedder)
-    vectorstore.add_documents(
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})

    retrieval_augmented_qa_chain = (
-
-
-
    )

    cl.user_session.set("midterm_chain", retrieval_augmented_qa_chain)

-### Rename Chains ###
-@cl.author_rename
-def rename(orig_author: str):
-    """ RENAME CODE HERE """

-### On Message Section ###
@cl.on_message
-async def main(message
-
-
-
-
-    # Pass the user's message (query) to the chain for processing
-    response = await midterm_chain.run(message.content)
-
-    # Send the response back to the user
-    await message.send(response)
-    # Process the incoming question using the RAG chain
-    #result = retrieval_augmented_qa_chain.invoke({"question": message.content})
-
    # Create a new message for the response
-

-
-
-    error_message = cl.Message(content=f"An error occurred: {str(e)}")
-    await error_message.send()
-    print(f"Error occurred: {e}")
@@ -1,11 +1,25 @@
+import os
+from typing import List
+from chainlit.types import AskFileResponse
+from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
+from aimakerspace.openai_utils.prompts import (
+    UserRolePrompt,
+    SystemRolePrompt,
+    AssistantRolePrompt,
+)
+from aimakerspace.openai_utils.embedding import EmbeddingModel
+from aimakerspace.vectordatabase import VectorDatabase
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
+from langchain_qdrant import QdrantVectorStore
+#from aimakerspace.openai_utils.chatmodel import ChatOpenAI
+
+from langchain_openai import ChatOpenAI  # needed below for chat_openai = ChatOpenAI()
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyMuPDFLoader
+
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.globals import set_llm_cache
@@ -14,29 +28,12 @@ from langchain_core.caches import InMemoryCache
from operator import itemgetter
from langchain_core.runnables.passthrough import RunnablePassthrough
from chainlit.types import AskFileResponse
+from typing import List
import uuid
import chainlit as cl

set_llm_cache(InMemoryCache())


rag_system_prompt_template = """\
You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.
@@ -58,22 +55,25 @@ chat_prompt = ChatPromptTemplate.from_messages([
    ("human", rag_user_prompt_template)
])

+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+# Typical QDrant Client Set-up
+collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+client = QdrantClient(":memory:")
+client.create_collection(
+    collection_name=collection_name,
+    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+)
+
+# Typical Embedding Model
+core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

def process_text_file(file: AskFileResponse):
    import tempfile
    with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
        with open(temp_file.name, "wb") as f:
            f.write(file.content)

    Loader = PyMuPDFLoader

    loader = Loader(temp_file.name)
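The collection created in the hunk above uses 1536-dimensional cosine vectors, which matches the default output width of text-embedding-3-small, so documents embedded with core_embeddings fit the collection as configured. A minimal sanity-check sketch (not part of this commit; assumes an OPENAI_API_KEY is available):

    # Sketch only: confirm the embedding width matches the collection's VectorParams size
    probe = core_embeddings.embed_query("dimension probe")
    assert len(probe) == 1536, f"collection expects 1536-dim vectors, got {len(probe)}"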
@@ -83,8 +83,6 @@ def process_text_file(file: AskFileResponse):
        doc.metadata["source"] = f"source_{i}"
    return docs

@cl.on_chat_start
async def on_chat_start():
    files = None
@@ -92,8 +90,8 @@ async def on_chat_start():
    # Wait for the user to upload a file
    while files == None:
        files = await cl.AskFileMessage(
+            content="Please upload a PDF file to begin!",
+            accept=["application/pdf"],
            max_size_mb=2,
            timeout=180,
        ).send()
@@ -106,57 +104,61 @@ async def on_chat_start():
    await msg.send()

    # load the file
+    texts = process_text_file(file)

+    print(f"Processing {len(texts)} text chunks")

+    # Create a dict vector store
+    #vector_db = VectorDatabase()
+    # Adding cache!
    store = LocalFileStore("./cache/")
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        core_embeddings, store, namespace=core_embeddings.model
    )
+    print ('three')
    # Typical QDrant Vector Store Set-up
    vectorstore = QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=cached_embedder)
+    vectorstore.add_documents(texts)
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})

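For context, the cache-backed embedder above stores each chunk's vector in ./cache/ keyed by the text and the model namespace, so re-processing an identical chunk is served from disk rather than re-calling the embeddings API. A small illustrative sketch (not part of this commit; reuses the cached_embedder defined above):

    # Sketch only: the second call reads from LocalFileStore("./cache/") instead of hitting the API
    first = cached_embedder.embed_documents(["the same chunk of text"])
    second = cached_embedder.embed_documents(["the same chunk of text"])
    assert first == second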
+    #vector_db = await vector_db.abuild_from_list(texts)
+
+    chat_openai = ChatOpenAI()
+
    retrieval_augmented_qa_chain = (
+        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+        | RunnablePassthrough.assign(context=itemgetter("context"))
+        | chat_prompt | chat_openai
    )
+
+    # Create a chain
+    #retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
+    #    vector_db_retriever=vectorstore,
+    #    llm=chat_openai
+    #)

+    # Let the user know that the system is ready
+    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+    await msg.update()
+
+    print ('five')
+
    cl.user_session.set("midterm_chain", retrieval_augmented_qa_chain)

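The chain stored in the session takes a {"question": ...} dict; the question is piped both into the MMR retriever (to populate "context") and into the prompt, and the ChatOpenAI call returns a message object. A minimal sketch of exercising it directly, for example from a REPL (illustrative only; assumes the set-up above has already run):

    # Sketch only: call the chain outside of Chainlit
    result = retrieval_augmented_qa_chain.invoke({"question": "What is this document about?"})
    print(result.content)  # the model's answer as plain text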
@cl.on_message
+async def main(message):
+    midterm_chain = cl.user_session.get("midterm_chain")
+    #chain = cl.user_session.get("chain")
+    result = midterm_chain.invoke({"question": message.content})
    # Create a new message for the response
+    #print (result)
+    response_message = cl.Message(content=result.content)


+    # Send the response back to the user
+    await response_message.send()
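One caveat in the handler above: invoke() is synchronous, so it blocks Chainlit's event loop while the model generates. An alternative sketch using the chain's async API (not part of this commit; same session key and chain as above):

    # Sketch only: an async variant of the same handler
    @cl.on_message
    async def main(message):
        midterm_chain = cl.user_session.get("midterm_chain")
        result = await midterm_chain.ainvoke({"question": message.content})
        await cl.Message(content=result.content).send()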