northstaranlyticsma24 committed on
Commit d2bc317 · verified · 1 Parent(s): 47a4311

Update app.py

Files changed (1)
  1. app.py +69 -67
app.py CHANGED
@@ -1,11 +1,25 @@
-### Import Section ###
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import PyMuPDFLoader
+import os
+from typing import List
+from chainlit.types import AskFileResponse
+from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
+from aimakerspace.openai_utils.prompts import (
+    UserRolePrompt,
+    SystemRolePrompt,
+    AssistantRolePrompt,
+)
+from aimakerspace.openai_utils.embedding import EmbeddingModel
+from aimakerspace.vectordatabase import VectorDatabase
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import Distance, VectorParams
+from langchain_qdrant import QdrantVectorStore
+#from aimakerspace.openai_utils.chatmodel import ChatOpenAI
+
+#from langchain_openai import ChatOpenAI
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyMuPDFLoader
+
 from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.storage import LocalFileStore
-from langchain_qdrant import QdrantVectorStore
 from langchain.embeddings import CacheBackedEmbeddings
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.globals import set_llm_cache
@@ -14,29 +28,12 @@ from langchain_core.caches import InMemoryCache
 from operator import itemgetter
 from langchain_core.runnables.passthrough import RunnablePassthrough
 from chainlit.types import AskFileResponse
+from typing import List
 import uuid
 import chainlit as cl
 
-### Global Section ###
-
-## check this helper readme: https://github.com/AI-Maker-Space/AIE4-DeployPythonicRAG/blob/main/BuildingAChainlitApp.md
-
-text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-
-Loader = PyMuPDFLoader
-
 set_llm_cache(InMemoryCache())
 
-# Typical QDrant Client Set-up
-collection_name = f"pdf_to_parse_{uuid.uuid4()}"
-client = QdrantClient(":memory:")
-client.create_collection(
-    collection_name=collection_name,
-    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
-)
-
-# Typical Embedding Model
-core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
 
 rag_system_prompt_template = """\
 You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.
@@ -58,22 +55,25 @@ chat_prompt = ChatPromptTemplate.from_messages([
     ("human", rag_user_prompt_template)
 ])
 
-chat_model = ChatOpenAI(model="gpt-4o-mini")
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+# Typical QDrant Client Set-up
+collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+client = QdrantClient(":memory:")
+client.create_collection(
+    collection_name=collection_name,
+    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+)
+
+# Typical Embedding Model
+core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
 
 def process_text_file(file: AskFileResponse):
     import tempfile
-
     with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
         with open(temp_file.name, "wb") as f:
            f.write(file.content)
 
-    ## chris' chainlit https://github.com/AI-Maker-Space/AIE4-DeployPythonicRAG/blob/main/app.py
-    #with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
-    #    temp_file_path = temp_file.name
-
-    #with open(temp_file_path, "wb") as f:
-    #    f.write(file.content)
-
     Loader = PyMuPDFLoader
 
     loader = Loader(temp_file.name)
@@ -83,8 +83,6 @@ def process_text_file(file: AskFileResponse):
        doc.metadata["source"] = f"source_{i}"
     return docs
 
-
-### On Chat Start (Session Start) Section ###
 @cl.on_chat_start
 async def on_chat_start():
     files = None
@@ -92,8 +90,8 @@ async def on_chat_start():
     # Wait for the user to upload a file
     while files == None:
         files = await cl.AskFileMessage(
-            content="Please upload a Text File file to begin!",
-            accept=["text/plain"],
+            content="Please upload a PDF File file to begin!",
+            accept=["application/pdf"],
             max_size_mb=2,
             timeout=180,
         ).send()
@@ -106,57 +104,61 @@ async def on_chat_start():
     await msg.send()
 
     # load the file
-    docs = process_text_file(file)
+    texts = process_text_file(file)
 
-    print(f"Processing {len(docs)} text chunks")
+    print(f"Processing {len(texts)} text chunks")
 
-    # Adding cache!
+    # Create a dict vector store
+    #vector_db = VectorDatabase()
+    # Adding cache!
     store = LocalFileStore("./cache/")
     cached_embedder = CacheBackedEmbeddings.from_bytes_store(
         core_embeddings, store, namespace=core_embeddings.model
     )
-
+    print ('three')
     # Typical QDrant Vector Store Set-up
     vectorstore = QdrantVectorStore(
         client=client,
         collection_name=collection_name,
         embedding=cached_embedder)
-    vectorstore.add_documents(docs)
+    vectorstore.add_documents(texts)
     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
 
+    #vector_db = await vector_db.abuild_from_list(texts)
+
+    chat_openai = ChatOpenAI()
+
     retrieval_augmented_qa_chain = (
-        {"context": itemgetter("question") | retriever, "question": itemgetter("question")} ##
-        | RunnablePassthrough.assign(context=itemgetter("context"))
-        | chat_prompt | chat_model
+        {"context": itemgetter("question") | retriever, "question": itemgetter("question")} ##
+        | RunnablePassthrough.assign(context=itemgetter("context"))
+        | chat_prompt | chat_openai
     )
+
+    # Create a chain
+    #retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
+    #    vector_db_retriever=vectorstore,
+    #    llm=chat_openai
+    #)
 
+    # Let the user know that the system is ready
+    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+    await msg.update()
+
+    print ('five')
+
     cl.user_session.set("midterm_chain", retrieval_augmented_qa_chain)
 
-### Rename Chains ###
-@cl.author_rename
-def rename(orig_author: str):
-    """ RENAME CODE HERE """
 
-### On Message Section ###
 @cl.on_message
-async def main(message: cl.Message):
-    try:
-        # Retrieve the chain stored in the session
-        midterm_chain = cl.user_session.get("midterm_chain")
-
-        # Pass the user's message (query) to the chain for processing
-        response = await midterm_chain.run(message.content)
-
-        # Send the response back to the user
-        await message.send(response)
-        # Process the incoming question using the RAG chain
-        #result = retrieval_augmented_qa_chain.invoke({"question": message.content})
-
+async def main(message):
+    midterm_chain = cl.user_session.get("midterm_chain")
+    #chain = cl.user_session.get("chain")
+    result = midterm_chain.invoke({"question": message.content})
     # Create a new message for the response
-    #response_message = cl.Message(content=result["response"].content)
+    #print (result)
+    response_message = cl.Message(content=result.content)
+
+
 
+    # Send the response back to the user
+    await response_message.send()
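
Note on the new version: the updated on_chat_start builds the chain as chat_prompt | chat_openai, but both candidate ChatOpenAI imports remain commented out in the new import block. A minimal sketch of the import and invoke pattern the new code appears to assume (using langchain_openai's ChatOpenAI; this is an illustration under that assumption, not part of the commit):

# Hypothetical sketch: import needed for ChatOpenAI() to resolve at runtime,
# plus the invoke pattern used by the new on_message handler.
from langchain_openai import ChatOpenAI  # left commented out in the diff above

chat_openai = ChatOpenAI()  # library defaults; the removed code pinned model="gpt-4o-mini"

# Assuming retrieval_augmented_qa_chain is built as in on_chat_start above:
# result = retrieval_augmented_qa_chain.invoke({"question": "What does the uploaded PDF cover?"})
# print(result.content)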