Commit 1cb46fc
Parent(s): 99a3f34

file loaded completed
Files changed:
- app.py +27 -11
- requirements.txt +1 -1
- src/config.py +1 -1
- src/model.py +7 -5
- src/utils.py +6 -6
app.py CHANGED

@@ -8,17 +8,11 @@ from src.utils import get_docSearch, get_source
 from src.model import load_chain
 
 
-
-
-
-
-
-
 welcome_message = """ Upload your file here"""
 
 @cl.on_chat_start
 async def start():
-    await cl.Message("you are in ").send()
+    await cl.Message(content="you are in ").send()
     logging.info(f"app started")
     files = None
     while files is None:
@@ -30,7 +24,7 @@
         ).send()
     logging.info("uploader excecuted")
     file = files[0]
-    msg = cl.Message(content=f"Processing
+    msg = cl.Message(content=f"Processing {file.name}....")
     await msg.send()
 
     logging.info("processing started")
@@ -47,22 +41,38 @@
     ## let the user know when system is ready
 
     msg.content = f"{file.name} processed. You begin asking questions"
-
     await msg.update()
 
     logging.info("processing completed")
 
     cl.user_session.set("chain", chain)
 
+    logging.info("chain saved for active session")
+
 @cl.on_message
 async def main(message):
+
+
     chain = cl.user_session.get("chain")
+
+    logging.info(f"retrived chain for QA {type(chain)}")
     cb = cl.AsyncLangchainCallbackHandler(
-        stream_final_answer=True, answer_prefix_tokens=["FINAL","ANSWER"]
+        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
     )
+
+    logging.info("define call backs")
+
 
     cb.answer_reached = True
+    logging.info("answer reached")
+
     res = await chain.acall(message, callbacks=[cb])
+    logging.info("define res")
+
+
+    logging.info("call backs ")
+
+
 
     answer = res["answer"]
     sources = res["sources"].strip()
@@ -73,11 +83,17 @@ async def main(message):
     metadatas = [doc.metadata for doc in docs]
     all_sources = [m["source"]for m in metadatas]
 
-
+
+
+    source_elements = get_source(sources,all_sources,docs,cl)
+
+    logging.info("getting source")
 
     if cb.has_streamed_final_answer:
         cb.final_stream.elements = source_elements
         await cb.final_stream.update()
+        logging.info("call back triggred")
    else:
         await cl.Message(content=answer, elements=source_elements).send()
+    logging.info("post message")
 
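The new logging traces the per-session round-trip this file depends on: the chain is stored in `cl.user_session` at the end of `start` and fetched again on every `main` call. A stripped-down sketch of that pattern (the stored object is a hypothetical stand-in; run with `chainlit run app.py -w`):

```python
import chainlit as cl

@cl.on_chat_start
async def start():
    # store any per-user object once setup work is done
    cl.user_session.set("chain", object())  # stand-in for the real chain

@cl.on_message
async def main(message):
    # every message handler for this user sees the same stored object
    chain = cl.user_session.get("chain")
    await cl.Message(content=f"session object: {type(chain)}").send()
```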
requirements.txt CHANGED

@@ -4,4 +4,4 @@ python-dotenv
 chainlit
 chromadb
 tiktoken
-tokenizers
+tokenizers
src/config.py CHANGED

@@ -9,5 +9,5 @@ class Config:
     streaming = True
     chain_type = "stuff"
     max_token_limit = 4098
-    embeddings = OpenAIEmbeddings(
+    embeddings = OpenAIEmbeddings()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
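This closes the previously unbalanced `OpenAIEmbeddings(` call, which made `src/config.py` a `SyntaxError` on import. A quick sanity check of the repaired class (a sketch, assuming `OPENAI_API_KEY` is set; the probe strings are arbitrary):

```python
from src.config import Config

# the splitter works offline; the embedding call hits the OpenAI API
chunks = Config.text_splitter.split_text("some long document text " * 200)
vector = Config.embeddings.embed_query("hello world")
print(len(chunks), len(vector))  # e.g. a handful of chunks, one 1536-dim vector
```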
src/model.py CHANGED

@@ -1,4 +1,4 @@
-from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 import logging
 import os
@@ -12,13 +12,15 @@ from src.config import Config
 
 def load_model():
     model = ChatOpenAI(temperature=Config.temperature,
-                       streaming=Config.streaming
+                       streaming=Config.streaming)
     return model
 
 
 def load_chain(docsearch):
     model = load_model()
-    chain = RetrievalQAWithSourcesChain.from_chain_type(
-
-
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        ChatOpenAI(temperature=0, streaming=True),
+        chain_type="stuff",
+        retriever=docsearch.as_retriever(max_tokens_limit=4097),
+    )
     return chain
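Note that the completed `load_chain` passes a fresh `ChatOpenAI(temperature=0, streaming=True)` to `from_chain_type`, so the `model` built by `load_model()` is left unused. A minimal smoke test of the returned chain (a sketch, assuming `docsearch` is an already-populated Chroma store):

```python
from src.model import load_chain

# RetrievalQAWithSourcesChain expects a "question" key and returns
# "answer" and "sources", which is exactly how app.py consumes it
chain = load_chain(docsearch)  # docsearch: an existing Chroma vector store
res = chain({"question": "What is this document about?"})
print(res["answer"])
print(res["sources"])
```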
src/utils.py CHANGED

@@ -1,15 +1,13 @@
 from chainlit.types import AskFileResponse
 import click
 from langchain.document_loaders import TextLoader
-from langchain.document_loaders import
+from langchain.document_loaders import PyPDFLoader
 from langchain.vectorstores import Chroma
 
 
 from src.config import Config
-# import chainlit as cl
 import logging
-
-import os
+
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -23,7 +21,7 @@ def process_file(file: AskFileResponse):
     if file.type == "text/plain":
         Loader = TextLoader
     elif file.type == "application/pdf":
-        Loader =
+        Loader = PyPDFLoader
 
     with tempfile.NamedTemporaryFile() as tempfile:
         tempfile.write(file.content)
@@ -48,7 +46,9 @@ def get_docSearch(file,cl):
 
     docsearch = Chroma.from_documents(docs, Config.embeddings)
 
-    logging.info("embedding completed")
+    logging.info(f"embedding completed {type(Config.embeddings)}")
+
+    logging.info(f"type of docsearch {type(docsearch)}")
 
     return docsearch
 
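With `PyPDFLoader` now imported and wired into `process_file`, the PDF branch mirrors the plain-text one. The same load/split/embed pipeline can be exercised outside Chainlit (a sketch with a hypothetical file path; `PyPDFLoader` also requires the `pypdf` package):

```python
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from src.config import Config

docs = PyPDFLoader("example.pdf").load()       # hypothetical path
chunks = Config.text_splitter.split_documents(docs)
docsearch = Chroma.from_documents(chunks, Config.embeddings)
print(f"type of docsearch {type(docsearch)}")  # matches the new log line
```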