Technocoloredgeek committed on
Commit
e14ab2c
1 Parent(s): f564d2b

Docker-python

Files changed (1)
app.py +91 -0
app.py ADDED
import os
import io
from typing import List

import chainlit as cl
from chainlit.types import AskFileResponse
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from PyPDF2 import PdfReader

# Set up API key: read it from the environment instead of hardcoding a secret in the source.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError("Please set the OPENAI_API_KEY environment variable.")

# Set up prompts
system_template = "Use the following context to answer a user's question. If you cannot find the answer in the context, say you don't know the answer."
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

human_template = "Context:\n{context}\n\nQuestion:\n{question}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])


class RetrievalAugmentedQAPipeline:
    """Answers a query by retrieving relevant chunks and streaming an LLM response."""

    def __init__(self, llm: ChatOpenAI, vector_db: Chroma) -> None:
        self.llm = llm
        self.vector_db = vector_db

    async def arun_pipeline(self, user_query: str):
        # Retrieve the two chunks most similar to the query and join them into one context block.
        context_docs = self.vector_db.similarity_search(user_query, k=2)
        context_prompt = "\n".join(doc.page_content for doc in context_docs)

        # Truncate the context so the final prompt stays within the model's context window.
        max_context_length = 12000
        if len(context_prompt) > max_context_length:
            context_prompt = context_prompt[:max_context_length]

        messages = chat_prompt.format_prompt(context=context_prompt, question=user_query).to_messages()

        # Stream the answer token by token so the UI can render it incrementally.
        async for chunk in self.llm.astream(messages):
            yield chunk.content


def process_pdf(file: AskFileResponse) -> List[str]:
    # PdfReader expects a path or a file-like object, so wrap the uploaded bytes in BytesIO.
    # (Assumes a Chainlit version where AskFileResponse exposes the raw bytes as `file.content`.)
    pdf_reader = PdfReader(io.BytesIO(file.content))
    # extract_text() can return None for pages without a text layer.
    text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=40)
    return text_splitter.split_text(text)


@cl.on_chat_start
async def on_chat_start():
    files = await cl.AskFileMessage(
        content="Please upload a PDF file to begin!",
        accept=["application/pdf"],
        max_size_mb=20,
    ).send()

    if not files:
        await cl.Message(content="No file was uploaded. Please try again.").send()
        return

    file = files[0]
    msg = cl.Message(content=f"Processing `{file.name}`...")
    await msg.send()

    # Split the PDF into chunks and index them in an in-memory Chroma vector store.
    texts = process_pdf(file)
    embeddings = OpenAIEmbeddings()
    vector_db = Chroma.from_texts(texts, embeddings)

    chat_openai = ChatOpenAI()
    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(vector_db=vector_db, llm=chat_openai)

    # Keep the pipeline in the user session so the @cl.on_message handler can reuse it.
    cl.user_session.set("pipeline", retrieval_augmented_qa_pipeline)

    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()


@cl.on_message
async def main(message: cl.Message):
    pipeline = cl.user_session.get("pipeline")
    if not pipeline:
        await cl.Message(content="Please upload a PDF file first.").send()
        return

    # Stream the pipeline's tokens into a single chat message.
    msg = cl.Message(content="")
    async for chunk in pipeline.arun_pipeline(message.content):
        await msg.stream_token(chunk)

    await msg.send()
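
The commit message says "Docker-python", but the diff itself only adds app.py. For reference, here is a minimal sketch of a Dockerfile that could serve this app; the base image, dependency list, and port below are assumptions inferred from the imports in app.py, not part of this commit.

# Illustrative Dockerfile (not included in this commit); adjust versions to your environment.
FROM python:3.11-slim

WORKDIR /app

# Dependencies inferred from the imports in app.py.
RUN pip install --no-cache-dir chainlit langchain langchain-community chromadb openai tiktoken PyPDF2

COPY app.py .

# Chainlit listens on port 8000 by default.
EXPOSE 8000
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "8000"]

With an image like this, the API key is injected at runtime (e.g. docker run -e OPENAI_API_KEY=... -p 8000:8000 <image>) rather than baked into the image, matching the environment-variable check at the top of app.py.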