meidkad committed on
Commit
3994b08
1 Parent(s): 51b7431

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +113 -0
  2. mental_health_Document.pdf +0 -0
  3. pic.jpg +0 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ from langchain_community.embeddings import OllamaEmbeddings
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores import Chroma
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain_community.chat_models import ChatOllama
7
+ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
8
+ import chainlit as cl
9
+
10
+
11
+
12
def _extract_pdf_text(path: str) -> str:
    """Return the concatenated text of every page of the PDF at *path*."""
    reader = PyPDF2.PdfReader(path)
    # `or ""` guards against a page yielding no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)


@cl.on_chat_start
async def on_chat_start():
    """Set up a retrieval chat session from a user-uploaded PDF.

    Asks the user for a PDF, extracts and chunks its text, indexes the
    chunks in a Chroma vector store using Ollama embeddings, builds a
    ``ConversationalRetrievalChain`` with buffer memory, and stores that
    chain in the user session under the key ``"chain"``.

    If no text can be extracted from the uploaded file, the user is told so
    and asked for another file instead of indexing an empty document.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1200, chunk_overlap=50
    )

    while True:
        files = None

        # Keep asking until the prompt returns an upload rather than None.
        while files is None:
            files = await cl.AskFileMessage(
                content="Please upload a pdf file to begin!",
                accept=["application/pdf"],
                max_size_mb=100,  # Upper bound on the upload size
                timeout=180,  # Seconds to wait for the user's response
            ).send()

        file = files[0]  # Only the first uploaded file is used

        # Show a progress message together with the bundled local image.
        elements = [
            cl.Image(name="image", display="inline", path="pic.jpg")
        ]
        msg = cl.Message(
            content=f"Processing `{file.name}`...", elements=elements
        )
        await msg.send()

        # PDF parsing is synchronous; run it off the event loop so other
        # sessions stay responsive while a large file is being read.
        pdf_text = await cl.make_async(_extract_pdf_text)(file.path)

        # Split the text into chunks
        texts = text_splitter.split_text(pdf_text)
        if texts:
            break

        # Nothing to index (for example a PDF without a text layer):
        # report it and ask for another file.
        msg.content = (
            f"Could not extract any text from `{file.name}`. "
            "Please upload a different pdf file."
        )
        await msg.update()

    # One metadata entry per chunk, tagging it with its position
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store; from_texts is synchronous, so it is
    # wrapped with make_async as well.
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    # Conversation memory backed by an in-memory message history
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Create a chain that answers from the Chroma vector store
    chain = ConversationalRetrievalChain.from_llm(
        ChatOllama(model="gemma:7b"),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    # Store the chain in the user session for the message handler
    cl.user_session.set("chain", chain)
81
+
82
+
83
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's retrieval chain.

    Reads the chain stored in the user session under ``"chain"``, runs it
    on the message content, and replies with the answer followed by the
    names of the retrieved source chunks (attached as text elements), or
    with a "No sources found" note when nothing was retrieved.

    If no chain is stored in the session yet, the user is asked to upload
    a PDF first instead of the handler failing on a missing chain.
    """
    # Retrieve the chain from user session
    chain = cl.user_session.get("chain")
    if chain is None:
        await cl.Message(
            content="Please upload a pdf file first, then ask your question."
        ).send()
        return

    # Handler that reports the chain's intermediate steps to the UI
    cb = cl.AsyncLangchainCallbackHandler()

    # Callbacks are supplied through the runnable config, which is where
    # `ainvoke` reads them from.
    res = await chain.ainvoke(message.content, config={"callbacks": [cb]})
    answer = res["answer"]
    source_documents = res.get("source_documents") or []

    # One text element per retrieved chunk, named source_0, source_1, ...
    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents)
    ]

    # Add source references to the answer
    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        answer += "\nNo sources found"

    # Send the answer with the source chunks attached
    await cl.Message(content=answer, elements=text_elements).send()
mental_health_Document.pdf ADDED
Binary file (128 kB). View file
 
pic.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ chainlit==1.0.200
2
+ langchain
3
+ langchain_community
4
+ PyPDF2
5
+ chromadb