akash015 committed on
Commit
4002837
·
verified ·
1 Parent(s): a39222a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -15,9 +15,8 @@ import pdfkit
15
  from paddleocr import PaddleOCR
16
  import fitz
17
  import asyncio
18
- # from langchain_nomic.embeddings import NomicEmbeddings
19
 
20
- # initialise LLM model
21
  llm_groq = ChatGroq(
22
  model_name='llama3-70b-8192'
23
  )
@@ -25,9 +24,6 @@ llm_groq = ChatGroq(
25
  # Initialize anonymizer
26
  anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL', 'US_BANK_NUMBER', 'US_DRIVER_LICENSE', 'US_ITIN', 'US_PASSPORT', 'US_SSN'], faker_seed=18)
27
 
28
- # initalise nomic embedding model
29
- # embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
30
-
31
  def extract_text_from_pdf(file_path):
32
  pdf = PyPDF2.PdfReader(file_path)
33
  pdf_text = ""
@@ -148,7 +144,10 @@ async def on_chat_start():
148
  # without splitting into chunks
149
  # {
150
  # Create a Chroma vector store
151
- embeddings = OllamaEmbeddings(model="nomic-embed-text")
 
 
 
152
  docsearch = await cl.make_async(Chroma.from_texts)(
153
  [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
154
  )
@@ -192,10 +191,9 @@ async def main(message: cl.Message):
192
  # Call the chain with user's message content
193
  res = await chain.ainvoke(message.content, callbacks=[cb])
194
  answer = anonymizer.deanonymize(
195
- res["answer"]
196
  )
197
  text_elements = []
198
 
199
  # Return results
200
  await cl.Message(content=answer, elements=text_elements).send()
201
-
 
15
  from paddleocr import PaddleOCR
16
  import fitz
17
  import asyncio
18
+ from langchain_nomic.embeddings import NomicEmbeddings
19
 
 
20
  llm_groq = ChatGroq(
21
  model_name='llama3-70b-8192'
22
  )
 
24
  # Initialize anonymizer
25
  anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL', 'US_BANK_NUMBER', 'US_DRIVER_LICENSE', 'US_ITIN', 'US_PASSPORT', 'US_SSN'], faker_seed=18)
26
 
 
 
 
27
  def extract_text_from_pdf(file_path):
28
  pdf = PyPDF2.PdfReader(file_path)
29
  pdf_text = ""
 
144
  # without splitting into chunks
145
  # {
146
  # Create a Chroma vector store
147
+
148
+ # embeddings = OllamaEmbeddings(model="nomic-embed-text")
149
+ embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
150
+
151
  docsearch = await cl.make_async(Chroma.from_texts)(
152
  [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
153
  )
 
191
  # Call the chain with user's message content
192
  res = await chain.ainvoke(message.content, callbacks=[cb])
193
  answer = anonymizer.deanonymize(
194
+ "ok"+res["answer"]
195
  )
196
  text_elements = []
197
 
198
  # Return results
199
  await cl.Message(content=answer, elements=text_elements).send()