Update app.py
app.py CHANGED
@@ -15,9 +15,8 @@ import pdfkit
 from paddleocr import PaddleOCR
 import fitz
 import asyncio
-
+from langchain_nomic.embeddings import NomicEmbeddings
 
-# initialise LLM model
 llm_groq = ChatGroq(
     model_name='llama3-70b-8192'
 )
@@ -25,9 +24,6 @@ llm_groq = ChatGroq(
 # Initialize anonymizer
 anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL', 'US_BANK_NUMBER', 'US_DRIVER_LICENSE', 'US_ITIN', 'US_PASSPORT', 'US_SSN'], faker_seed=18)
 
-# initalise nomic embedding model
-# embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
-
 def extract_text_from_pdf(file_path):
     pdf = PyPDF2.PdfReader(file_path)
     pdf_text = ""
@@ -148,7 +144,10 @@ async def on_chat_start():
     # without splitting into chunks
     # {
     # Create a Chroma vector store
-
+
+    # embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
+
     docsearch = await cl.make_async(Chroma.from_texts)(
         [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
     )
@@ -192,10 +191,9 @@ async def main(message: cl.Message):
     # Call the chain with user's message content
     res = await chain.ainvoke(message.content, callbacks=[cb])
     answer = anonymizer.deanonymize(
-        res["answer"]
+        "ok"+res["answer"]
     )
     text_elements = []
 
     # Return results
     await cl.Message(content=answer, elements=text_elements).send()
-
|
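For context, the main functional change in this commit is the embedding setup: the commented-out Ollama model is left as a reference and the hosted Nomic model nomic-embed-text-v1.5 is instantiated inside on_chat_start, then used to build the Chroma store from the anonymized text. A minimal standalone sketch of that setup, assuming langchain-nomic and langchain-community are installed and a NOMIC_API_KEY environment variable is set; the sample text is a placeholder, only the model name and the "0-pl" metadata come from the diff:

# Sketch only: the embedding + vector store setup this commit switches to,
# reproduced outside the Chainlit on_chat_start handler.
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_community.vectorstores import Chroma

# Hosted Nomic embedding model; requires NOMIC_API_KEY in the environment.
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")

# Build the Chroma store from a single, already anonymized text blob,
# mirroring Chroma.from_texts([anonymized_text], ...) in the diff.
anonymized_text = "Example anonymized document text."  # placeholder, not from the app
docsearch = Chroma.from_texts(
    [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
)

# Simple similarity lookup against the stored text.
print(docsearch.similarity_search("example query", k=1))

In the app itself the from_texts call stays wrapped in cl.make_async(...) so the blocking Chroma build does not stall Chainlit's event loop.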
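The last hunk prepends "ok" to the chain's answer before it is passed to anonymizer.deanonymize(...). That still round-trips because deanonymization replaces the fake values recorded in the anonymizer's mapping wherever they occur in the string, rather than parsing the whole text; the extra prefix simply passes through. A small sketch of that behavior, assuming langchain-experimental plus the Presidio and Faker dependencies are installed; the field list is abbreviated and the sample sentence is illustrative:

# Sketch only: reversible anonymization round trip in the style of app.py.
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "EMAIL_ADDRESS"],  # app.py lists many more entity types
    faker_seed=18,
)

original = "Contact Jane Doe at jane.doe@example.com."  # placeholder input
masked = anonymizer.anonymize(original)            # PII replaced with seeded fake values
restored = anonymizer.deanonymize("ok" + masked)   # prefix survives, fakes map back

print(masked)
print(restored)  # "ok" followed by the original sentence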