Update app.py
app.py CHANGED
@@ -15,9 +15,8 @@ import pdfkit
 from paddleocr import PaddleOCR
 import fitz
 import asyncio
-
+from langchain_nomic.embeddings import NomicEmbeddings
 
-# initialise LLM model
 llm_groq = ChatGroq(
     model_name='llama3-70b-8192'
 )
@@ -25,9 +24,6 @@ llm_groq = ChatGroq(
 # Initialize anonymizer
 anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL', 'US_BANK_NUMBER', 'US_DRIVER_LICENSE', 'US_ITIN', 'US_PASSPORT', 'US_SSN'], faker_seed=18)
 
-# initalise nomic embedding model
-# embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
-
 def extract_text_from_pdf(file_path):
     pdf = PyPDF2.PdfReader(file_path)
     pdf_text = ""
@@ -148,7 +144,10 @@ async def on_chat_start():
     # without splitting into chunks
     # {
     # Create a Chroma vector store
-
+
+    # embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
+
     docsearch = await cl.make_async(Chroma.from_texts)(
         [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
     )
@@ -192,10 +191,9 @@ async def main(message: cl.Message):
     # Call the chain with user's message content
     res = await chain.ainvoke(message.content, callbacks=[cb])
     answer = anonymizer.deanonymize(
-        res["answer"]
+        "ok"+res["answer"]
     )
     text_elements = []
 
     # Return results
     await cl.Message(content=answer, elements=text_elements).send()
-
|
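For context, the main functional change in this commit is the embedding setup: the commented-out Ollama model is left as a reference and the hosted Nomic model nomic-embed-text-v1.5 is instantiated inside on_chat_start, then used to build the Chroma store from the anonymized text. A minimal standalone sketch of that setup, assuming langchain-nomic and langchain-community are installed and a NOMIC_API_KEY environment variable is set; the sample text is a placeholder, only the model name and the "0-pl" metadata come from the diff:

# Sketch only: the embedding + vector store setup this commit switches to,
# reproduced outside the Chainlit on_chat_start handler.
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_community.vectorstores import Chroma

# Hosted Nomic embedding model; requires NOMIC_API_KEY in the environment.
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")

# Build the Chroma store from a single, already anonymized text blob,
# mirroring Chroma.from_texts([anonymized_text], ...) in the diff.
anonymized_text = "Example anonymized document text."  # placeholder, not from the app
docsearch = Chroma.from_texts(
    [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
)

# Simple similarity lookup against the stored text.
print(docsearch.similarity_search("example query", k=1))

In the app itself the from_texts call stays wrapped in cl.make_async(...) so the blocking Chroma build does not stall Chainlit's event loop.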
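The last hunk prepends "ok" to the chain's answer before it is passed to anonymizer.deanonymize(...). That still round-trips because deanonymization replaces the fake values recorded in the anonymizer's mapping wherever they occur in the string, rather than parsing the whole text; the extra prefix simply passes through. A small sketch of that behavior, assuming langchain-experimental plus the Presidio and Faker dependencies are installed; the field list is abbreviated and the sample sentence is illustrative:

# Sketch only: reversible anonymization round trip in the style of app.py.
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "EMAIL_ADDRESS"],  # app.py lists many more entity types
    faker_seed=18,
)

original = "Contact Jane Doe at jane.doe@example.com."  # placeholder input
masked = anonymizer.anonymize(original)            # PII replaced with seeded fake values
restored = anonymizer.deanonymize("ok" + masked)   # prefix survives, fakes map back

print(masked)
print(restored)  # "ok" followed by the original sentence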