Update app.py
Browse files
app.py
CHANGED
@@ -17,17 +17,17 @@ token=""
|
|
17 |
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
18 |
emb = "sentence-transformers/all-mpnet-base-v2"
|
19 |
hf = HuggingFaceEmbeddings(model_name=emb)
|
20 |
-
|
21 |
#db.persist()
|
22 |
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
|
23 |
#raw_documents = TextLoader('state_of_the_union.txt').load()
|
24 |
def embed_fn(inp):
|
25 |
-
db=Chroma()
|
26 |
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
|
27 |
documents = text_splitter.split_text(inp)
|
28 |
out_emb= hf.embed_documents(documents)
|
29 |
string_representation = dumps(out_emb, pretty=True)
|
30 |
-
db.from_texts(documents
|
31 |
|
32 |
def proc_doc(doc_in):
|
33 |
for doc in doc_in:
|
@@ -59,11 +59,10 @@ def read_pdf(pdf_path):
|
|
59 |
text = f'{text}\n{page.extract_text()}'
|
60 |
return text
|
61 |
def run_llm(input_text,history):
|
62 |
-
db=Chroma()
|
63 |
MAX_TOKENS=20000
|
64 |
try:
|
65 |
qur= hf.embed_query(input_text)
|
66 |
-
docs = db.similarity_search_by_vector(qur, k=3
|
67 |
|
68 |
print(docs)
|
69 |
except Exception as e:
|
|
|
17 |
# Model identifiers used by the app: the hosted LLM repo and the
# sentence-embedding model.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
emb = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=emb)
# Persistent Chroma store shared by the functions below. It reuses ``hf``
# instead of constructing a second HuggingFaceEmbeddings for the same model,
# so the embedding model is only loaded once.
db = Chroma(persist_directory=f"{cwd}/chroma_langchain_db", embedding_function=hf)
#db.persist()
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
#raw_documents = TextLoader('state_of_the_union.txt').load()
|
24 |
def embed_fn(inp):
    """Split ``inp`` into chunks and add them to the module-level ``db`` store.

    The text is split with ``CharacterTextSplitter(chunk_size=200,
    chunk_overlap=10)`` and the resulting chunks are passed to
    ``db.add_texts``, which embeds them with the store's configured
    embedding function.

    Args:
        inp: Raw text to index.

    Returns:
        None.
    """
    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
    documents = text_splitter.split_text(inp)
    if not documents:
        # Nothing to index; skip the store call rather than send an empty batch.
        return
    # ``from_texts`` is a classmethod that builds and returns a *new* store, so
    # calling it on ``db`` and discarding the result left ``db`` unchanged.
    # ``add_texts`` writes into the existing collection instead. The store
    # embeds the chunks itself, so the previous explicit embed/dumps step
    # (whose results were never used) is dropped.
    db.add_texts(documents)
|
31 |
|
32 |
def proc_doc(doc_in):
|
33 |
for doc in doc_in:
|
|
|
59 |
text = f'{text}\n{page.extract_text()}'
|
60 |
return text
|
61 |
def run_llm(input_text,history):
|
|
|
62 |
MAX_TOKENS=20000
|
63 |
try:
|
64 |
qur= hf.embed_query(input_text)
|
65 |
+
docs = db.similarity_search_by_vector(qur, k=3)
|
66 |
|
67 |
print(docs)
|
68 |
except Exception as e:
|