tree3po committed on
Commit
733b136
·
verified ·
1 Parent(s): 1b2534d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -17,17 +17,17 @@ token=""
17
  repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
18
  emb = "sentence-transformers/all-mpnet-base-v2"
19
  hf = HuggingFaceEmbeddings(model_name=emb)
20
- #db = Chroma()
21
  #db.persist()
22
  # Load the document, split it into chunks, embed each chunk and load it into the vector store.
23
  #raw_documents = TextLoader('state_of_the_union.txt').load()
24
  def embed_fn(inp):
25
- db=Chroma()
26
  text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
27
  documents = text_splitter.split_text(inp)
28
  out_emb= hf.embed_documents(documents)
29
  string_representation = dumps(out_emb, pretty=True)
30
- db.from_texts(documents,persist_directory=f"{cwd}/chroma_langchain_db",embedding_function=HuggingFaceEmbeddings(model_name=emb))
31
 
32
  def proc_doc(doc_in):
33
  for doc in doc_in:
@@ -59,11 +59,10 @@ def read_pdf(pdf_path):
59
  text = f'{text}\n{page.extract_text()}'
60
  return text
61
  def run_llm(input_text,history):
62
- db=Chroma()
63
  MAX_TOKENS=20000
64
  try:
65
  qur= hf.embed_query(input_text)
66
- docs = db.similarity_search_by_vector(qur, k=3,persist_directory=f"{cwd}/chroma_langchain_db",embedding_function=HuggingFaceEmbeddings(model_name=emb))
67
 
68
  print(docs)
69
  except Exception as e:
 
# Model / embedding configuration and the shared persistent Chroma vector store.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
emb = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=emb)
# Reuse the already-constructed `hf` embedder instead of instantiating a second
# HuggingFaceEmbeddings with the exact same model name (identical behavior,
# avoids loading the sentence-transformers model twice).
db = Chroma(persist_directory=f"{cwd}/chroma_langchain_db", embedding_function=hf)
# db.persist()
# Load the document, split it into chunks, embed each chunk and load it into the vector store.
# raw_documents = TextLoader('state_of_the_union.txt').load()
24
def embed_fn(inp):
    """Split raw text into chunks, embed them, and index them in the shared Chroma store.

    Args:
        inp: Raw document text to split and index.

    Side effects:
        Adds the resulting chunks to the module-level persistent `db` store.
    """
    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=10)
    documents = text_splitter.split_text(inp)
    out_emb = hf.embed_documents(documents)
    # NOTE(review): the serialized embeddings are never read afterwards; kept
    # only as a debugging artifact — candidate for removal.
    string_representation = dumps(out_emb, pretty=True)
    # BUG FIX: `Chroma.from_texts` is a classmethod that builds a *new* store;
    # calling it on the instance discarded its return value, so nothing was
    # ever written to `db`. `add_texts` inserts the chunks into the existing
    # persistent store (which already carries the embedding function).
    db.add_texts(documents)
31
 
32
  def proc_doc(doc_in):
33
  for doc in doc_in:
 
59
  text = f'{text}\n{page.extract_text()}'
60
  return text
61
  def run_llm(input_text,history):
 
62
  MAX_TOKENS=20000
63
  try:
64
  qur= hf.embed_query(input_text)
65
+ docs = db.similarity_search_by_vector(qur, k=3)
66
 
67
  print(docs)
68
  except Exception as e: