dammy committed on
Commit
93544b0
·
1 Parent(s): a969a6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -45,31 +45,29 @@ def local_query(query, context):
45
  def run_query(btn, history, query):
46
 
47
 
 
48
 
49
-
50
- # file_name = btn.name
51
-
52
- # loader = PDFMinerLoader(file_name)
53
- # doc = loader.load()
54
 
55
- # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
56
- # texts = text_splitter.split_documents(doc)
57
 
58
- # texts = [i.page_content for i in texts]
59
 
60
- # doc_emb = st_model.encode(texts)
61
- # doc_emb = doc_emb.tolist()
62
 
63
- # ids = [str(uuid.uuid1()) for _ in doc_emb]
64
 
65
- # client = chromadb.Client()
66
- # collection = client.create_collection("test_db")
67
 
68
- # collection.add(
69
- # embeddings=doc_emb,
70
- # documents=texts,
71
- # ids=ids
72
- # )
73
 
74
 
75
 
 
45
  def run_query(btn, history, query):
46
 
47
 
48
+ file_name = btn.name
49
 
50
+ loader = PDFMinerLoader(file_name)
51
+ doc = loader.load()
 
 
 
52
 
53
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
54
+ texts = text_splitter.split_documents(doc)
55
 
56
+ texts = [i.page_content for i in texts]
57
 
58
+ doc_emb = st_model.encode(texts)
59
+ doc_emb = doc_emb.tolist()
60
 
61
+ ids = [str(uuid.uuid1()) for _ in doc_emb]
62
 
63
+ client = chromadb.Client()
64
+ collection = client.create_collection("test_db")
65
 
66
+ collection.add(
67
+ embeddings=doc_emb,
68
+ documents=texts,
69
+ ids=ids
70
+ )
71
 
72
 
73