Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -37,19 +37,20 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
|
|
37 |
pages.extend(loader.load())
|
38 |
text_splitter = RecursiveCharacterTextSplitter(
|
39 |
chunk_size=chunk_size,
|
40 |
-
chunk_overlap=chunk_overlap
|
41 |
-
)
|
42 |
doc_splits = text_splitter.split_documents(pages)
|
43 |
return doc_splits
|
44 |
|
|
|
45 |
def create_db(splits, collection_name):
    """Index document chunks in a fresh Chroma collection.

    Args:
        splits: Document chunks to embed and store (presumably the output
            of ``load_doc`` -- confirm against the caller).
        collection_name: Name given to the Chroma collection.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    # Relies on a module-level ``embedding`` object defined elsewhere in the file.
    client = chromadb.EphemeralClient()
    return Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        client=client,
        collection_name=collection_name,
    )
|
55 |
|
|
|
37 |
pages.extend(loader.load())
|
38 |
text_splitter = RecursiveCharacterTextSplitter(
|
39 |
chunk_size=chunk_size,
|
40 |
+
chunk_overlap=chunk_overlap)
|
|
|
41 |
doc_splits = text_splitter.split_documents(pages)
|
42 |
return doc_splits
|
43 |
|
44 |
+
# Create vector database
|
45 |
def create_db(splits, collection_name):
    """Embed document chunks and index them in a fresh Chroma collection.

    Args:
        splits: Document chunks to embed and store (presumably the output
            of ``load_doc`` -- confirm against the caller).
        collection_name: Name given to the Chroma collection.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    # Run the embedding model on the GPU when one is available, else on the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # HuggingFaceEmbeddings does not declare a top-level ``device`` field and
    # rejects unknown fields; the device has to be forwarded to the underlying
    # sentence-transformers model through ``model_kwargs``.
    embedding = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": device},
    )
    new_client = chromadb.EphemeralClient()
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        client=new_client,
        collection_name=collection_name,
    )
    return vectordb
|
56 |
|