Spaces:
Sleeping
Sleeping
Mikeplockhart
committed on
Commit
•
30a523b
1
Parent(s):
909ab2a
Update utils.py
Browse files
Loading json data
utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import chromadb
|
2 |
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
|
3 |
|
4 |
def chroma_client_setup():
|
5 |
chroma_client = chromadb.Client()
|
@@ -9,6 +10,11 @@ def chroma_client_setup():
|
|
9 |
)
|
10 |
return collection
|
11 |
|
|
|
|
|
|
|
|
|
|
|
12 |
def embedding_function(items_to_embed: list[str]):
|
13 |
sentence_model = SentenceTransformer(
|
14 |
"mixedbread-ai/mxbai-embed-large-v1"
|
@@ -19,10 +25,10 @@ def embedding_function(items_to_embed: list[str]):
|
|
19 |
)
|
20 |
return embedded_items
|
21 |
|
22 |
-
def chroma_upserting(collection,
|
23 |
collection.add(
|
24 |
documents=[item['doc'] for item in payload],
|
25 |
-
embeddings=
|
26 |
metadatas=payload,
|
27 |
ids=[f"id{item}" for item in range(len(embedfings))]
|
28 |
)
|
|
|
1 |
import chromadb
|
2 |
from sentence_transformers import CrossEncoder, SentenceTransformer
|
3 |
+
import json
|
4 |
|
5 |
def chroma_client_setup():
|
6 |
chroma_client = chromadb.Client()
|
|
|
10 |
)
|
11 |
return collection
|
12 |
|
13 |
+
def load_data(path: str = "test_json.json"):
    """Load and return the parsed contents of a JSON file.

    Args:
        path: Location of the JSON file to read. Defaults to
            ``"test_json.json"``, resolved relative to the current
            working directory, which is the file the original
            implementation always read.

    Returns:
        The Python object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; without an explicit encoding,
    # open() would decode using the platform locale instead.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
17 |
+
|
18 |
def embedding_function(items_to_embed: list[str]):
|
19 |
sentence_model = SentenceTransformer(
|
20 |
"mixedbread-ai/mxbai-embed-large-v1"
|
|
|
25 |
)
|
26 |
return embedded_items
|
27 |
|
28 |
+
def chroma_upserting(collection, payload: list[dict]):
    """Add every item of ``payload`` to ``collection`` with its embedding.

    Args:
        collection: Object exposing an ``add(documents=..., embeddings=...,
            metadatas=..., ids=...)`` method (presumably a Chroma
            collection -- confirm against the caller).
        payload: Items to store. Each dict must contain a ``'doc'`` key
            holding the text to embed; the whole dict is also passed
            through unchanged as that item's metadata.

    Raises:
        KeyError: If an item in ``payload`` has no ``'doc'`` key.
    """
    # Extract the texts once so documents, embeddings and ids are all
    # derived from the same list and stay the same length.
    documents = [item['doc'] for item in payload]
    collection.add(
        documents=documents,
        embeddings=embedding_function(documents),
        metadatas=payload,
        # The original sized this from the undefined name `embedfings`,
        # which raised NameError on every call. Ids are positional
        # ("id0", "id1", ...), one per document.
        ids=[f"id{index}" for index in range(len(documents))],
    )
|