Mikeplockhart committed on
Commit
30a523b
1 Parent(s): 909ab2a

Update utils.py

Browse files

Loading json data

Files changed (1) hide show
  1. utils.py +8 -2
utils.py CHANGED
@@ -1,5 +1,6 @@
1
  import chromadb
2
  from sentence_transformers import CrossEncoder, SentenceTransformer
 
3
 
4
  def chroma_client_setup():
5
  chroma_client = chromadb.Client()
@@ -9,6 +10,11 @@ def chroma_client_setup():
9
  )
10
  return collection
11
 
 
 
 
 
 
12
  def embedding_function(items_to_embed: list[str]):
13
  sentence_model = SentenceTransformer(
14
  "mixedbread-ai/mxbai-embed-large-v1"
@@ -19,10 +25,10 @@ def embedding_function(items_to_embed: list[str]):
19
  )
20
  return embedded_items
21
 
22
def chroma_upserting(collection, embeddings:list[list[str]], payload:list[dict]):
    """Add precomputed embeddings and their source records to ``collection``.

    Args:
        collection: Object exposing ``add(documents=..., embeddings=...,
            metadatas=..., ids=...)``; presumably a Chroma collection --
            confirm against the caller.
        embeddings: One embedding per payload record, in the same order.
            NOTE(review): annotated as lists of strings; confirm the real
            element type against the embedding model's output.
        payload: Records that each carry a ``"doc"`` key. The text under
            that key becomes the stored document and the whole record is
            passed along as that document's metadata.

    Raises:
        KeyError: If a record has no ``"doc"`` key.
    """
    collection.add(
        documents=[item["doc"] for item in payload],
        embeddings=embeddings,
        metadatas=payload,
        # Ids are positional ("id0", "id1", ...), one per embedding, so a
        # later call with a different batch produces the same ids again.
        ids=[f"id{index}" for index in range(len(embeddings))],
    )
 
1
  import chromadb
2
  from sentence_transformers import CrossEncoder, SentenceTransformer
3
+ import json
4
 
5
  def chroma_client_setup():
6
  chroma_client = chromadb.Client()
 
10
  )
11
  return collection
12
 
13
def load_data(path: str = "test_json.json"):
    """Parse a JSON file and return the decoded content.

    Args:
        path: Location of the JSON file. Defaults to ``test_json.json``,
            resolved against the current working directory.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
17
+
18
  def embedding_function(items_to_embed: list[str]):
19
  sentence_model = SentenceTransformer(
20
  "mixedbread-ai/mxbai-embed-large-v1"
 
25
  )
26
  return embedded_items
27
 
28
def chroma_upserting(collection, payload:list[dict]):
    """Embed each payload record's ``doc`` text and add the batch to ``collection``.

    Args:
        collection: Object exposing ``add(documents=..., embeddings=...,
            metadatas=..., ids=...)``; presumably a Chroma collection --
            confirm against the caller.
        payload: Records that each carry a ``"doc"`` key. The text under
            that key is stored as the document and passed to
            ``embedding_function``; the whole record is passed along as
            that document's metadata.

    Raises:
        KeyError: If a record has no ``"doc"`` key.
    """
    # Build the document list once and reuse it for storage and embedding.
    documents = [item["doc"] for item in payload]
    collection.add(
        documents=documents,
        embeddings=embedding_function(documents),
        metadatas=payload,
        # Sized from the payload because this function does not receive the
        # embeddings as an argument. Ids are positional ("id0", "id1", ...),
        # so a later call with a different batch produces the same ids again.
        ids=[f"id{index}" for index in range(len(documents))],
    )