Mikeplockhart committed on
Commit
001fdb5
1 Parent(s): 26645e1

Create utils.py

Browse files

Simple utils added on iPad

Files changed (1) hide show
  1. utils.py +42 -0
utils.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from sentence_transformers import CrossEncoder, SentenceTransformer
3
+
4
def chroma_client_setup():
    """Create a Chroma client and a new ``food_collection`` collection.

    The collection is created with ``hnsw:space`` set to ``"cosine"`` so
    that similarity search uses cosine distance instead of the default l2.

    Returns:
        The collection object returned by ``create_collection``.
    """
    chroma_client = chromadb.Client()
    # The original code called ``client.create_collection`` although the
    # client is bound to ``chroma_client``, which raised NameError.
    collection = chroma_client.create_collection(
        name="food_collection",
        metadata={"hnsw:space": "cosine"},  # l2 is the default
    )
    return collection
11
+
12
# Lazily-created SentenceTransformer shared by every call, so the model is
# loaded once per process rather than once per embedding request.
_SENTENCE_MODEL = None


def embedding_function(items_to_embed: list[str]):
    """Embed strings with the ``mixedbread-ai/mxbai-embed-large-v1`` model.

    Args:
        items_to_embed: The texts to encode.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for the given
        texts (one embedding per input item).
    """
    global _SENTENCE_MODEL
    if _SENTENCE_MODEL is None:
        # Constructing the model is expensive; do it only on first use.
        _SENTENCE_MODEL = SentenceTransformer(
            "mixedbread-ai/mxbai-embed-large-v1"
        )
    embedded_items = _SENTENCE_MODEL.encode(
        items_to_embed,
        show_progress_bar=True,
    )
    return embedded_items
21
+
22
def chroma_upserting(collection, embeddings: list[list[float]], payload: list[dict]):
    """Add documents, their embeddings and metadata to a collection.

    Args:
        collection: Object exposing an ``add(documents=..., embeddings=...,
            metadatas=..., ids=...)`` method.
        embeddings: One embedding vector per payload entry.
        payload: One dict per document; each must contain a ``'doc'`` key
            holding the document text. The whole dict is also stored as
            the document's metadata.

    Raises:
        KeyError: If a payload entry has no ``'doc'`` key.
    """
    # NOTE(review): despite the function name this calls ``collection.add``,
    # not an upsert method -- confirm which behavior is intended.
    collection.add(
        documents=[item["doc"] for item in payload],
        embeddings=embeddings,
        metadatas=payload,
        # The original referenced the misspelled name ``embedfings`` here,
        # which raised NameError; ids are positional: "id0", "id1", ...
        ids=[f"id{index}" for index in range(len(embeddings))],
    )
29
+
30
def search_chroma(collection, query: str, n_results: int = 5):
    """Query a collection for the entries closest to ``query``.

    The query text is embedded with ``embedding_function`` and the
    resulting embedding is passed to ``collection.query``.

    Args:
        collection: Object exposing a ``query(query_embeddings=...,
            n_results=...)`` method.
        query: The search text.
        n_results: How many results to request; defaults to 5, the value
            that was previously hard-coded.

    Returns:
        Whatever ``collection.query`` returns.
    """
    results = collection.query(
        # embedding_function expects a list of texts, so wrap the query.
        query_embeddings=embedding_function([query]),
        n_results=n_results,
    )
    return results
36
+
37
# Lazily-created CrossEncoder shared by every call, so the reranker is
# loaded once per process rather than once per query.
_RERANK_MODEL = None


def reranking_results(query: str, top_k_results: list[str]):
    """Rerank candidate documents against a query with a cross-encoder.

    Args:
        query: The search text.
        top_k_results: Candidate document texts to rerank.

    Returns:
        Whatever ``CrossEncoder.rank`` returns when called with
        ``return_documents=True``.
    """
    global _RERANK_MODEL
    if _RERANK_MODEL is None:
        # Load the model once; this is the xsmall mxbai reranker (the
        # earlier comment called it "base sized", which did not match).
        _RERANK_MODEL = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
    reranked_results = _RERANK_MODEL.rank(
        query, top_k_results, return_documents=True
    )
    return reranked_results
42
+