all_the_food_public

Sleeping

File size: 1,900 Bytes

001fdb5
 
30a523b
001fdb5
 
61ad695
001fdb5
dc52b39
001fdb5
 
 
 
 
30a523b
61ad695
30a523b
 
 
 
001fdb5
61ad695
001fdb5
 
 
 
70ff032
001fdb5
a2ee5aa
 
2cbfdf4
70ff032
 
 
 
 
001fdb5
30a523b
61ad695
f70cd59
ad1662f
f70cd59
 
 
35ed3c5
f70cd59
 
 
001fdb5

import chromadb
from sentence_transformers import CrossEncoder, SentenceTransformer
import json

def chroma_client_setup():
    """Build a default Chroma client and return the ``food_collection``.

    The collection is configured to use cosine distance instead of Chroma's
    default (l2).  ``get_or_create_collection`` is used rather than
    ``create_collection`` so that calling this function again while the
    collection already exists returns it instead of raising.

    Returns:
        The Chroma collection named ``food_collection``.
    """
    print("Setup client")
    chroma_client = chromadb.Client()
    return chroma_client.get_or_create_collection(
        name="food_collection",
        metadata={"hnsw:space": "cosine"},  # l2 is the default
    )

def load_data():
    """Read ``test_json.json`` from the working directory and parse it.

    The file is opened explicitly as UTF-8 so that non-ASCII text decodes the
    same way regardless of the platform's default locale encoding.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        FileNotFoundError: If ``test_json.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print("load data")
    with open("test_json.json", "r", encoding="utf-8") as f:
        return json.load(f)

_SENTENCE_MODEL_NAME = "mixedbread-ai/mxbai-embed-large-v1"
_sentence_model = None


def _get_sentence_model():
    """Return the shared embedding model, loading it on first use only."""
    global _sentence_model
    if _sentence_model is None:
        _sentence_model = SentenceTransformer(_SENTENCE_MODEL_NAME)
    return _sentence_model


def embedding_function(items_to_embed: list[str]) -> list[list[float]]:
    """Embed each string and return the vectors as plain Python lists.

    The model is loaded once and reused across calls; constructing it on
    every call (e.g. once per search query) is needlessly expensive.

    Args:
        items_to_embed: Texts to encode.

    Returns:
        One list per input text, obtained by calling ``tolist()`` on each
        encoded item.  An empty input yields an empty list without loading
        the model.
    """
    print("embedding")
    if not items_to_embed:
        # Nothing to encode; the diagnostics below index element 0 and would
        # fail on an empty result.
        return []
    embedded_items = _get_sentence_model().encode(items_to_embed)
    print(len(embedded_items))
    print(type(embedded_items[0]))
    print(type(embedded_items[0][0]))
    # Convert each encoded item with tolist() so the caller receives plain
    # Python lists rather than the model's native output objects.
    embedded_list = [item.tolist() for item in embedded_items]
    print(len(embedded_list))
    print(type(embedded_list[0]))
    print(type(embedded_list[0][0]))
    return embedded_list

def chroma_upserting(collection, payload: list[dict]):
    """Embed the ``'doc'`` field of each payload item and upsert it.

    Ids are positional (``id_0``, ``id_1``, ...), so upserting a new payload
    overwrites whatever was previously stored at the same positions.
    ``collection.upsert`` is used so that re-running with existing ids
    updates the stored entries, as the function name promises.

    Args:
        collection: Chroma collection to write to.
        payload: Items to store; each must have a ``'doc'`` key holding the
            text to embed.

    Returns:
        None.  An empty payload is a no-op.

    Raises:
        KeyError: If an item has no ``'doc'`` key.
    """
    print('upserting')
    if not payload:
        # Avoid loading the embedding model and sending empty lists to Chroma.
        return
    print("printing item:")
    # Build the document list once and reuse it for embedding and storage.
    documents = [item['doc'] for item in payload]
    embedding = embedding_function(documents)
    print(type(embedding))
    collection.upsert(
        documents=documents,
        embeddings=embedding,
        ids=[f"id_{idx}" for idx in range(len(documents))],
    )

def search_chroma(collection, query: str, n_results: int = 5):
    """Embed ``query`` and return its nearest entries from ``collection``.

    Args:
        collection: Chroma collection to search.
        query: Free-text search string; it is embedded with
            ``embedding_function`` before querying.
        n_results: Number of matches to request.  Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        Whatever ``collection.query`` returns for the embedded query.
    """
    return collection.query(
        query_embeddings=embedding_function([query]),
        n_results=n_results,
    )

_RERANK_MODEL_NAME = "mixedbread-ai/mxbai-rerank-xsmall-v1"
_rerank_model = None


def _get_rerank_model():
    """Return the shared cross-encoder reranker, loading it on first use only."""
    global _rerank_model
    if _rerank_model is None:
        # This is the xsmall variant of the mxbai reranker.
        _rerank_model = CrossEncoder(_RERANK_MODEL_NAME)
    return _rerank_model


def reranking_results(query: str, top_k_results: list[str]):
    """Rerank candidate documents against ``query`` with a cross-encoder.

    The reranker is loaded once and reused; reloading it on every call is
    needlessly expensive.

    Args:
        query: The search string the candidates are scored against.
        top_k_results: Candidate document texts to rerank.

    Returns:
        The result of ``CrossEncoder.rank(..., return_documents=True)`` for
        the candidates, or an empty list when there are no candidates (in
        which case the model is not loaded).
    """
    if not top_k_results:
        return []
    return _get_rerank_model().rank(query, top_k_results, return_documents=True)