import os

import gradio as gr
import lancedb
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Connect to the local LanceDB database and read the table/column names,
# batch size, and model identifiers from environment variables.
db = lancedb.connect(".lancedb")
TABLE = db.open_table(os.getenv("TABLE_NAME"))
VECTOR_COLUMN = os.getenv("VECTOR_COLUMN", "vector")
TEXT_COLUMN = os.getenv("TEXT_COLUMN", "text")
BATCH_SIZE = int(os.getenv("BATCH_SIZE", 32))
CROSS_ENCODER = os.getenv("CROSS_ENCODER")

# Bi-encoder used to embed queries for the vector search, plus a cross-encoder
# (kept in eval mode) and its tokenizer for reranking the retrieved candidates.
retriever = SentenceTransformer(os.getenv("EMB_MODEL"))
cross_encoder = AutoModelForSequenceClassification.from_pretrained(CROSS_ENCODER)
cross_encoder.eval()
cross_encoder_tokenizer = AutoTokenizer.from_pretrained(CROSS_ENCODER)


def reranking(query, list_of_documents, k):
    """Score each (query, document) pair with the cross-encoder and keep the top-k documents."""
    tokens = cross_encoder_tokenizer(
        [query] * len(list_of_documents),
        list_of_documents,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        logits = cross_encoder(**tokens).logits
    # The raw logits serve as relevance scores; higher means more relevant.
    scores = logits.reshape(-1).tolist()
    ranked = sorted(zip(list_of_documents, scores), key=lambda x: x[1], reverse=True)
    return [document for document, _ in ranked[:k]]


def retrieve(query, top_k_retriever=30, use_reranking=True, top_k_reranker=5):
    """Embed the query, fetch the nearest documents from LanceDB, and optionally rerank them."""
    query_vec = retriever.encode(query)
    try:
        documents = (
            TABLE.search(query_vec, vector_column_name=VECTOR_COLUMN)
            .limit(top_k_retriever)
            .to_list()
        )
        documents = [doc[TEXT_COLUMN] for doc in documents]
        if use_reranking:
            documents = reranking(query, documents, top_k_reranker)
        return documents
    except Exception as e:
        # Surface backend errors in the Gradio UI instead of crashing the app.
        raise gr.Error(str(e))
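

# A minimal usage sketch (not part of the original module) showing one way `retrieve`
# could be wired into a Gradio interface; the component labels, value ranges, and
# launch settings below are assumptions for illustration only.
if __name__ == "__main__":
    demo = gr.Interface(
        fn=retrieve,
        inputs=[
            gr.Textbox(label="Query"),
            gr.Slider(1, 100, value=30, step=1, label="Candidates to retrieve"),
            gr.Checkbox(value=True, label="Rerank with cross-encoder"),
            gr.Slider(1, 20, value=5, step=1, label="Documents to keep after reranking"),
        ],
        outputs=gr.JSON(label="Retrieved documents"),
    )
    demo.launch()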