"""LanceDB-backed retriever over the 'arabic-wiki' table.

At import time this module connects to the LanceDB database stored in the
``lancedb`` directory one level above this file's directory, opens the
``arabic-wiki`` table, and loads a SentenceTransformer embedding model.
The time taken by each of these two steps is logged at INFO level.
"""
import logging
from pathlib import Path
import time

import lancedb
from sentence_transformers import SentenceTransformer

# Setting up the logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Start the timer for connecting to LanceDB and opening the table
start_time = time.perf_counter()

proj_dir = Path(__file__).parents[1]
db = lancedb.connect(proj_dir / "lancedb")
tbl = db.open_table('arabic-wiki')

# Log the time taken to load LanceDB
lancedb_loading_time = time.perf_counter() - start_time
logger.info("Time taken to load LanceDB: %.6f seconds", lancedb_loading_time)

# Start the timer for loading the embedding model / retriever
start_time = time.perf_counter()

name = "sentence-transformers/paraphrase-multilingual-minilm-l12-v2"
# device=None lets SentenceTransformer choose the device itself (CUDA when it
# is available), instead of failing on hosts without a GPU as a hard-coded
# 'cuda' would.
st_model = SentenceTransformer(name, device=None)


# used for both training and querying
def embed_func(query):
    """Encode ``query`` with the module-level SentenceTransformer model.

    Args:
        query: Input forwarded unchanged to ``st_model.encode``.

    Returns:
        Whatever ``st_model.encode`` returns for ``query``.
    """
    return st_model.encode(query)


def vector_search(query_vector, top_k):
    """Search the opened LanceDB table with ``query_vector``.

    Args:
        query_vector: Embedding passed to ``tbl.search``.
        top_k: Maximum number of results, passed to ``.limit``.

    Returns:
        The search results converted with ``.to_list()``.
    """
    return tbl.search(query_vector).limit(top_k).to_list()


def retriever(query, top_k=3):
    """Embed ``query`` and return the matching documents from the table.

    Args:
        query: Input to embed via ``embed_func``.
        top_k: Maximum number of documents to return (default 3).

    Returns:
        The list produced by ``vector_search`` for the embedded query.
    """
    query_vector = embed_func(query)
    documents = vector_search(query_vector, top_k)
    return documents


# Log the time taken to load the EmbeddingRetriever
retriever_loading_time = time.perf_counter() - start_time
logger.info(
    "Time taken to load EmbeddingRetriever: %.6f seconds",
    retriever_loading_time,
)