# NCTCMumbai's picture
# Update backend/semantic_search.py
# 4400012
# raw
# history blame
# 1.05 kB
import logging
import lancedb
import os
from pathlib import Path
from sentence_transformers import SentenceTransformer
# ---------------------------------------------------------------------------
# Logging: configure the root logger at INFO and create this module's logger.
# ---------------------------------------------------------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Embedding model configuration.
#
# Active choice: BAAI/llm-embedder, loaded through SentenceTransformer below.
# Alternatives kept here for reference (currently disabled):
#   * EMB_MODEL_NAME = "thenlper/gte-base"
#   * FlagEmbedding's own classes; all documentation is available at
#     https://github.com/FlagOpen/FlagEmbedding/tree/master
#       from FlagEmbedding import LLMEmbedder, FlagReranker
#       EMB_MODEL_NAME = LLMEmbedder('BAAI/llm-embedder', use_fp16=False)  # uses GPUs automatically
#       reranker_model = FlagReranker('BAAI/bge-reranker-base', use_fp16=True)  # fp16: faster, slightly less accurate
# ---------------------------------------------------------------------------
EMB_MODEL_NAME = "BAAI/llm-embedder"

# Task to encode for; one of: qa, icl, chat, lrlm, tool, convsearch.
# NOTE(review): not referenced in the visible part of this file --
# presumably read by code that imports this module; confirm before removing.
task = "qa"

# Name of the LanceDB table to open (disabled alternative: "Huggingface_docs").
DB_TABLE_NAME = "doc_embed1"

# Instantiate the embedding model once, at import time.
retriever = SentenceTransformer(EMB_MODEL_NAME)

# ---------------------------------------------------------------------------
# Vector database: the ".lancedb1" directory sits next to this file's parent
# directory (i.e. inside the grandparent directory of this file).
# ---------------------------------------------------------------------------
db_uri = os.path.join(
    Path(__file__).parents[1],
    ".lancedb1",
)
db = lancedb.connect(db_uri)
table = db.open_table(DB_TABLE_NAME)