"""Gradio demo: find similar Quora questions via binary-quantized Qdrant search."""

import os
import time

import gradio as gr
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

# Single source of truth for the collection queried by every function below.
COLLECTION_NAME = 'questions-binaryq'

# Slider bounds; also interpolated into the help text so the two cannot drift.
MIN_TOP_K = 1
MAX_TOP_K = 30
DEFAULT_TOP_K = 3

# Help text shown at the top of the page. Kept at column 0 so the rendered
# markdown does not depend on any dedenting done by the UI layer.
INTRO_MARKDOWN = """
# Quora Similar Questions Finder using Semantic Search 🔍

Welcome to the Quora Similar Questions Finder, a tool designed to enhance search experience. This space leverages advanced machine learning techniques to find the most relevant questions from {question_count} questions based on your input.

## Features

- **Dataset**: Utilizes the Quora duplicate questions dataset from Hugging Face `datasets`, ensuring a wide coverage of topics and queries.
- **Advanced NLP Model**: Employs SentenceTransformer's [`BAAI/bge-small-en-v1.5`](https://huggingface.co/BAAI/bge-small-en-v1.5) model to create embeddings for each unique question. This model supports a maximum sequence length of 512 and provides an embedding dimension of 384, allowing for semantic understanding of questions.
- **Efficient Storage**: Embeddings are efficiently stored in a vector index on `Qdrant` cloud, uploaded in batches of size 200.

## How to Use

1. **Enter a Question**: Simply type in your question.
2. **Select K Value ({min_k}-{max_k})**: Choose how many similar questions you want to find.
3. **Find Similar Questions**: Hit the button and the system will generate an embedding for your input question. It then performs a cosine distance query to the Qdrant cloud to retrieve the most similar questions based on your specified K value.

Experience the power of semantic search and find the answers you need more efficiently.
"""

# Connection settings come from the environment; a missing variable raises
# KeyError at import time, before the UI is built.
qdrant = QdrantClient(
    url=os.environ['QDRANT_URL'],
    api_key=os.environ['QDRANT_API_KEY'],
)

encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')


def compute_embedding(sentences, emb_model):
    """Encode *sentences* with *emb_model* and return whatever `encode` yields.

    Args:
        sentences: A string (or list of strings) to embed.
        emb_model: Any object exposing `encode(sentences=...)`, e.g. the
            module-level `encoder`.
    """
    return emb_model.encode(sentences=sentences)


def quantized_vector_search(embedding, top_k=5):
    """Search the questions collection for the nearest neighbours of *embedding*.

    Args:
        embedding: Query vector, passed to Qdrant as `query_vector`.
        top_k: Maximum number of hits to return.

    Returns:
        The hits returned by `qdrant.search`, with payloads attached.
    """
    search_params = models.SearchParams(
        exact=False,
        # Keep quantization enabled (ignore=False), oversample candidates and
        # rescore them; see the Qdrant quantization docs for exact semantics.
        quantization=models.QuantizationSearchParams(
            ignore=False,
            rescore=True,
            oversampling=2.0,
        ),
    )
    return qdrant.search(
        collection_name=COLLECTION_NAME,
        query_vector=embedding,
        limit=top_k,
        with_payload=True,
        search_params=search_params,
    )


def query(question, top_k=5):
    """Embed *question*, search Qdrant and report results plus timings.

    Args:
        question: Free-text question typed by the user.
        top_k: Number of similar questions to retrieve.

    Returns:
        A pair `(scores, timings)`. `scores` maps each matched question text
        to its similarity score rounded to 3 decimals; `timings` maps a label
        to a human-readable duration string. Both are empty dicts when the
        question is blank or the collection does not exist.
    """
    # Nothing meaningful to embed: skip the model and the network round trips.
    if not question or not question.strip():
        return {}, {}

    existing_collections = {
        collection.name for collection in qdrant.get_collections().collections
    }
    if COLLECTION_NAME not in existing_collections:
        return {}, {}

    # perf_counter is monotonic, so elapsed times cannot go negative or jump
    # when the system clock is adjusted.
    encode_started = time.perf_counter()
    embedding = compute_embedding(question, encoder)
    encoding_time = time.perf_counter() - encode_started

    # The slider value is coerced because the search limit must be an integer.
    search_started = time.perf_counter()
    hits = quantized_vector_search(embedding, int(top_k))
    bq_query_time = time.perf_counter() - search_started

    # Duplicate question texts collapse to one entry; the last hit wins.
    scores = {hit.payload['question']: round(hit.score, 3) for hit in hits}
    timings = {
        "Encoding Time": str(round(encoding_time, 3)) + " s",
        "Query Time (w/ Binary Quantization)": str(round(bq_query_time, 3)) + " s",
    }
    return scores, timings


with gr.Blocks() as semantic_search_demo:
    # NOTE(review): vectors_count may be None on some Qdrant versions, which
    # would render as "None questions" - confirm against the deployed server.
    gr.Markdown(
        INTRO_MARKDOWN.format(
            question_count=qdrant.get_collection(COLLECTION_NAME).vectors_count,
            min_k=MIN_TOP_K,
            max_k=MAX_TOP_K,
        )
    )

    with gr.Row():
        with gr.Column(scale=4):
            bq_results = gr.Label(
                label="Most similar questions w/ Binary Quantization",
                value={},
            )
        with gr.Column(scale=1):
            input_question = gr.Textbox(
                label="Question",
                placeholder="Enter your question here",
            )
            top_k_slider = gr.Slider(
                value=DEFAULT_TOP_K,
                minimum=MIN_TOP_K,
                maximum=MAX_TOP_K,
                label="Top K",
                interactive=True,
                step=1,
            )
            button = gr.Button("Find similar questions")
            # Named to avoid clashing with the stdlib `json` module.
            timings_json = gr.JSON()

    gr.Examples(
        examples=[["Obama", 5]],
        inputs=[input_question, top_k_slider],
    )

    button.click(
        fn=query,
        inputs=[input_question, top_k_slider],
        outputs=[bq_results, timings_json],
    )

semantic_search_demo.launch(share=True)