Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import time | |
from sentence_transformers import SentenceTransformer | |
from qdrant_client import QdrantClient, models | |
# Remote Qdrant connection. Both settings are read from the environment, so a
# missing QDRANT_URL or QDRANT_API_KEY raises KeyError as soon as this runs.
qdrant = QdrantClient(url=os.environ['QDRANT_URL'], api_key=os.environ['QDRANT_API_KEY'])

# Sentence-embedding model, loaded once and reused for every query.
encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')
def compute_embedding(sentences, emb_model):
    """Encode ``sentences`` with ``emb_model`` and return the result unchanged.

    Args:
        sentences: Text handed straight through to ``emb_model.encode``.
        emb_model: Any object exposing ``encode(sentences=...)``.

    Returns:
        Whatever ``emb_model.encode`` returns for ``sentences``.
    """
    embedding = emb_model.encode(sentences=sentences)
    return embedding
def quantized_vector_search(embedding, top_k=5):
    """Search the ``questions-binaryq`` collection for vectors near ``embedding``.

    Args:
        embedding: Query vector passed to Qdrant as ``query_vector``.
        top_k: Passed to Qdrant as ``limit``, the maximum number of hits.

    Returns:
        The result of ``qdrant.search``, with payloads included.
    """
    # Search-time quantization settings; see the Qdrant quantization guide for
    # the exact meaning of ``ignore``, ``rescore`` and ``oversampling``.
    quantization_params = models.QuantizationSearchParams(
        ignore=False,
        rescore=True,
        oversampling=2.0,
    )
    search_params = models.SearchParams(
        exact=False,
        quantization=quantization_params,
    )
    return qdrant.search(
        collection_name='questions-binaryq',
        query_vector=embedding,
        limit=top_k,
        with_payload=True,
        search_params=search_params,
    )
def query(question, top_k=5):
    """Find the stored questions most similar to ``question``.

    Args:
        question: Free-text question to look up.
        top_k: Number of similar questions to retrieve. Coerced to ``int``
            before it is used as the search limit.

    Returns:
        A 2-tuple ``(results, timings)``. ``results`` maps each matched
        question text to its score rounded to 3 decimals. ``timings`` maps a
        label to the elapsed time formatted as ``"<seconds> s"``. Both are
        empty dicts when ``question`` is blank/``None`` or when the
        ``questions-binaryq`` collection does not exist.
    """
    # Nothing to search for: skip the encoder run and both network calls.
    if question is None or (isinstance(question, str) and not question.strip()):
        return {}, {}

    collection_names = {
        collection.name for collection in qdrant.get_collections().collections
    }
    if 'questions-binaryq' not in collection_names:
        return {}, {}

    # perf_counter() is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted and would skew these readings.
    started = time.perf_counter()
    emb = compute_embedding(question, encoder)
    encoding_time = time.perf_counter() - started

    started = time.perf_counter()
    # A UI slider may deliver its value as a float; the limit is a count.
    bq_results = quantized_vector_search(emb, int(top_k))
    bq_query_time = time.perf_counter() - started

    # Keyed by question text, so hits with identical text collapse into one
    # entry (the last one wins), exactly as with an explicit loop.
    bq_results_dict = {
        hit.payload['question']: round(hit.score, 3) for hit in bq_results
    }
    return bq_results_dict, {
        "Encoding Time": f"{round(encoding_time, 3)} s",
        "Query Time (w/ Binary Quantization)": f"{round(bq_query_time, 3)} s",
    }
# Bounds of the "Top K" slider. The usage text below is formatted from the same
# values so the documented range and the actual widget cannot drift apart.
TOP_K_MIN = 1
TOP_K_MAX = 30

# Number of indexed questions shown in the intro text.
# NOTE(review): `vectors_count` appears to be deprecated in recent Qdrant
# releases and may be None or absent; fall back to `points_count` in that
# case - confirm against the installed qdrant-client version.
_collection_info = qdrant.get_collection('questions-binaryq')
_question_count = getattr(_collection_info, 'vectors_count', None)
if _question_count is None:
    _question_count = _collection_info.points_count

# NOTE(review): the trailing "π" in the heading looks like a mis-encoded emoji;
# it is kept as found - restore the intended character if known.
_INTRO_MARKDOWN = """
# Quora Similar Questions Finder using Semantic Search π
Welcome to the Quora Similar Questions Finder, a tool designed to enhance search experience.
This space leverages advanced machine learning techniques to find the most relevant questions from {question_count} questions based on your input.
## Features
- **Dataset**: Utilizes the Quora duplicate questions dataset from Hugging Face `datasets`, ensuring a wide coverage of topics and queries.
- **Advanced NLP Model**: Employs SentenceTransformer's [`BAAI/bge-small-en-v1.5`](https://huggingface.co/BAAI/bge-small-en-v1.5) model to create embeddings for each unique question. This model supports a maximum sequence length of 512 and provides an embedding dimension of 384, allowing for semantic understanding of questions.
- **Efficient Storage**: Embeddings are efficiently stored in a vector index on `Qdrant` cloud, uploaded in batches of size 200.
## How to Use
1. **Enter a Question**: Simply type in your question.
2. **Select K Value ({min_k}-{max_k})**: Choose how many similar questions you want to find.
3. **Find Similar Questions**: Hit the button and the system will generate an embedding for your input question. It then performs a cosine distance query to the Qdrant cloud to retrieve the most similar questions based on your specified K value.
Experience the power of semantic search and find the answers you need more efficiently.
""".format(
    question_count=_question_count,
    min_k=TOP_K_MIN,
    max_k=TOP_K_MAX,
)

# NOTE(review): the widget nesting below was reconstructed from a listing whose
# indentation had been stripped - confirm the row/column layout.
with gr.Blocks() as semantic_search_demo:
    gr.Markdown(_INTRO_MARKDOWN)
    with gr.Row():
        # Wide column: ranked matches with their similarity scores.
        with gr.Column(scale=4):
            bq_results = gr.Label(
                label="Most similar questions w/ Binary Quantization",
                value={}
            )
        # Narrow column: inputs, trigger button and timing read-out.
        with gr.Column(scale=1):
            input_question = gr.Textbox(
                label="Question",
                placeholder="Enter your question here"
            )
            top_k_slider = gr.Slider(
                value=3,
                minimum=TOP_K_MIN,
                maximum=TOP_K_MAX,
                label="Top K",
                interactive=True,
                step=1
            )
            button = gr.Button("Find similar questions")
            json = gr.JSON()
    gr.Examples(
        examples=[["Obama", 5]],
        inputs=[input_question, top_k_slider]
    )
    # query() returns (results, timings): results feed the label, timings the
    # JSON panel.
    button.click(
        fn=query,
        inputs=[
            input_question,
            top_k_slider
        ],
        outputs=[
            bq_results,
            json
        ]
    )
semantic_search_demo.launch(share=True)