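# Gradio demo: semantic search over Quora questions, served from a Qdrant
# collection that uses binary quantization.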
import gradio as gr
import os
import time
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models
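# Connect to the hosted Qdrant instance; credentials are read from the environment.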
qdrant = QdrantClient(
    url=os.environ['QDRANT_URL'],
    api_key=os.environ['QDRANT_API_KEY'],
)
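# Embedding model used to encode questions (384-dim vectors, max sequence length 512).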
encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')
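# Encode one or more sentences into dense embedding vectors.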
def compute_embedding(sentences, emb_model):
    return emb_model.encode(sentences=sentences)
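# Approximate nearest-neighbour search against the binary-quantized collection,
# rescoring candidates with the original vectors for accuracy.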
def quantized_vector_search(embedding, top_k=5):
    return qdrant.search(
        collection_name='questions-binaryq',
        query_vector=embedding,
        limit=top_k,
        with_payload=True,
        search_params=models.SearchParams(
            exact=False,  # use the ANN index rather than exhaustive search
            quantization=models.QuantizationSearchParams(
                ignore=False,      # search against the quantized vectors
                rescore=True,      # re-rank candidates with the original vectors
                oversampling=2.0,  # fetch 2x top_k candidates before rescoring
            )
        )
    )
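# Handle a UI query: embed the question, search the collection, and return the
# matches together with timing information.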
def query(question, top_k=5):
    # Return empty results if the collection has not been created yet.
    collection_names = [c.name for c in qdrant.get_collections().collections]
    if 'questions-binaryq' not in collection_names:
        return {}, {}

    start_time = time.time()
    emb = compute_embedding(question, encoder)
    encoding_time = time.time() - start_time

    start_time = time.time()
    bq_results = quantized_vector_search(emb, top_k)
    bq_query_time = time.time() - start_time

    bq_results_dict = {
        bq_result.payload['question']: round(bq_result.score, 3)
        for bq_result in bq_results
    }
    return bq_results_dict, {
        "Encoding Time": f"{round(encoding_time, 3)} s",
        "Query Time (w/ Binary Quantization)": f"{round(bq_query_time, 3)} s",
    }
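# Build the Gradio interface.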
# Report how many questions are indexed in the UI description; fall back to 0
# if the collection does not exist yet.
try:
    n_questions = qdrant.get_collection('questions-binaryq').vectors_count
except Exception:
    n_questions = 0

with gr.Blocks() as semantic_search_demo:
    gr.Markdown(
        """
# Quora Similar Questions Finder using Semantic Search 🔍
Welcome to the Quora Similar Questions Finder, a tool designed to enhance your search experience.
This space leverages semantic search to find the most relevant questions among {} questions based on your input.
## Features
- **Dataset**: Uses the Quora duplicate questions dataset from Hugging Face `datasets`, ensuring wide coverage of topics and queries.
- **Advanced NLP Model**: Employs SentenceTransformer's [`BAAI/bge-small-en-v1.5`](https://huggingface.co/BAAI/bge-small-en-v1.5) model to create an embedding for each unique question. The model supports a maximum sequence length of 512 and produces 384-dimensional embeddings, enabling a semantic understanding of questions.
- **Efficient Storage**: Embeddings are stored in a vector index on `Qdrant` cloud, uploaded in batches of size 200.
## How to Use
1. **Enter a Question**: Simply type in your question.
2. **Select a K Value (1-30)**: Choose how many similar questions you want to find.
3. **Find Similar Questions**: Hit the button; the system generates an embedding for your input question, then runs a cosine-distance query against the Qdrant cloud collection to retrieve the K most similar questions.
Experience the power of semantic search and find the answers you need more efficiently.
""".format(n_questions)
    )
    with gr.Row():
        with gr.Column(scale=4):
            bq_results = gr.Label(
                label="Most similar questions w/ Binary Quantization",
                value={}
            )
        with gr.Column(scale=1):
            input_question = gr.Textbox(
                label="Question",
                placeholder="Enter your question here"
            )
            top_k_slider = gr.Slider(
                value=3,
                minimum=1,
                maximum=30,
                label="Top K",
                interactive=True,
                step=1
            )
            button = gr.Button("Find similar questions")
            timings_json = gr.JSON()
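    # Pre-filled example input for the demo.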
    gr.Examples(
        examples=[["Obama", 5]],
        inputs=[input_question, top_k_slider]
    )
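    # Wire the button to the search handler.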
    button.click(
        fn=query,
        inputs=[input_question, top_k_slider],
        outputs=[bq_results, timings_json]
    )
semantic_search_demo.launch(share=True)