import gradio as gr
import os
import time

from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models


# Shared Qdrant client. The endpoint and API key are read from the environment,
# so a missing QDRANT_URL or QDRANT_API_KEY raises KeyError when this module loads.
qdrant = QdrantClient(url=os.environ['QDRANT_URL'], api_key=os.environ['QDRANT_API_KEY'])

# Sentence-embedding model used to encode incoming questions.
encoder = SentenceTransformer('BAAI/bge-small-en-v1.5')


def compute_embedding(sentences, emb_model):
  """Encode ``sentences`` with ``emb_model`` and return the model's output.

  Args:
    sentences: Input handed to the model unchanged.
    emb_model: Object exposing ``encode(sentences=...)``; in this file it is
      the module-level ``SentenceTransformer`` instance.

  Returns:
    Whatever ``emb_model.encode`` returns for the given input.
  """
  embedding = emb_model.encode(sentences=sentences)
  return embedding


def quantized_vector_search(embedding, top_k=5):
  """Search the ``questions-binaryq`` collection for vectors close to ``embedding``.

  Args:
    embedding: Query vector forwarded to Qdrant as ``query_vector``.
    top_k: Maximum number of hits requested (``limit``).

  Returns:
    The result of ``qdrant.search``; payloads are included in each hit.
  """
  # Use the quantized vectors (ignore=False), oversample candidates by 2x and
  # rescore them, as configured through Qdrant's quantization search params.
  quantization_params = models.QuantizationSearchParams(
    ignore=False,
    rescore=True,
    oversampling=2.0,
  )
  # exact=False keeps the search approximate rather than a full scan.
  search_params = models.SearchParams(
    exact=False,
    quantization=quantization_params,
  )
  hits = qdrant.search(
    collection_name='questions-binaryq',
    query_vector=embedding,
    limit=top_k,
    with_payload=True,
    search_params=search_params,
  )
  return hits


def query(question, top_k=5):
  """Find the stored questions most similar to ``question``.

  Args:
    question: Free-text question to search for. Empty, whitespace-only or
      missing input short-circuits to empty results.
    top_k: Maximum number of matches to return. Coerced to ``int`` before it
      is used as the search limit, in case the UI control delivers a float.

  Returns:
    A 2-tuple ``(matches, timings)``. ``matches`` maps each matched question
    text to its score rounded to 3 decimals; ``timings`` maps a label to a
    ``"<seconds> s"`` string. Both are empty dicts when the input is blank or
    the ``questions-binaryq`` collection does not exist.
  """
  # Nothing meaningful to embed: skip the model and every remote call.
  if not question or not question.strip():
    return {}, {}

  collection_names = {c.name for c in qdrant.get_collections().collections}
  if 'questions-binaryq' not in collection_names:
    return {}, {}

  # perf_counter is the monotonic, high-resolution clock meant for measuring
  # elapsed intervals; time.time() can jump if the system clock is adjusted.
  started = time.perf_counter()
  emb = compute_embedding(question, encoder)
  encoding_time = time.perf_counter() - started

  started = time.perf_counter()
  bq_results = quantized_vector_search(emb, int(top_k))
  bq_query_time = time.perf_counter() - started

  # Keyed by question text, so duplicate texts collapse to the last hit seen.
  bq_results_dict = {
    hit.payload['question']: round(hit.score, 3) for hit in bq_results
  }

  return bq_results_dict, {
    "Encoding Time": f"{round(encoding_time, 3)} s",
    "Query Time (w/ Binary Quantization)": f"{round(bq_query_time, 3)} s",
  }


with gr.Blocks() as semantic_search_demo:
  # Bounds of the "Top K" slider. They are also interpolated into the help text
  # below so the documented range cannot drift from the actual control.
  top_k_min, top_k_max = 1, 30

  # Number of indexed questions shown in the intro text. points_count is used
  # because vectors_count is deprecated in Qdrant's collection info and may be
  # unset; this assumes each point holds exactly one question.
  question_count = qdrant.get_collection('questions-binaryq').points_count

  gr.Markdown(
    """
    # Quora Similar Questions Finder using Semantic Search 🔍
    Welcome to the Quora Similar Questions Finder, a tool designed to enhance search experience.
    This space leverages advanced machine learning techniques to find the most relevant questions from {question_count} questions based on your input.
    ## Features
    - **Dataset**: Utilizes the Quora duplicate questions dataset from Hugging Face `datasets`, ensuring a wide coverage of topics and queries.
    - **Advanced NLP Model**: Employs SentenceTransformer's [`BAAI/bge-small-en-v1.5`](https://huggingface.co/BAAI/bge-small-en-v1.5) model to create embeddings for each unique question. This model supports a maximum sequence length of 512 and provides an embedding dimension of 384, allowing for semantic understanding of questions.
    - **Efficient Storage**: Embeddings are efficiently stored in a vector index on `Qdrant` cloud, uploaded in batches of size 200.
    ## How to Use
    1. **Enter a Question**: Simply type in your question.
    2. **Select K Value ({min_k}-{max_k})**: Choose how many similar questions you want to find.
    3. **Find Similar Questions**: Hit the button and the system will generate an embedding for your input question. It then performs a cosine distance query to the Qdrant cloud to retrieve the most similar questions based on your specified K value.
    Experience the power of semantic search and find the answers you need more efficiently.
    """.format(
      question_count=question_count,
      min_k=top_k_min,
      max_k=top_k_max,
    )
  )

  with gr.Row():

    # Wide left column: ranked matches with their similarity scores.
    with gr.Column(scale=4):
      bq_results = gr.Label(
        label="Most similar questions w/ Binary Quantization",
        value={}
      )

    # Narrow right column: inputs, trigger button and timing read-out.
    with gr.Column(scale=1):
      input_question = gr.Textbox(
        label="Question",
        placeholder="Enter your question here"
      )
      top_k_slider = gr.Slider(
        value=3,
        minimum=top_k_min,
        maximum=top_k_max,
        label="Top K",
        interactive=True,
        step=1
      )
      button = gr.Button("Find similar questions")
      # Receives the timings dict returned by query(). The name is kept as
      # `json` so the module-level name does not change.
      json = gr.JSON()
      gr.Examples(
        examples=[["Obama", 5]],
        inputs=[input_question, top_k_slider]
      )

  # query() returns (matches, timings), mapped positionally onto the outputs.
  button.click(
    fn=query,
    inputs=[
      input_question,
      top_k_slider
    ],
    outputs=[
      bq_results,
      json
    ]
  )


# Start the Gradio server for the demo. share=True additionally asks Gradio to
# create a temporary public share link.
semantic_search_demo.launch(share=True)