File size: 1,413 Bytes
c49578b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fb4c0c
c49578b
 
 
7fb4c0c
 
 
c49578b
 
 
 
7fb4c0c
ff9b8ad
c49578b
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

# Blog-post titles from the "pyimagesearch/blog-title" dataset repo.
# get_titles() indexes the "title" column by embedding row number, so the
# rows here are assumed to line up with embeddings.csv -- TODO confirm.
title_dataset = load_dataset(
    "pyimagesearch/blog-title",
    data_files="bp-title.csv",
)

# Precomputed title embeddings from the same repo, converted step by step:
# dataset split -> pandas DataFrame -> NumPy array -> float tensor.
title_embeddings = load_dataset(
    "pyimagesearch/blog-title",
    data_files="embeddings.csv",
)
title_embeddings = title_embeddings["train"].to_pandas().to_numpy()
title_embeddings = torch.from_numpy(title_embeddings).to(torch.float)

# Sentence encoder used to embed incoming queries (presumably the same model
# that produced embeddings.csv -- verify before swapping it out).
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Text passed to the Gradio interface.
title = "Title Semantic Search"
description = (
    "Provide a blog post title, and we'll find the most similar titles "
    "from our already written blog posts."
)

# Sample queries offered by the interface.
examples = [
    "Introduction to Keras",
    "Conditional GANs with Keras",
    "A Gentle Introduction to PyTorch with Deep Learning",
]

def get_titles(query: str) -> dict:
    """Return the stored blog titles most similar to *query*.

    The query is embedded with the module-level ``model`` and compared
    against ``title_embeddings`` using ``semantic_search`` with ``top_k=5``.

    Args:
        query: Free-text title to search for.

    Returns:
        A dict mapping each matched title to its similarity score, in the
        order the hits were returned. It holds at most five entries, and
        fewer when several hits share the same title text.
    """
    query_embed = model.encode(query)
    # semantic_search returns one hit list per query; a single query is
    # passed here, so only the first list is relevant.
    hits = semantic_search(query_embed, title_embeddings, top_k=5)[0]

    # Fetch the title column once instead of once per hit.
    all_titles = title_dataset["train"]["title"]

    titles = {}
    for hit in hits:
        # Hits are documented as sorted by decreasing score, so setdefault
        # keeps the best score when two corpus rows carry the same title
        # (plain assignment would overwrite it with a lower one).
        titles.setdefault(all_titles[hit["corpus_id"]], hit["score"])
    return titles

# UI components: a single text box for the query, and a label widget that
# displays up to five title -> score entries returned by get_titles.
query_box = gr.Textbox(label="Input Title")
similar_titles = gr.Label(num_top_classes=5)

# Wire the search function to the components and attach the page metadata.
space = gr.Interface(
    fn=get_titles,
    inputs=query_box,
    outputs=similar_titles,
    examples=examples,
    title=title,
    description=description,
)

# Start the Gradio server.
space.launch()