|
import gradio as gr |
|
|
|
|
|
|
|
from sentence_transformers import SentenceTransformer |
|
import numpy as np |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from datasets import load_dataset |
|
|
|
# Sentence-embedding model shared by every encode call in this module.
embedding_model = SentenceTransformer("thenlper/gte-large")

# Request a single split so that `dataset` is a row-indexable Dataset.
# Without `split`, load_dataset returns a DatasetDict keyed by split name:
# iterating it yields the split names (strings) rather than row dicts, and
# integer indexing fails.
# NOTE(review): assumes the rows live in the "train" split - confirm on the Hub.
dataset = load_dataset("hugginglearners/netflix-shows", split="train")
|
|
|
|
|
def combine_description_title_and_genre(description, listed_in, title):
    """Merge a show's description, genre and title into one text string.

    The result has the form ``"<description> Genre: <listed_in> Title: <title>"``.
    """
    template = "{} Genre: {} Title: {}"
    return template.format(description, listed_in, title)
|
|
|
|
|
def get_embedding(text):
    """Encode *text* with the module-level ``embedding_model`` and return the result."""
    vector = embedding_model.encode(text)
    return vector
|
|
|
|
|
# Lazily built (embeddings, rating_weights) pair for the whole dataset.
# Built on the first search and reused afterwards so that a query only has to
# encode the query text instead of re-encoding every row.
_corpus_index = None


def _rating_weights(ratings):
    """Return one float weight per rating; unparsable ratings get a neutral 1.0."""
    weights = np.ones(len(ratings), dtype=float)
    for position, rating in enumerate(ratings):
        try:
            weight = float(rating)
        except (TypeError, ValueError):
            # Non-numeric rating (a label or None): leave the score untouched.
            continue
        if np.isfinite(weight):
            weights[position] = weight
    return weights


def _build_corpus_index():
    """Embed every dataset row in one batch and compute its rating weight."""
    texts = []
    ratings = []
    for item in dataset:
        texts.append(
            combine_description_title_and_genre(
                item["description"], item["listed_in"], item["title"]
            )
        )
        ratings.append(item["rating"])
    # A single batched encode call replaces one model call per row.
    embeddings = np.asarray(get_embedding(texts))
    return embeddings, _rating_weights(ratings)


def vector_search(query, top_n=3):
    """Return the best-matching shows for *query* as newline-terminated text.

    Each dataset row is scored by the cosine similarity between its embedding
    and the query embedding, multiplied element-wise by the row's rating
    weight. Ratings that cannot be parsed as a finite number contribute a
    weight of 1.0, so the ranking falls back to plain similarity for them.
    NOTE(review): presumably this dataset's ``rating`` column holds text
    labels rather than numbers - confirm against the data.

    Args:
        query: Free-text search string.
        top_n: Maximum number of results to return (default 3).

    Returns:
        One line per result in the form
        ``"Title: ..., Description: ..., Genre: ..., Rating: ...\\n"``,
        best match first. An empty string is returned for a blank query,
        a non-positive ``top_n`` or an empty dataset.
    """
    global _corpus_index

    # Blank input carries nothing to match on; skip the model call entirely.
    if not query or not query.strip() or top_n <= 0:
        return ""

    if _corpus_index is None:
        _corpus_index = _build_corpus_index()
    embeddings, weights = _corpus_index
    if len(embeddings) == 0:
        return ""

    query_embedding = np.asarray(get_embedding(query)).reshape(1, -1)
    # cosine_similarity returns shape (1, n_items); take the single row.
    similarities = cosine_similarity(query_embedding, embeddings)[0]

    # Element-wise weighting keeps each score aligned with its own row.
    scores = similarities * weights

    top_indices = scores.argsort()[-top_n:][::-1]
    # Cast to built-in int: the dataset is indexed by row number.
    top_items = [dataset[int(index)] for index in top_indices]

    return "".join(
        f"Title: {item['title']}, Description: {item['description']}, Genre: {item['listed_in']}, Rating: {item['rating']}\n"
        for item in top_items
    )
|
|
|
|
|
def movie_search(query):
    """Forward *query* to ``vector_search`` and return its result unchanged."""
    results = vector_search(query)
    return results
|
|
|
# Text-in / text-out Gradio interface around `movie_search`; `live=True` is
# passed so the interface runs in live mode.
APP_TITLE = "Netflix Recommendation System"
APP_DESCRIPTION = "Enter a query to get Netflix recommendations based on description and genre."

iface = gr.Interface(
    fn=movie_search,
    inputs="text",
    outputs="text",
    live=True,
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Start the web app.
iface.launch()
|
|
|
|
|
|
|
|
|
|