# Spaces: Running (page-status text captured with this listing; not part of the program)
from io import BytesIO

import gradio as gr
import h5py
import numpy as np
from joblib import load
from sklearn.metrics.pairwise import cosine_similarity
# Load the model and data once at startup so that each request only has to
# vectorize the query and compute similarities.
with h5py.File('complete_artist_data.hdf5', 'r') as f:
    # The vectorizer is stored in the HDF5 file as a byte array holding a
    # joblib dump; wrap the bytes in an in-memory buffer so joblib can read
    # them like a file (BytesIO comes from the standard-library io module).
    # NOTE(review): joblib.load unpickles arbitrary Python objects and can
    # execute code while doing so -- only use a data file from a trusted source.
    vectorizer_bytes = f['vectorizer'][()].tobytes()
    vectorizer_buffer = BytesIO(vectorizer_bytes)
    vectorizer = load(vectorizer_buffer)
    # Read the whole artist matrix into memory; the file is closed when this
    # block ends, so nothing may keep a lazy reference to the dataset.
    X_artist = f['X_artist'][:]
    # Artist names are stored as bytes; decode them to str for display.
    artist_names = [name.decode() for name in f['artist_names'][:]]
def _format_ranking(indices, similarities):
    """Render one numbered "artist - similarity score" line per index."""
    return "\n".join(
        f"{rank}. {artist_names[i]} - similarity score: {similarities[i]:.4f}"
        for rank, i in enumerate(indices, start=1)
    )


def find_similar_artists(new_tags_string):
    """Rank the known artists by cosine similarity to a comma-separated tag list.

    Args:
        new_tags_string: Tags separated by commas. Whitespace around each tag
            is stripped and empty entries are ignored; ``None`` is treated as
            an empty string.

    Returns:
        A text report listing the tags missing from the vectorizer's
        vocabulary, followed by the most similar and the least similar
        artists (up to 20 each) with their similarity scores.
    """
    # Drop empty entries so blank input or stray commas ("a,,b", "a,") are not
    # reported as an unseen tag with an empty name.
    new_image_tags = [
        tag
        for tag in (raw.strip() for raw in (new_tags_string or "").split(","))
        if tag
    ]

    # Membership is tested directly against the vocabulary mapping instead of
    # copying all of its keys into a new set on every request. dict.fromkeys
    # de-duplicates while keeping input order, so the message is stable.
    vocabulary = vectorizer.vocabulary_
    unseen_tags = [tag for tag in dict.fromkeys(new_image_tags) if tag not in vocabulary]
    unseen_tags_str = f'Unseen Tags: {", ".join(unseen_tags)}' if unseen_tags else 'No unseen tags.'

    X_new_image = vectorizer.transform([','.join(new_image_tags)])
    similarities = cosine_similarity(X_new_image, X_artist)[0]

    top_n = 20
    # Sort once (ascending): the tail reversed is the best matches, the head
    # is the worst matches.
    ranked_indices = np.argsort(similarities)
    top_artist_indices = ranked_indices[-top_n:][::-1]
    bottom_artist_indices = ranked_indices[:top_n]

    top_artists_str = _format_ranking(top_artist_indices, similarities)
    bottom_artists_str = _format_ranking(bottom_artist_indices, similarities)

    # The headings report the number of entries actually listed; they were
    # previously hard-coded to 10 while 20 entries were shown.
    shown = len(top_artist_indices)
    return (
        f"{unseen_tags_str}\n\n"
        f"Top {shown} artists:\n{top_artists_str}\n\n"
        f"Bottom {shown} artists:\n{bottom_artists_str}"
    )
# Single-textbox UI: the text typed by the user is passed to
# find_similar_artists and the returned report is shown as plain text.
iface = gr.Interface(
    find_similar_artists,
    "text",
    "text",
    title="Artist Similarity Finder",
    description="Enter image tags to find similar artists based on learned similarities.",
)

# Start the web server for the interface.
iface.launch()