File size: 2,170 Bytes
a509ff9
 
 
 
 
a33aa9b
 
a509ff9
 
7dc19f7
a509ff9
 
 
 
 
 
 
 
 
 
 
5792300
a509ff9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5792300
 
 
 
a509ff9
5792300
 
a509ff9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from joblib import load
import h5py
from io import BytesIO


# One-time startup load: everything the app needs comes out of a single HDF5
# file and stays in module-level names for the lifetime of the process.
with h5py.File('complete_artist_data.hdf5', 'r') as hdf:
    # The 'vectorizer' dataset is read whole, turned into raw bytes, and
    # handed to joblib's load through an in-memory buffer.
    vectorizer = load(BytesIO(hdf['vectorizer'][()].tobytes()))

    # Artist feature matrix, read fully into memory
    X_artist = hdf['X_artist'][:]

    # Names are stored as bytes; decode each entry to str
    artist_names = [raw_name.decode() for raw_name in hdf['artist_names'][:]]

def find_similar_artists(new_tags_string, top_n):
    """Rank artists by cosine similarity to a comma-separated tag list.

    Args:
        new_tags_string: Tags separated by commas. Whitespace around each tag
            is stripped and blank entries (empty input, doubled or trailing
            commas) are dropped.
        top_n: How many artists to list at each end of the ranking. Coerced
            to ``int`` because a UI slider may deliver a float; negative
            values are treated as zero.

    Returns:
        A text report: the tags missing from the vectorizer vocabulary,
        followed by the most similar and the least similar artists with their
        similarity scores.
    """
    # A float such as 10.0 is not a valid slice bound, so normalise first.
    top_n = max(int(top_n), 0)

    stripped_tags = (tag.strip() for tag in (new_tags_string or "").split(","))
    new_image_tags = [tag for tag in stripped_tags if tag]

    # Look tags up directly in the vocabulary mapping instead of copying all
    # of its keys into a set on every call; dict.fromkeys de-duplicates while
    # keeping the order the user typed, so the report is deterministic.
    vocabulary = vectorizer.vocabulary_
    unseen_tags = [tag for tag in dict.fromkeys(new_image_tags) if tag not in vocabulary]
    unseen_tags_str = f'Unseen Tags: {", ".join(unseen_tags)}' if unseen_tags else 'No unseen tags.'

    X_new_image = vectorizer.transform([','.join(new_image_tags)])
    similarities = cosine_similarity(X_new_image, X_artist)[0]

    # Sort once (ascending) and take both ends from the same ordering.
    # Reversing before slicing keeps top_n == 0 from selecting everything,
    # which `[-0:]` would do.
    ascending = np.argsort(similarities)
    top_artist_indices = ascending[::-1][:top_n]
    bottom_artist_indices = ascending[:top_n]

    def _format_ranking(indices):
        """Render one numbered "artist - score" line per index."""
        return "\n".join(
            f"{rank}. {artist_names[i]} - similarity score: {similarities[i]:.4f}"
            for rank, i in enumerate(indices, start=1)
        )

    top_artists_str = _format_ranking(top_artist_indices)
    bottom_artists_str = _format_ranking(bottom_artist_indices)

    # Headers report the number of artists actually listed rather than a
    # hard-coded 10.
    top_count = len(top_artist_indices)
    bottom_count = len(bottom_artist_indices)
    output_str = (
        f"{unseen_tags_str}\n\n"
        f"Top {top_count} artists:\n{top_artists_str}\n\n"
        f"Bottom {bottom_count} artists:\n{bottom_artists_str}"
    )
    return output_str

# Gradio UI: a textbox for the tags and a slider for how many artists to list
# at each end of the ranking; the report is rendered as plain text.
iface = gr.Interface(
    fn=find_similar_artists,
    inputs=[
        gr.Textbox(label="Enter image tags", placeholder="fox, outside, detailed background"),
        # The component-style gr.Slider takes its initial position via
        # `value`; `default` is the keyword of the legacy gr.inputs.Slider
        # API and does not set the starting value here.
        gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
    ],
    outputs="text",
    title="Tagset Completer",
    description="Enter a list of comma-separated e6 tags"
)

iface.launch()