Spaces:
Running
Running
FoodDesert
committed on
Commit
•
1e4bd6c
1
Parent(s):
90290aa
Upload app.py
Browse files
app.py
CHANGED
@@ -113,20 +113,17 @@ def extract_tags(tree):
|
|
113 |
|
114 |
|
115 |
# Load the model and data once at startup
|
116 |
-
with h5py.File('
|
|
|
117 |
vectorizer_bytes = f['vectorizer'][()].tobytes()
|
118 |
# Use io.BytesIO to convert bytes back to a file-like object for joblib to load
|
119 |
vectorizer_buffer = BytesIO(vectorizer_bytes)
|
120 |
vectorizer = load(vectorizer_buffer)
|
121 |
|
122 |
-
#
|
123 |
-
|
124 |
-
|
125 |
-
X_artist_reduced = f['X_artist_reduced'][:]
|
126 |
artist_names = [name.decode() for name in f['artist_names'][:]]
|
127 |
-
# Recreate PCA transformation (not the exact PCA object but its transformation ability)
|
128 |
-
def pca_transform(X):
|
129 |
-
return (X - pca_mean) @ pca_components.T
|
130 |
|
131 |
|
132 |
with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
|
@@ -288,8 +285,8 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
|
|
288 |
###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
|
289 |
unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
|
290 |
|
291 |
-
|
292 |
-
similarities = cosine_similarity(
|
293 |
|
294 |
top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
|
295 |
top_artists = [(artist_names[i], similarities[i]) for i in top_artist_indices]
|
|
|
113 |
|
114 |
|
115 |
# Load the model and data once at startup
|
116 |
+
with h5py.File('complete_artist_data.hdf5', 'r') as f:
|
117 |
+
# Deserialize the vectorizer
|
118 |
vectorizer_bytes = f['vectorizer'][()].tobytes()
|
119 |
# Use io.BytesIO to convert bytes back to a file-like object for joblib to load
|
120 |
vectorizer_buffer = BytesIO(vectorizer_bytes)
|
121 |
vectorizer = load(vectorizer_buffer)
|
122 |
|
123 |
+
# Load X_artist
|
124 |
+
X_artist = f['X_artist'][:]
|
125 |
+
# Load artist names and decode to strings
|
|
|
126 |
artist_names = [name.decode() for name in f['artist_names'][:]]
|
|
|
|
|
|
|
127 |
|
128 |
|
129 |
with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
|
|
|
285 |
###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
|
286 |
unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
|
287 |
|
288 |
+
X_new_image = vectorizer.transform([','.join(new_image_tags)])
|
289 |
+
similarities = cosine_similarity(X_new_image, X_artist)[0]
|
290 |
|
291 |
top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
|
292 |
top_artists = [(artist_names[i], similarities[i]) for i in top_artist_indices]
|