Spaces:
Running
Running
FoodDesert
committed on
Commit
•
22f7149
1
Parent(s):
3f3bfef
Upload app.py
Browse files
app.py
CHANGED
@@ -101,10 +101,6 @@ plain: /([^,\\\[\]():|]|\\.)+/
|
|
101 |
# Initialize the parser
|
102 |
parser = Lark(grammar, start='start')
|
103 |
|
104 |
-
|
105 |
-
special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9"]
|
106 |
-
|
107 |
-
|
108 |
# Function to extract tags
|
109 |
def extract_tags(tree):
|
110 |
tags = []
|
@@ -118,6 +114,14 @@ def extract_tags(tree):
|
|
118 |
_traverse(tree)
|
119 |
return tags
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
# Load the model and data once at startup
|
123 |
with h5py.File('complete_artist_data.hdf5', 'r') as f:
|
@@ -283,6 +287,8 @@ def find_similar_tags(test_tags, similarity_weight):
|
|
283 |
def find_similar_artists(new_tags_string, top_n, similarity_weight):
|
284 |
try:
|
285 |
new_tags_string = new_tags_string.lower()
|
|
|
|
|
286 |
# Parse the prompt
|
287 |
parsed = parser.parse(new_tags_string)
|
288 |
# Extract tags from the parsed tree
|
@@ -292,7 +298,7 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
|
|
292 |
###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
|
293 |
unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
|
294 |
|
295 |
-
X_new_image = vectorizer.transform([','.join(new_image_tags)])
|
296 |
similarities = cosine_similarity(X_new_image, X_artist)[0]
|
297 |
|
298 |
top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
|
|
|
101 |
# Initialize the parser
|
102 |
parser = Lark(grammar, start='start')
|
103 |
|
|
|
|
|
|
|
|
|
104 |
# Function to extract tags
|
105 |
def extract_tags(tree):
|
106 |
tags = []
|
|
|
114 |
_traverse(tree)
|
115 |
return tags
|
116 |
|
117 |
+
|
118 |
+
# Tags that are pulled out of the prompt string before it is handed to the
# parser.  The visible caller re-appends the removed ones to the tag list it
# vectorizes, so they still take part in the similarity lookup.
special_tags = [
    "score:0", "score:1", "score:2", "score:3", "score:4",
    "score:5", "score:6", "score:7", "score:8", "score:9",
]


def remove_special_tags(original_string):
    """Separate special tags from a comma-separated tag string.

    The string is split on ``","`` and every piece is stripped of surrounding
    whitespace.  Pieces that exactly match an entry of ``special_tags`` are
    collected separately; all other pieces (including empty ones) are kept.

    Args:
        original_string: Comma-separated tags.  Matching is exact and
            case-sensitive, so callers should normalise case beforehand.

    Returns:
        A ``(remaining, removed)`` tuple where ``remaining`` is the
        non-special pieces re-joined with ``", "`` and ``removed`` is the
        list of special tags found, in their original order (duplicates
        preserved).
    """
    # Built per call so that later edits to the module-level list are honoured
    # while membership tests stay O(1).
    special = set(special_tags)

    remaining_tags = []
    removed_tags = []
    # Single pass: each stripped piece goes to exactly one of the two lists.
    for piece in original_string.split(","):
        tag = piece.strip()
        if tag in special:
            removed_tags.append(tag)
        else:
            remaining_tags.append(tag)

    return ", ".join(remaining_tags), removed_tags
|
124 |
+
|
125 |
|
126 |
# Load the model and data once at startup
|
127 |
with h5py.File('complete_artist_data.hdf5', 'r') as f:
|
|
|
287 |
def find_similar_artists(new_tags_string, top_n, similarity_weight):
|
288 |
try:
|
289 |
new_tags_string = new_tags_string.lower()
|
290 |
+
new_tags_string, removed_tags = remove_special_tags(new_tags_string)
|
291 |
+
|
292 |
# Parse the prompt
|
293 |
parsed = parser.parse(new_tags_string)
|
294 |
# Extract tags from the parsed tree
|
|
|
298 |
###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
|
299 |
unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
|
300 |
|
301 |
+
X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
|
302 |
similarities = cosine_similarity(X_new_image, X_artist)[0]
|
303 |
|
304 |
top_artist_indices = np.argsort(similarities)[-top_n:][::-1]
|