Spaces:
Running
Running
FoodDesert
committed on
Upload 2 files
Browse files- .gitattributes +1 -0
- app.py +107 -22
- wiki_pages-2023-08-08.csv +3 -0
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
37 |
complete_artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
37 |
complete_artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
38 |
+
wiki_pages-2023-08-08.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -22,10 +22,14 @@ faq_content="""
|
|
22 |
|
23 |
## What is the purpose of this tool?
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
29 |
|
30 |
## Does input order matter?
|
31 |
|
@@ -33,7 +37,7 @@ No
|
|
33 |
|
34 |
## Should I use underscores or spaces in the input tags?
|
35 |
|
36 |
-
|
37 |
|
38 |
## Can I use parentheses or weights as in the Stable Diffusion Automatic1111 WebUI?
|
39 |
|
@@ -46,6 +50,10 @@ An example that illustrates acceptable parentheses and weight formatting is:
|
|
46 |
Some data is excluded from consideration if it did not occur frequently enough in the sample from which the application makes its calculations.
|
47 |
If an artist or tag is too infrequent, we might not think we have enough data to make predictions about it.
|
48 |
|
|
|
|
|
|
|
|
|
49 |
## Are there any special tags?
|
50 |
|
51 |
Yes. We normalized the favorite counts of each image to a range of 0-9, with 0 being the lowest favcount, and 9 being the highest.
|
@@ -188,6 +196,63 @@ def build_aliases_dict(filename, reverse=False):
|
|
188 |
else:
|
189 |
aliases_dict[tag] = alias_list
|
190 |
return aliases_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
|
193 |
#Imagine we are adding smoothing_value to the number of times word_j occurs in each document for smoothing.
|
@@ -234,9 +299,32 @@ def geometric_mean_given_words(target_word, context_words, co_occurrence_matrix,
|
|
234 |
|
235 |
return geometric_mean
|
236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
-
def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
239 |
|
|
|
240 |
#Initialize stuff
|
241 |
if not hasattr(find_similar_tags, "fasttext_small_model"):
|
242 |
find_similar_tags.fasttext_small_model = compress_fasttext.models.CompressedFastTextKeyedVectors.load('e621FastTextModel010Replacement_small.bin')
|
@@ -245,11 +333,15 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
|
245 |
find_similar_tags.tag2aliases = build_aliases_dict(tag_aliases_file)
|
246 |
if not hasattr(find_similar_tags, "alias2tags"):
|
247 |
find_similar_tags.alias2tags = build_aliases_dict(tag_aliases_file, reverse=True)
|
|
|
|
|
|
|
|
|
248 |
|
249 |
transformed_tags = [tag.replace(' ', '_') for tag in test_tags]
|
250 |
|
251 |
-
# Find similar tags and prepare data for
|
252 |
-
|
253 |
for tag in test_tags:
|
254 |
if tag in special_tags:
|
255 |
continue
|
@@ -287,22 +379,15 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
|
287 |
geometric_mean = geometric_mean_given_words(word.replace(' ','_'), [context_tag for context_tag in transformed_tags if context_tag != word and context_tag != tag], conditional_co_occurrence_matrix, conditional_vocabulary, conditional_doc_count, smoothing_value=conditional_smoothing)
|
288 |
adjusted_score = (similarity_weight * geometric_mean) + ((1-similarity_weight)*score) # Apply the adjustment function
|
289 |
result[i] = (word, adjusted_score) # Update the tuple with the adjusted score
|
|
|
290 |
|
291 |
-
# Append tag and formatted similar tags to results_data
|
292 |
result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
first_entry_for_tag = False
|
298 |
-
else:
|
299 |
-
results_data.append(["", word, sim])
|
300 |
-
results_data.append(["", "", ""]) # Adds a blank line after each group of tags
|
301 |
-
|
302 |
-
if not results_data:
|
303 |
-
results_data.append(["No Unknown Tags Found", "", ""])
|
304 |
|
305 |
-
return
|
306 |
|
307 |
def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
|
308 |
try:
|
@@ -341,7 +426,7 @@ iface = gr.Interface(
|
|
341 |
gr.Checkbox(label="Allow NSFW Tags", value=False)
|
342 |
],
|
343 |
outputs=[
|
344 |
-
gr.
|
345 |
gr.Textbox(label="Top Artists", info="These are the artists most strongly associated with your tags. The number in parenthes is a similarity score between 0 and 1, with higher numbers indicating greater similarity."),
|
346 |
gr.Textbox(label="Dynamic Prompts Format", info="For if you're using the Automatic1111 webui (https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the Dynamic Prompts extension activated (https://github.com/adieyal/sd-dynamic-prompts) and want to try them all individually.")
|
347 |
],
|
|
|
22 |
|
23 |
## What is the purpose of this tool?
|
24 |
|
25 |
+
Since Stable Diffusion's initial release in 2022, users have developed a myriad of fine-tuned text to image models, each with unique "linguistic" preferences depending on the data from which it was fine-tuned.
|
26 |
+
Some models react best when prompted with verbose scene descriptions akin to DALL-E, while others fine-tuned on images scraped from popular image boards understand those boards' tag sets.
|
27 |
+
This tool serves as a linguistic bridge to the e621 image board tag lexicon, on which many popular models such as Fluffyrock, Fluffusion, and Pony Diffusion v6 were trained.
|
28 |
+
|
29 |
+
When you enter a txt2img prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
|
30 |
+
If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
|
31 |
+
Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided.
|
32 |
+
This is useful to align your prompt with the expected input to an e621-trained model.
|
33 |
|
34 |
## Does input order matter?
|
35 |
|
|
|
37 |
|
38 |
## Should I use underscores or spaces in the input tags?
|
39 |
|
40 |
+
As a rule, e621-trained models replace underscores in tags with spaces, so spaces are preferred.
|
41 |
|
42 |
## Can I use parentheses or weights as in the Stable Diffusion Automatic1111 WebUI?
|
43 |
|
|
|
50 |
Some data is excluded from consideration if it did not occur frequently enough in the sample from which the application makes its calculations.
|
51 |
If an artist or tag is too infrequent, we might not think we have enough data to make predictions about it.
|
52 |
|
53 |
+
## Why do some suggested tags not have summaries or wiki links?
|
54 |
+
|
55 |
+
Both of these features are extracted from the tag wiki pages, but some valid e621 tags do not have wiki pages.
|
56 |
+
|
57 |
## Are there any special tags?
|
58 |
|
59 |
Yes. We normalized the favorite counts of each image to a range of 0-9, with 0 being the lowest favcount, and 9 being the highest.
|
|
|
196 |
else:
|
197 |
aliases_dict[tag] = alias_list
|
198 |
return aliases_dict
|
199 |
+
|
200 |
+
|
201 |
+
def build_tag_count_dict(filename):
    """
    Reads a CSV file and returns a dictionary mapping column 0 of each row to
    the integer found in column 2.

    Rows are skipped when they have fewer than three columns (this includes
    blank lines, which csv.reader yields as empty lists) or when column 2 is
    not made up solely of decimal digits. If the same key appears on several
    rows, the last row wins.

    Parameters:
    - filename: The path to the CSV file.

    Returns:
    - A dictionary where each key is the value of column 0 and each value is
      the integer parsed from column 2.
    """
    result_dict = {}
    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for row in csv.reader(csvfile):
            # Guard against blank/truncated rows instead of raising IndexError.
            if len(row) < 3:
                continue
            count_text = row[2]
            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which make int() raise.
            if count_text.isdecimal():
                result_dict[row[0]] = int(count_text)
    return result_dict
|
211 |
+
|
212 |
+
import csv
|
213 |
+
|
214 |
+
|
215 |
+
def build_tag_id_wiki_dict(filename='wiki_pages-2023-08-08.csv'):
    """
    Reads a CSV file and returns a dictionary mapping tag names to tuples of
    (number, most relevant line from the wiki entry).

    The first row is treated as a header and skipped. For every other row,
    column 0 is the number, column 3 the tag name and column 4 the full wiki
    entry text. Rows with a non-integer in the first column, and rows with
    fewer than five columns (including blank lines), are ignored.
    The most relevant line is the first line that does not start with "thumb"
    and is not blank; it is an empty string when no such line exists.

    Parameters:
    - filename: The path to the CSV file.

    Returns:
    - A dictionary where each key is a tag name and each value is a tuple
      (number, most relevant wiki entry line). An empty file yields {}.
    """
    tag_data = {}
    with open(filename, 'r', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)

        # Skip the header row; the default stops an empty file from raising
        # StopIteration.
        next(reader, None)

        for row in reader:
            # Columns 0, 3 and 4 are all required; skip truncated/blank rows
            # rather than letting an IndexError abort the whole load.
            if len(row) < 5:
                continue

            try:
                # Attempt to convert the first column to an integer
                number = int(row[0])
            except ValueError:
                # If conversion fails, skip this row
                continue

            tag = row[3]
            wiki_entry_full = row[4]

            # Process the wiki entry to find the most relevant line
            relevant_line = ''
            for line in wiki_entry_full.split('\n'):
                if line.strip() and not line.startswith("thumb"):
                    relevant_line = line
                    break

            # Map the tag to a tuple of (number, relevant_line)
            tag_data[tag] = (number, relevant_line)

    return tag_data
|
256 |
|
257 |
|
258 |
#Imagine we are adding smoothing_value to the number of times word_j occurs in each document for smoothing.
|
|
|
299 |
|
300 |
return geometric_mean
|
301 |
|
302 |
+
|
303 |
+
def create_html_tables_for_tags(tag, result, tag2count, tag2idwiki):
    """
    Builds the HTML table that lists suggested replacements for one unknown tag.

    Parameters:
    - tag: The unknown tag, shown in the table heading.
    - result: An iterable of (word, similarity) pairs, one table row each.
    - tag2count: A dictionary mapping underscore-form tags to counts; tags
      that are missing are shown with a count of 0.
    - tag2idwiki: A dictionary mapping underscore-form tags to
      (wiki page id, wiki summary line) tuples.

    Returns:
    - A string of HTML: a <div> wrapping a three-column table
      (Corrected Tag, Similarity rounded to 3 places, Count). A suggested
      word is rendered as a link to its wiki page, with the summary as a
      tooltip, only when both an id and a non-empty summary are available.
    """
    # Local import so this block does not depend on the file's import section.
    from html import escape

    cell_style = "border: none; padding: 5px; height: 20px;"

    # The tag comes straight from the user's prompt, so escape it before it is
    # placed into markup.
    parts = [
        "<div style='display: inline-block; margin: 20px; vertical-align: top;'>"
        "<table><thead><tr>"
        "<th colspan='3' style='text-align: center; padding-bottom: 10px;'>"
        "Unknown Tag: "
        f"<span style='font-weight: bold; font-size: 20px;'>{escape(str(tag))}</span>"
        "</th></tr></thead><tbody>"
        "<tr style='border-bottom: 1px solid #000;'>"
        "<th>Corrected Tag</th><th>Similarity</th><th>Count</th></tr>"
    ]

    # Loop through the results and add a table row for each
    for word, sim in result:
        word_with_underscores = word.replace(' ', '_')
        count = tag2count.get(word_with_underscores, 0)
        tag_id, wiki_entry = tag2idwiki.get(word_with_underscores, (None, ''))
        safe_word = escape(word)

        if tag_id is not None and wiki_entry:
            wiki_url = f"https://e621.net/wiki_pages/{tag_id}"
            # The attribute is single-quoted, so the summary must have its
            # quotes escaped: an apostrophe would otherwise end the title early.
            tag_element = (
                f"<a href='{wiki_url}' target='_blank' "
                f"title='{escape(wiki_entry, quote=True)}'>{safe_word}</a>"
            )
        else:
            # Display the word without any hyperlink or tooltip
            tag_element = safe_word

        parts.append(
            f"<tr><td style='{cell_style}'>{tag_element}</td>"
            f"<td style='{cell_style}'>{round(sim, 3)}</td>"
            f"<td style='{cell_style}'>{count}</td></tr>"
        )

    parts.append("</tbody></table></div>")
    return "".join(parts)
|
325 |
|
|
|
326 |
|
327 |
+
def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
328 |
#Initialize stuff
|
329 |
if not hasattr(find_similar_tags, "fasttext_small_model"):
|
330 |
find_similar_tags.fasttext_small_model = compress_fasttext.models.CompressedFastTextKeyedVectors.load('e621FastTextModel010Replacement_small.bin')
|
|
|
333 |
find_similar_tags.tag2aliases = build_aliases_dict(tag_aliases_file)
|
334 |
if not hasattr(find_similar_tags, "alias2tags"):
|
335 |
find_similar_tags.alias2tags = build_aliases_dict(tag_aliases_file, reverse=True)
|
336 |
+
if not hasattr(find_similar_tags, "tag2count"):
|
337 |
+
find_similar_tags.tag2count = build_tag_count_dict(tag_aliases_file)
|
338 |
+
if not hasattr(find_similar_tags, "tag2idwiki"):
|
339 |
+
find_similar_tags.tag2idwiki = build_tag_id_wiki_dict()
|
340 |
|
341 |
transformed_tags = [tag.replace(' ', '_') for tag in test_tags]
|
342 |
|
343 |
+
# Find similar tags and prepare data for tables
|
344 |
+
html_content = ""
|
345 |
for tag in test_tags:
|
346 |
if tag in special_tags:
|
347 |
continue
|
|
|
379 |
geometric_mean = geometric_mean_given_words(word.replace(' ','_'), [context_tag for context_tag in transformed_tags if context_tag != word and context_tag != tag], conditional_co_occurrence_matrix, conditional_vocabulary, conditional_doc_count, smoothing_value=conditional_smoothing)
|
380 |
adjusted_score = (similarity_weight * geometric_mean) + ((1-similarity_weight)*score) # Apply the adjustment function
|
381 |
result[i] = (word, adjusted_score) # Update the tuple with the adjusted score
|
382 |
+
#print(word, score, geometric_mean, adjusted_score)
|
383 |
|
|
|
384 |
result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
|
385 |
+
html_content += create_html_tables_for_tags(tag, result, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
|
386 |
+
# If no tags were processed, add a message
|
387 |
+
if not html_content:
|
388 |
+
html_content = "<p>No Unknown Tags Found</p>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
389 |
|
390 |
+
return html_content # Return list of lists for Dataframe
|
391 |
|
392 |
def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
|
393 |
try:
|
|
|
426 |
gr.Checkbox(label="Allow NSFW Tags", value=False)
|
427 |
],
|
428 |
outputs=[
|
429 |
+
gr.HTML(label="Unseen Tags"),
|
430 |
gr.Textbox(label="Top Artists", info="These are the artists most strongly associated with your tags. The number in parenthes is a similarity score between 0 and 1, with higher numbers indicating greater similarity."),
|
431 |
gr.Textbox(label="Dynamic Prompts Format", info="For if you're using the Automatic1111 webui (https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the Dynamic Prompts extension activated (https://github.com/adieyal/sd-dynamic-prompts) and want to try them all individually.")
|
432 |
],
|
wiki_pages-2023-08-08.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d453c0cc8ae09c548e554ceb77b1c1578c277eb2c5a6278a85f89c73566a7b27
|
3 |
+
size 30986436
|