KAZEKOI commited on
Commit
7bb21b9
·
verified ·
1 Parent(s): 0abc030

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +98 -0
  2. caption_index.parquet +3 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pandas as pd
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ import os
6
+
7
# Emoticon ("kaomoji") tags that must keep their literal underscores when a
# caption is rendered — ordinary tags have "_" replaced with " ", but doing
# that to these would mangle the face. All entries are space-free, so a
# single split() reconstructs the exact list.
kaomojis = (
    "0_0 (o)_(o) +_+ +_- ._. <o>_<o> <|>_<|> =_= >_< 3_3 6_9 >_o "
    "@_@ ^_^ o_o u_u x_x |_| ||_||"
).split()
28
+
29
# Path to the pre-built caption index (a parquet file tracked via git-lfs).
index_file = './caption_index.parquet'


# Loaded once at module import time. Based on how it is used below: the
# DataFrame's index holds each image's comma-separated caption string, and
# it has at least a 'score' column (numeric-ish, used for sampling weights)
# and a 'name' column ('danbooru_<id>', turned into a post URL).
df = pd.read_parquet(index_file)
33
+
34
+
35
def process_input(user_input):
    """Find captions containing all requested tags and sample a few of them.

    Parameters
    ----------
    user_input : str
        Tags separated by ", "; spaces inside a tag are converted to
        underscores to match the caption vocabulary.

    Returns
    -------
    tuple[str, int]
        An HTML string of up to five sampled matches (each a danbooru link
        followed by its escaped caption) and the total number of captions
        that matched the query.
    """
    # Normalise the query to the underscore-separated tag vocabulary.
    user_tags = set(tag.replace(' ', '_') for tag in user_input.split(', '))

    def match_tags(caption, tags):
        # A caption matches when every requested tag appears in it.
        caption_set = set(caption.split(', '))
        return tags.issubset(caption_set)

    def process_chunk(chunk):
        # Work on a copy so the shared module-level df is never mutated.
        chunk = chunk.copy()
        chunk['match'] = chunk.index.to_series().apply(lambda x: match_tags(x, user_tags))
        return chunk[chunk['match']]

    # Scan the index in chunks on a thread pool. NOTE(review): this work is
    # CPU-bound pure Python, so the threads largely serialise under the GIL;
    # kept for parity with the original design.
    chunk_size = 100000
    chunks = [df.iloc[i:i + chunk_size] for i in range(0, df.shape[0], chunk_size)]

    with ThreadPoolExecutor(max_workers=8) as executor:
        results = executor.map(process_chunk, chunks)

    # FIX: pd.concat([]) raises ValueError when df is empty — fall back to
    # an empty frame with the same columns.
    filtered_df = pd.concat(results) if chunks else df.iloc[0:0].copy()

    def calculate_weight(score):
        # Scores at or below 5 (and non-numeric scores) get zero weight.
        try:
            return max(float(score) - 5, 0.0)
        except (TypeError, ValueError):
            return 0.0

    filtered_df['weight'] = filtered_df['score'].apply(calculate_weight)

    # (Removed the original np.random.seed(np.random.randint(...)) — seeding
    # the global RNG with a freshly drawn random value is a no-op for the
    # sampling distribution.)
    sample_size = min(5, len(filtered_df))

    if sample_size > 0:
        weights = filtered_df['weight'].to_numpy(dtype=float)
        total = weights.sum()
        # FIX: the original did `weights /= weights.sum()` unconditionally.
        # When every matched row has score <= 5 the sum is 0, producing NaNs
        # that make np.random.choice raise; it also raises when fewer rows
        # have positive weight than sample_size (replace=False). In either
        # case fall back to uniform sampling (p=None).
        if total <= 0 or (weights > 0).sum() < sample_size:
            probs = None
        else:
            probs = weights / total
        sampled_indices = np.random.choice(
            filtered_df.index, size=sample_size, p=probs, replace=False
        )
        sampled_df = filtered_df.loc[sampled_indices]
    else:
        sampled_df = filtered_df

    output = []
    for index, row in sampled_df.iterrows():
        # The caption itself is the DataFrame index.
        tags = index.split(', ')
        # Kaomoji tags keep their underscores; everything else is prettified.
        processed_tags = [tag.replace('_', ' ') if tag not in kaomojis else tag for tag in tags]
        # Escape parentheses so downstream prompt parsers treat them
        # literally. ("\\(" fixes the original's invalid "\(" escape
        # sequence; the string value is identical.)
        processed_tags = [tag.replace("(", "\\(").replace(")", "\\)") for tag in processed_tags]
        processed_caption = ', '.join(processed_tags)
        url = row['name'].replace('danbooru_', 'https://danbooru.donmai.us/posts/')
        output.append(f"<a href='{url}' target='_blank'>{url}</a>: {processed_caption}<br>")

    return ''.join(output), len(filtered_df)
87
+
88
# Wire the sampler into a minimal Gradio UI: one textbox in, rendered HTML
# links plus a match count out. flagging_mode='never' removes the default
# "Flag" button.
iface = gr.Interface(
    fn=process_input,
    inputs=gr.Textbox(label="Input tags separated by ', '"),
    outputs=[
        gr.HTML(),
        gr.Number(label="Matched Images Count")
    ],
    title="Prompt Sampling",
    flagging_mode='never'
)
# Start the local web server (blocks until the process is stopped).
iface.launch()
caption_index.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ce8a0c716655604d747131a76984a35dc6f15487e038242d640367a8df66db
3
+ size 86522444