Spaces:

SmilingWolf
/

danbooru2022_image_similarity

Running

App Files Community

SmilingWolf committed on Aug 21

Commit

03d2c4c

•

1 Parent(s): 8a0e72f

Update index to danbooru dataset v3

Browse files

Also change model to wd-swinv2-tagger-v3

Files changed (6) hide show

Utils/dbimutils.py +0 -54
app.py +29 -103
index/cosine_ids.npy +2 -2
index/cosine_infos.json +1 -1
index/cosine_knn.index +2 -2
requirements.txt +2 -2

Utils/dbimutils.py DELETED Viewed

@@ -1,54 +0,0 @@
-# DanBooru IMage Utility functions
-import cv2
-import numpy as np
-from PIL import Image
-def smart_imread(img, flag=cv2.IMREAD_UNCHANGED):
-    if img.endswith(".gif"):
-        img = Image.open(img)
-        img = img.convert("RGB")
-        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
-    else:
-        img = cv2.imread(img, flag)
-    return img
-def smart_24bit(img):
-    if img.dtype is np.dtype(np.uint16):
-        img = (img / 257).astype(np.uint8)
-    if len(img.shape) == 2:
-        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-    elif img.shape[2] == 4:
-        trans_mask = img[:, :, 3] == 0
-        img[trans_mask] = [255, 255, 255, 255]
-        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
-    return img
-def make_square(img, target_size):
-    old_size = img.shape[:2]
-    desired_size = max(old_size)
-    desired_size = max(desired_size, target_size)
-    delta_w = desired_size - old_size[1]
-    delta_h = desired_size - old_size[0]
-    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
-    left, right = delta_w // 2, delta_w - (delta_w // 2)
-    color = [255, 255, 255]
-    new_im = cv2.copyMakeBorder(
-        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
-    )
-    return new_im
-def smart_resize(img, size):
-    # Assumes the image has already gone through make_square
-    if img.shape[0] > size:
-        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
-    elif img.shape[0] < size:
-        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
-    return img

app.py CHANGED Viewed

@@ -1,32 +1,22 @@
 import argparse
-import functools
 import json
-import os
-from pathlib import Path
 import faiss
 import gradio as gr
 import numpy as np
-import PIL.Image
 import requests
-import tensorflow as tf
-from huggingface_hub import hf_hub_download
-from Utils import dbimutils
 TITLE = "## Danbooru Explorer"
 DESCRIPTION = """
 Image similarity-based retrieval tool using:
-- [SmilingWolf/wd-v1-4-convnext-tagger-v2](https://huggingface.co/SmilingWolf/wd-v1-4-convnext-tagger-v2) as feature extractor
 - [Faiss](https://github.com/facebookresearch/faiss) and [autofaiss](https://github.com/criteo/autofaiss) for indexing
 Also, check out [SmilingWolf/danbooru2022_embeddings_playground](https://huggingface.co/spaces/SmilingWolf/danbooru2022_embeddings_playground) for a similar space with experimental support for text input combined with image input.
 """
-CONV_MODEL_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
-CONV_MODEL_REVISION = "v2.0"
-CONV_FEXT_LAYER = "predictions_norm"
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
@@ -34,39 +24,6 @@ def parse_args() -> argparse.Namespace:
     return parser.parse_args()
-def download_model(model_repo, model_revision):
-    model_files = [
-        {"filename": "saved_model.pb", "subfolder": ""},
-        {"filename": "keras_metadata.pb", "subfolder": ""},
-        {"filename": "variables.index", "subfolder": "variables"},
-        {"filename": "variables.data-00000-of-00001", "subfolder": "variables"},
-    ]
-    model_file_paths = []
-    for elem in model_files:
-        model_file_paths.append(
-            Path(
-                hf_hub_download(
-                    model_repo,
-                    revision=model_revision,
-                    **elem,
-                )
-            )
-        )
-    model_path = model_file_paths[0].parents[0]
-    return model_path
-def load_model(model_repo, model_revision, feature_extraction_layer):
-    model_path = download_model(model_repo, model_revision)
-    full_model = tf.keras.models.load_model(model_path)
-    model = tf.keras.models.Model(
-        full_model.inputs, full_model.get_layer(feature_extraction_layer).output
-    )
-    return model
 def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
     headers = {"User-Agent": "image_similarity_tool"}
     ratings_to_letters = {
@@ -93,54 +50,30 @@ def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
 class SimilaritySearcher:
-    def __init__(self, model, images_ids):
-        self.knn_index = None
-        self.knn_metric = None
-        self.model = model
-        self.images_ids = images_ids
-    def change_index(self, knn_metric):
-        if knn_metric == self.knn_metric:
-            return
-        if knn_metric == "ip":
-            self.knn_index = faiss.read_index("index/ip_knn.index")
-            config = json.loads(open("index/ip_infos.json").read())["index_param"]
-        elif knn_metric == "cosine":
-            self.knn_index = faiss.read_index("index/cosine_knn.index")
-            config = json.loads(open("index/cosine_infos.json").read())["index_param"]
         faiss.ParameterSpace().set_index_parameters(self.knn_index, config)
-        self.knn_metric = knn_metric
     def predict(
-        self, image, selected_ratings, knn_metric, api_username, api_key, n_neighbours
     ):
-        _, height, width, _ = self.model.inputs[0].shape
-        self.change_index(knn_metric)
-        # Alpha to white
-        image = image.convert("RGBA")
-        new_image = PIL.Image.new("RGBA", image.size, "WHITE")
-        new_image.paste(image, mask=image)
-        image = new_image.convert("RGB")
-        image = np.asarray(image)
-        # PIL RGB to OpenCV BGR
-        image = image[:, :, ::-1]
-        image = dbimutils.make_square(image, height)
-        image = dbimutils.smart_resize(image, height)
-        image = image.astype(np.float32)
-        image = np.expand_dims(image, 0)
-        target = self.model(image).numpy()
-        if self.knn_metric == "cosine":
-            faiss.normalize_L2(target)
-        dists, indexes = self.knn_index.search(target, k=n_neighbours)
         neighbours_ids = self.images_ids[indexes][0]
         neighbours_ids = [int(x) for x in neighbours_ids]
@@ -148,7 +81,10 @@ class SimilaritySearcher:
         image_urls = []
         for image_id, dist in zip(neighbours_ids, dists[0]):
             current_url = danbooru_id_to_url(
-                image_id, selected_ratings, api_username, api_key
             )
             if current_url is not None:
                 image_urls.append(current_url)
@@ -158,17 +94,14 @@ class SimilaritySearcher:
 def main():
     args = parse_args()
-    model = load_model(CONV_MODEL_REPO, CONV_MODEL_REVISION, CONV_FEXT_LAYER)
-    images_ids = np.load("index/cosine_ids.npy")
-    searcher = SimilaritySearcher(model=model, images_ids=images_ids)
     with gr.Blocks() as demo:
         gr.Markdown(TITLE)
         gr.Markdown(DESCRIPTION)
         with gr.Row():
-            input = gr.Image(type="pil", label="Input")
             with gr.Column():
                 with gr.Row():
                     api_username = gr.Textbox(label="Danbooru API Username")
@@ -179,12 +112,6 @@ def main():
                     label="Ratings",
                 )
                 with gr.Row():
-                    selected_metric = gr.Radio(
-                        choices=["cosine"],
-                        value="cosine",
-                        label="Metric selection",
-                        visible=False,
-                    )
                     n_neighbours = gr.Slider(
                         minimum=1,
                         maximum=20,
@@ -198,12 +125,11 @@ def main():
         find_btn.click(
             fn=searcher.predict,
             inputs=[
-                input,
                 selected_ratings,
-                selected_metric,
                 api_username,
                 api_key,
-                n_neighbours,
             ],
             outputs=[similar_images],
         )

 import argparse
 import json
 import faiss
 import gradio as gr
 import numpy as np
 import requests
+from imgutils.tagging import wd14
 TITLE = "## Danbooru Explorer"
 DESCRIPTION = """
 Image similarity-based retrieval tool using:
+- [SmilingWolf/wd-swinv2-tagger-v3](https://huggingface.co/SmilingWolf/wd-swinv2-tagger-v3) as feature extractor
+- [dghs-imgutils](https://github.com/deepghs/imgutils) for feature extraction
 - [Faiss](https://github.com/facebookresearch/faiss) and [autofaiss](https://github.com/criteo/autofaiss) for indexing
 Also, check out [SmilingWolf/danbooru2022_embeddings_playground](https://huggingface.co/spaces/SmilingWolf/danbooru2022_embeddings_playground) for a similar space with experimental support for text input combined with image input.
 """
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     return parser.parse_args()
 def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
     headers = {"User-Agent": "image_similarity_tool"}
     ratings_to_letters = {
 class SimilaritySearcher:
+    def __init__(self):
+        self.images_ids = np.load("index/cosine_ids.npy")
+        self.knn_index = faiss.read_index("index/cosine_knn.index")
+        config = json.loads(open("index/cosine_infos.json").read())["index_param"]
         faiss.ParameterSpace().set_index_parameters(self.knn_index, config)
     def predict(
+        self,
+        img_input,
+        selected_ratings,
+        n_neighbours,
+        api_username,
+        api_key,
     ):
+        embeddings = wd14.get_wd14_tags(
+            img_input,
+            model_name="SwinV2_v3",
+            fmt=("embedding"),
+        )
+        embeddings = np.expand_dims(embeddings, 0)
+        faiss.normalize_L2(embeddings)
+        dists, indexes = self.knn_index.search(embeddings, k=n_neighbours)
         neighbours_ids = self.images_ids[indexes][0]
         neighbours_ids = [int(x) for x in neighbours_ids]
         image_urls = []
         for image_id, dist in zip(neighbours_ids, dists[0]):
             current_url = danbooru_id_to_url(
+                image_id,
+                selected_ratings,
+                api_username,
+                api_key,
             )
             if current_url is not None:
                 image_urls.append(current_url)
 def main():
     args = parse_args()
+    searcher = SimilaritySearcher()
     with gr.Blocks() as demo:
         gr.Markdown(TITLE)
         gr.Markdown(DESCRIPTION)
         with gr.Row():
+            img_input = gr.Image(type="pil", label="Input")
             with gr.Column():
                 with gr.Row():
                     api_username = gr.Textbox(label="Danbooru API Username")
                     label="Ratings",
                 )
                 with gr.Row():
                     n_neighbours = gr.Slider(
                         minimum=1,
                         maximum=20,
         find_btn.click(
             fn=searcher.predict,
             inputs=[
+                img_input,
                 selected_ratings,
+                n_neighbours,
                 api_username,
                 api_key,
             ],
             outputs=[similar_images],
         )

index/cosine_ids.npy CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df724519c8c1981e49d80e2430261deb4fb6edf6d9c04e134427879710747394
-size 21830676

 version https://git-lfs.github.com/spec/v1
+oid sha256:36f75b729ccdb6f46abbae84e1587a4e93387846a31f5ecd6ed0523ec731e3be
+size 26567768

index/cosine_infos.json CHANGED Viewed

@@ -1 +1 @@

- {"index_key": "OPQ256_1280,IVF16384_HNSW32,PQ256x8", "index_param": "nprobe=16,efSearch=32,ht=2048", "index_path": "/home/SmilingWolf/eval/index/ConvNextBV1_01_14_2023_08h37m46s_cosine_knn.index", "size in bytes": 1535843672, "avg_search_speed_ms": 10.164478485783887, "99p_search_speed_ms": 12.419190758373587, "reconstruction error %": 22.007358074188232, "nb vectors": 5457637, "vectors dimension": 1024, "compression ratio": 14.555180035276402}

+ {"index_key": "OPQ256_1280,IVF16384_HNSW32,PQ256x8", "index_param": "nprobe=16,efSearch=32,ht=2048", "index_path": "/home/SmilingWolf/eval/index/swinv2_base_2024_03_13_17h37m11s_cosine_knn.index", "size in bytes": 1848491744, "avg_search_speed_ms": 12.240526417507978, "99p_search_speed_ms": 15.45338472060394, "reconstruction error %": 20.334696769714355, "nb vectors": 6641910, "vectors dimension": 1024, "compression ratio": 14.717546588079324}

index/cosine_knn.index CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a718ab8370df8b9d84002c55f945ef241e4cc3450d306c2ecd97661f51022ad
-size 1535843672

 version https://git-lfs.github.com/spec/v1
+oid sha256:615ea848c51b153c4481ecfffcde206c56fc607eb88be99b996ab14f413b985a
+size 1848491744

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 pillow>=9.0.0
-opencv-python
-tensorflow-cpu~=2.15.1
 faiss-cpu

 pillow>=9.0.0
 faiss-cpu
+dghs-imgutils
+onnxruntime