danbooru-tags-transformer

Runtime error

App Files Community

p1atdev committed on Feb 22

Commit

31d9259

•

1 Parent(s): 6fa0b33

feat: add optimized models, use tokenizer chat template and better ui

Browse files

Files changed (1) hide show

app.py +166 -75

app.py CHANGED Viewed

@@ -4,13 +4,14 @@ import os
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 MODEL_NAME = (
     os.environ.get("MODEL_NAME")
     if os.environ.get("MODEL_NAME") is not None
-    else "p1atdev/dart-test-3-sft-1"
 )
 HF_READ_TOKEN = os.environ.get("HF_READ_TOKEN")
@@ -21,16 +22,32 @@ tokenizer = AutoTokenizer.from_pretrained(
     trust_remote_code=True,
     token=HF_READ_TOKEN,
 )
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    trust_remote_code=True,
-    token=HF_READ_TOKEN,
-)
 try:
-    model = torch.compile(model)
 except:
-    print("torch compile not supported")
 BOS = "<|bos|>"
 EOS = "<|eos|>"
@@ -45,6 +62,11 @@ GENERAL_EOS = "</general>"
 INPUT_END = "<|input_end|>"
 RATING_BOS_ID = tokenizer.convert_tokens_to_ids(RATING_BOS)
 RATING_EOS_ID = tokenizer.convert_tokens_to_ids(RATING_EOS)
 COPYRIGHT_BOS_ID = tokenizer.convert_tokens_to_ids(COPYRIGHT_BOS)
@@ -54,9 +76,6 @@ CHARACTER_EOS_ID = tokenizer.convert_tokens_to_ids(CHARACTER_EOS)
 GENERAL_BOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_BOS)
 GENERAL_EOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_EOS)
-INPUT_END_ID = tokenizer.convert_tokens_to_ids(INPUT_END)
 assert isinstance(RATING_BOS_ID, int)
 assert isinstance(RATING_EOS_ID, int)
 assert isinstance(COPYRIGHT_BOS_ID, int)
@@ -65,7 +84,6 @@ assert isinstance(CHARACTER_BOS_ID, int)
 assert isinstance(CHARACTER_EOS_ID, int)
 assert isinstance(GENERAL_BOS_ID, int)
 assert isinstance(GENERAL_EOS_ID, int)
-assert isinstance(INPUT_END_ID, int)
 SPECIAL_TAGS = [
     BOS,
@@ -79,6 +97,10 @@ SPECIAL_TAGS = [
     GENERAL_BOS,
     GENERAL_EOS,
     INPUT_END,
 ]
 SPECIAL_TAG_IDS = tokenizer.convert_tokens_to_ids(SPECIAL_TAGS)
@@ -95,6 +117,13 @@ RATING_TAGS = {
 }
 RATING_TAG_IDS = {k: tokenizer.convert_tokens_to_ids(v) for k, v in RATING_TAGS.items()}
 def load_tags(path: str | Path):
     if isinstance(path, str):
@@ -115,34 +144,10 @@ PEOPLE_TAG_IDS_LIST = tokenizer.convert_tokens_to_ids(PEOPLE_TAGS_LIST)
 assert isinstance(PEOPLE_TAG_IDS_LIST, list)
-def compose_prompt(
-    rating: str = "rating:sfw, rating:general",
-    copyright: str = "",
-    character: str = "",
-    general: str = "",
-):
-    return "".join(
-        [
-            BOS,
-            RATING_BOS,
-            rating,
-            RATING_EOS,
-            COPYRIGHT_BOS,
-            copyright,
-            COPYRIGHT_EOS,
-            CHARACTER_BOS,
-            character,
-            CHARACTER_EOS,
-            GENERAL_BOS,
-            general,
-            INPUT_END,
-        ]
-    )
 @torch.no_grad()
 def generate(
     input_text: str,
     max_new_tokens: int = 128,
     min_new_tokens: int = 0,
     do_sample: bool = True,
@@ -157,17 +162,17 @@ def generate(
     inputs = tokenizer(
         input_text,
         return_tensors="pt",
-    ).input_ids
     negative_inputs = (
         tokenizer(
             negative_input_text,
             return_tensors="pt",
-        ).input_ids
         if negative_input_text is not None
         else None
     )
-    generated = model.generate(
         inputs,
         max_new_tokens=max_new_tokens,
         min_new_tokens=min_new_tokens,
@@ -270,12 +275,14 @@ def handle_inputs(
     do_cfg: bool = False,
     cfg_scale: float = 1.5,
     negative_tags: str = "",
     max_new_tokens: int = 128,
     min_new_tokens: int = 0,
     temperature: float = 1.0,
     top_p: float = 1.0,
     top_k: int = 20,
     num_beams: int = 1,
 ):
     """
     Returns:
@@ -286,6 +293,9 @@ def handle_inputs(
             input_prompt_raw,
             output_tags_raw,
             elapsed_time,
         ]
     """
@@ -294,18 +304,28 @@ def handle_inputs(
     copyright_tags = ", ".join(copyright_tags_list)
     character_tags = ", ".join(character_tags_list)
-    prompt = compose_prompt(
-        rating=prepare_rating_tags(rating_tags),
-        copyright=copyright_tags,
-        character=character_tags,
-        general=general_tags,
     )
-    negative_prompt = compose_prompt(
-        rating=prepare_rating_tags(rating_tags),
-        copyright="",
-        character="",
-        general=negative_tags,
     )
     bad_words_ids = tokenizer.encode_plus(
@@ -314,6 +334,7 @@ def handle_inputs(
     generated_ids = generate(
         prompt,
         max_new_tokens=max_new_tokens,
         min_new_tokens=min_new_tokens,
         do_sample=True,
@@ -334,6 +355,9 @@ def handle_inputs(
     end_time = time.time()
     elapsed_time = f"Elapsed: {(end_time - start_time) * 1000:.2f} ms"
     return [
         decoded_normal,
         decoded_general_only,
@@ -341,13 +365,44 @@ def handle_inputs(
         prompt,
         decoded_raw,
         elapsed_time,
     ]
 def demo():
     with gr.Blocks() as ui:
         with gr.Row():
             with gr.Column():
                 with gr.Group():
                     rating_dropdown = gr.Dropdown(
                         label="Rating",
@@ -419,26 +474,29 @@ def demo():
                 with gr.Group():
                     general_tags_textbox = gr.Textbox(
-                        label="General tags",
                         placeholder="1girl, ...",
                         lines=4,
                     )
                     ban_tags_textbox = gr.Textbox(
-                        label="Ban tags",
-                        placeholder="",
                         value="",
                         lines=2,
                     )
-                with gr.Accordion(label="Generation config", open=False):
                     with gr.Group():
                         do_cfg_check = gr.Checkbox(
                             label="Do CFG (Classifier Free Guidance)",
                             value=False,
                         )
                         cfg_scale_slider = gr.Slider(
-                            label="Max new tokens",
                             maximum=3.0,
                             minimum=0.1,
                             step=0.1,
@@ -463,6 +521,13 @@ def demo():
                             outputs=[cfg_scale_slider, negative_tags_textbox],
                         )
                     with gr.Group():
                         max_new_tokens_slider = gr.Slider(
                             label="Max new tokens",
@@ -507,27 +572,44 @@ def demo():
                             value=1,
                         )
-                generate_btn = gr.Button("Generate", variant="primary")
             with gr.Column():
-                output_tags_natural = gr.Textbox(
-                    label="Generation result",
-                    # placeholder="tags will be here",
-                    interactive=False,
-                )
-                output_tags_general_only = gr.Textbox(
-                    label="General tags only",
-                    interactive=False,
-                )
-                output_tags_animagine = gr.Textbox(
-                    label="Output tags (AnimagineXL v3 style order)",
-                    # placeholder="tags will be here in Animagine v3 style order",
-                    interactive=False,
-                )
-                elapsed_time_md = gr.Markdown(value="Waiting to generate...")
                 with gr.Accordion(label="Metadata", open=False):
                     input_prompt_raw = gr.Textbox(
@@ -542,6 +624,8 @@ def demo():
                         lines=4,
                     )
             copyright_tags_mode_dropdown.change(
                 on_change_copyright_tags_dropdouwn,
                 inputs=[copyright_tags_mode_dropdown],
@@ -564,12 +648,14 @@ def demo():
                     do_cfg_check,
                     cfg_scale_slider,
                     negative_tags_textbox,
                     max_new_tokens_slider,
                     min_new_tokens_slider,
                     temperature_slider,
                     top_p_slider,
                     top_k_slider,
                     num_beams_slider,
                 ],
                 outputs=[
                     output_tags_natural,
@@ -578,10 +664,15 @@ def demo():
                     input_prompt_raw,
                     output_tags_raw,
                     elapsed_time_md,
                 ],
             )
-    ui.launch()
 if __name__ == "__main__":

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from optimum.onnxruntime import ORTModelForCausalLM
 import gradio as gr
 MODEL_NAME = (
     os.environ.get("MODEL_NAME")
     if os.environ.get("MODEL_NAME") is not None
+    else "p1atdev/dart-v1-sft"
 )
 HF_READ_TOKEN = os.environ.get("HF_READ_TOKEN")
     trust_remote_code=True,
     token=HF_READ_TOKEN,
 )
+model = {
+    "default": AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        token=HF_READ_TOKEN,
+    ),
+    "ort": ORTModelForCausalLM.from_pretrained(MODEL_NAME),
+    "ort_qantized": ORTModelForCausalLM.from_pretrained(
+        MODEL_NAME, file_name="model_quantized.onnx"
+    ),
+}
+MODEL_BACKEND_MAP = {
+    "Default": "default",
+    "ONNX (normal)": "ort",
+    "ONNX (quantized)": "ort_qantized",
+}
 try:
+    model["default"].to("cuda")
 except:
+    print("No GPU")
+try:
+    model["default"] = torch.compile(model["default"])
+except:
+    print("torch.compile is not supported")
 BOS = "<|bos|>"
 EOS = "<|eos|>"
 INPUT_END = "<|input_end|>"
+LENGTH_VERY_SHORT = "<|very_short|>"
+LENGTH_SHORT = "<|short|>"
+LENGTH_LONG = "<|long|>"
+LENGTH_VERY_LONG = "<|very_long|>"
 RATING_BOS_ID = tokenizer.convert_tokens_to_ids(RATING_BOS)
 RATING_EOS_ID = tokenizer.convert_tokens_to_ids(RATING_EOS)
 COPYRIGHT_BOS_ID = tokenizer.convert_tokens_to_ids(COPYRIGHT_BOS)
 GENERAL_BOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_BOS)
 GENERAL_EOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_EOS)
 assert isinstance(RATING_BOS_ID, int)
 assert isinstance(RATING_EOS_ID, int)
 assert isinstance(COPYRIGHT_BOS_ID, int)
 assert isinstance(CHARACTER_EOS_ID, int)
 assert isinstance(GENERAL_BOS_ID, int)
 assert isinstance(GENERAL_EOS_ID, int)
 SPECIAL_TAGS = [
     BOS,
     GENERAL_BOS,
     GENERAL_EOS,
     INPUT_END,
+    LENGTH_VERY_SHORT,
+    LENGTH_SHORT,
+    LENGTH_LONG,
+    LENGTH_VERY_LONG,
 ]
 SPECIAL_TAG_IDS = tokenizer.convert_tokens_to_ids(SPECIAL_TAGS)
 }
 RATING_TAG_IDS = {k: tokenizer.convert_tokens_to_ids(v) for k, v in RATING_TAGS.items()}
+LENGTH_TAGS = {
+    "very short": LENGTH_VERY_SHORT,
+    "short": LENGTH_SHORT,
+    "long": LENGTH_LONG,
+    "very long": LENGTH_VERY_LONG,
+}
 def load_tags(path: str | Path):
     if isinstance(path, str):
 assert isinstance(PEOPLE_TAG_IDS_LIST, list)
 @torch.no_grad()
 def generate(
     input_text: str,
+    model_backend: str,
     max_new_tokens: int = 128,
     min_new_tokens: int = 0,
     do_sample: bool = True,
     inputs = tokenizer(
         input_text,
         return_tensors="pt",
+    ).input_ids.to(model[MODEL_BACKEND_MAP[model_backend]].device)
     negative_inputs = (
         tokenizer(
             negative_input_text,
             return_tensors="pt",
+        ).input_ids.to(model[MODEL_BACKEND_MAP[model_backend]].device)
         if negative_input_text is not None
         else None
     )
+    generated = model[MODEL_BACKEND_MAP[model_backend]].generate(
         inputs,
         max_new_tokens=max_new_tokens,
         min_new_tokens=min_new_tokens,
     do_cfg: bool = False,
     cfg_scale: float = 1.5,
     negative_tags: str = "",
+    total_token_length: str = "long",
     max_new_tokens: int = 128,
     min_new_tokens: int = 0,
     temperature: float = 1.0,
     top_p: float = 1.0,
     top_k: int = 20,
     num_beams: int = 1,
+    model_backend: str = "ONNX (quantized)",
 ):
     """
     Returns:
             input_prompt_raw,
             output_tags_raw,
             elapsed_time,
+            output_tags_natural_copy_btn,
+            output_tags_general_only_copy_btn,
+            output_tags_animagine_copy_btn
         ]
     """
     copyright_tags = ", ".join(copyright_tags_list)
     character_tags = ", ".join(character_tags_list)
+    token_length_tag = LENGTH_TAGS[total_token_length]
+    prompt: str = tokenizer.apply_chat_template(
+        {  # type: ignore
+            "rating": prepare_rating_tags(rating_tags),
+            "copyright": copyright_tags,
+            "character": character_tags,
+            "general": general_tags,
+            "length": token_length_tag,
+        },
+        tokenize=False,
     )
+    negative_prompt: str = tokenizer.apply_chat_template(
+        {  # type: ignore
+            "rating": prepare_rating_tags(rating_tags),
+            "copyright": "",
+            "character": "",
+            "general": negative_tags,
+            "length": token_length_tag,
+        },
+        tokenize=False,
     )
     bad_words_ids = tokenizer.encode_plus(
     generated_ids = generate(
         prompt,
+        model_backend=model_backend,
         max_new_tokens=max_new_tokens,
         min_new_tokens=min_new_tokens,
         do_sample=True,
     end_time = time.time()
     elapsed_time = f"Elapsed: {(end_time - start_time) * 1000:.2f} ms"
+    # update visibility of buttons
+    set_visible = gr.update(visible=True)
     return [
         decoded_normal,
         decoded_general_only,
         prompt,
         decoded_raw,
         elapsed_time,
+        set_visible,
+        set_visible,
+        set_visible,
     ]
+# ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
+def copy_text(_text: None):
+    gr.Info("Copied!")
+COPY_ACTION_JS = """\
+(inputs, _outputs) => {
+  // inputs is the string value of the input_text
+  if (inputs.trim() !== "") {
+    navigator.clipboard.writeText(inputs);
+  }
+}"""
 def demo():
     with gr.Blocks() as ui:
+        gr.Markdown(
+            """\
+# Danbooru Tags Transformer Demo """
+        )
         with gr.Row():
             with gr.Column():
+                with gr.Group():
+                    model_backend_radio = gr.Radio(
+                        label="Model backend",
+                        choices=list(MODEL_BACKEND_MAP.keys()),
+                        value="ONNX (quantized)",
+                        interactive=True,
+                    )
                 with gr.Group():
                     rating_dropdown = gr.Dropdown(
                         label="Rating",
                 with gr.Group():
                     general_tags_textbox = gr.Textbox(
+                        label="General tags (the condition to generate tags)",
+                        value="",
                         placeholder="1girl, ...",
                         lines=4,
                     )
                     ban_tags_textbox = gr.Textbox(
+                        label="Ban tags (tags in this field never appear in generation)",
                         value="",
+                        placeholder="official alternate cosutme, english text,...",
                         lines=2,
                     )
+                generate_btn = gr.Button("Generate", variant="primary")
+                with gr.Accordion(label="Generation config (advanced)", open=False):
                     with gr.Group():
                         do_cfg_check = gr.Checkbox(
                             label="Do CFG (Classifier Free Guidance)",
                             value=False,
                         )
                         cfg_scale_slider = gr.Slider(
+                            label="CFG scale",
                             maximum=3.0,
                             minimum=0.1,
                             step=0.1,
                             outputs=[cfg_scale_slider, negative_tags_textbox],
                         )
+                    with gr.Group():
+                        total_token_length_radio = gr.Radio(
+                            label="Total token length",
+                            choices=list(LENGTH_TAGS.keys()),
+                            value="long",
+                        )
                     with gr.Group():
                         max_new_tokens_slider = gr.Slider(
                             label="Max new tokens",
                             value=1,
                         )
             with gr.Column():
+                with gr.Group():
+                    output_tags_natural = gr.Textbox(
+                        label="Generation result",
+                        # placeholder="tags will be here",
+                        interactive=False,
+                    )
+                    output_tags_natural_copy_btn = gr.Button("Copy", visible=False)
+                    output_tags_natural_copy_btn.click(
+                        fn=copy_text,
+                        inputs=[output_tags_natural],
+                        js=COPY_ACTION_JS,
+                    )
+                with gr.Group():
+                    output_tags_general_only = gr.Textbox(
+                        label="General tags only (sorted)",
+                        interactive=False,
+                    )
+                    output_tags_general_only_copy_btn = gr.Button("Copy", visible=False)
+                    output_tags_general_only_copy_btn.click(
+                        fn=copy_text,
+                        inputs=[output_tags_general_only],
+                        js=COPY_ACTION_JS,
+                    )
+                with gr.Group():
+                    output_tags_animagine = gr.Textbox(
+                        label="Output tags (AnimagineXL v3 style order)",
+                        # placeholder="tags will be here in Animagine v3 style order",
+                        interactive=False,
+                    )
+                    output_tags_animagine_copy_btn = gr.Button("Copy", visible=False)
+                    output_tags_animagine_copy_btn.click(
+                        fn=copy_text,
+                        inputs=[output_tags_animagine],
+                        js=COPY_ACTION_JS,
+                    )
                 with gr.Accordion(label="Metadata", open=False):
                     input_prompt_raw = gr.Textbox(
                         lines=4,
                     )
+                elapsed_time_md = gr.Markdown(value="Waiting to generate...")
             copyright_tags_mode_dropdown.change(
                 on_change_copyright_tags_dropdouwn,
                 inputs=[copyright_tags_mode_dropdown],
                     do_cfg_check,
                     cfg_scale_slider,
                     negative_tags_textbox,
+                    total_token_length_radio,
                     max_new_tokens_slider,
                     min_new_tokens_slider,
                     temperature_slider,
                     top_p_slider,
                     top_k_slider,
                     num_beams_slider,
+                    model_backend_radio,
                 ],
                 outputs=[
                     output_tags_natural,
                     input_prompt_raw,
                     output_tags_raw,
                     elapsed_time_md,
+                    output_tags_natural_copy_btn,
+                    output_tags_general_only_copy_btn,
+                    output_tags_animagine_copy_btn,
                 ],
             )
+    ui.launch(
+        share=True,
+    )
 if __name__ == "__main__":