John6666
/

local_gokaygokay_Florence-2-SD3-Captioner_Tagger

Model card Files Files and versions Community

John6666 committed on Jun 27, 2024

Commit

f59d99b

verified ·

1 Parent(s): a4464c2

Upload florence2_sd3_tagger8.py

Browse files

Files changed (1) hide show

florence2_sd3_tagger8.py +120 -0

florence2_sd3_tagger8.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import argparse
+import os
+import re
+from PIL import Image
+import logging
+logger = logging.getLogger(__name__)
+DEFAULT_FLORENCE2_SD3_CAP_REPO = 'John6666/gokaygokay-Florence-2-SD3-Captioner-8bit'
def fl_modify_caption(caption: str) -> str:
    """Remove the first "captured from " / "captured at " phrase from a caption.

    Matching is case-insensitive and is NOT anchored to the start of the
    string: the first occurrence found anywhere in ``caption`` is removed and
    any later occurrences are left untouched.

    Args:
        caption (str): Caption text to clean up.

    Returns:
        str: The caption with the first matching phrase removed, or the
            caption unchanged when neither phrase occurs.
    """
    # (phrase, replacement) pairs; both phrases are currently dropped outright.
    phrase_replacements = [
        ('captured from ', ''),
        ('captured at ', ''),
    ]
    replacement_for = {
        phrase.lower(): replacement for phrase, replacement in phrase_replacements
    }
    # Alternation of the escaped phrases so they are matched literally.
    pattern = '|'.join(re.escape(phrase) for phrase, _ in phrase_replacements)

    def replace_fn(match):
        # The search ignores case, so normalise the hit before the table lookup.
        return replacement_for[match.group(0).lower()]

    # count=1 limits the rewrite to the first hit. re.sub hands back the input
    # unchanged when nothing matches, so no "was it modified?" check is needed.
    return re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE)
def fl_run_example(image, fl_model, fl_processor):
    """Generate a detailed caption for a single image.

    Args:
        image: Anything ``PIL.Image.open`` accepts (e.g. a file path).
        fl_model: Model object whose ``generate`` method accepts ``input_ids``,
            ``pixel_values``, ``max_new_tokens`` and ``num_beams``.
        fl_processor: Processor object that is callable with ``text``,
            ``images`` and ``return_tensors`` and provides ``batch_decode``
            and ``post_process_generation``.

    Returns:
        The ``"<DESCRIPTION>"`` entry of the post-processed generation after it
        has been passed through ``fl_modify_caption``.
    """
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."

    # Use a context manager so the underlying file handle is released as soon
    # as the pixels are read. convert() returns an independent, fully loaded
    # RGB image, so it stays usable after the file is closed.
    with Image.open(image) as source_image:
        rgb_image = source_image.convert("RGB")

    # Put the inputs on whatever device the model reports instead of assuming
    # CUDA; keep "cuda" as the fallback for objects without a `device` attribute.
    target_device = getattr(fl_model, "device", "cuda")
    inputs = fl_processor(text=prompt, images=rgb_image, return_tensors="pt").to(target_device)

    generated_ids = fl_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    # Special tokens are kept in the decoded text that is handed to the
    # processor's post-processing step.
    generated_text = fl_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = fl_processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(rgb_image.width, rgb_image.height),
    )
    return fl_modify_caption(parsed_answer["<DESCRIPTION>"])
def predict_tags_fl2_sd3(image, fl_model, fl_processor):
    """Return the caption that ``fl_run_example`` produces for ``image``.

    Thin entry point: all three arguments are forwarded unchanged.
    """
    return fl_run_example(image, fl_model, fl_processor)
def main(args):
    """Make sure the model is available locally, load it and print one caption.

    Args:
        args: Parsed command-line namespace providing ``model_dir``,
            ``repo_id``, ``force_download`` and ``image_path``.
    """
    # The local folder name is the repo id with every "/" turned into "_"
    # ("user/repo" or "user/repo/branch"), placed under model_dir.
    flattened_repo = args.repo_id.replace("/", "_")
    model_location = os.path.join(args.model_dir, flattened_repo)

    needs_download = args.force_download or not os.path.exists(model_location)
    if needs_download:
        os.makedirs(args.model_dir, exist_ok=True)
        logger.info(f"downloading Florence-2-SD3-Captioner model from hf_hub. id: {args.repo_id}")
        # Imported lazily: only needed when a download actually happens.
        from huggingface_hub import snapshot_download

        snapshot_download(
            repo_id=args.repo_id,
            local_dir=model_location,
            local_dir_use_symlinks=False,
        )
    else:
        logger.info("using existing Florence-2-SD3-Captioner model")

    # Heavy dependencies are imported only once they are about to be used.
    from transformers import AutoProcessor, AutoModelForCausalLM
    import torch

    fl_model = AutoModelForCausalLM.from_pretrained(
        model_location,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
    )
    fl_processor = AutoProcessor.from_pretrained(model_location, trust_remote_code=True)

    print(predict_tags_fl2_sd3(args.image_path, fl_model, fl_processor))
def setup_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the captioning script.

    Returns:
        argparse.ArgumentParser: Parser with one positional ``image_path`` and
            the optional ``--repo_id``, ``--model_dir`` and
            ``--force_download`` flags.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('image_path')

    # Optional flags, registered in the order they appear in --help.
    option_specs = (
        ("--repo_id", {
            "type": str,
            "default": DEFAULT_FLORENCE2_SD3_CAP_REPO,
            "help": "repo id for gokaygokay's Florence-2-SD3-Captioner on Hugging Face",
        }),
        ("--model_dir", {
            "type": str,
            "default": "Florence-2-SD3-Captioner_model",
            "help": "directory to store Florence-2-SD3-Captioner model",
        }),
        ("--force_download", {
            "action": "store_true",
            "help": "force downloading Florence-2-SD3-Captioner model",
        }),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli
if __name__ == "__main__":
    # Script entry point: parse the command line and hand the result to main().
    main(setup_parser().parse_args())