Florence-2-SD3-Captioner

Running on Zero

App Files Files Community

bdsqlsz commited on Jun 29, 2024

Commit

9142ec3

verified ·

1 Parent(s): 9711afa

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -3

app.py CHANGED Viewed

@@ -7,9 +7,9 @@ from PIL import Image
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to("cuda").eval()
-processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
 TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
@@ -47,7 +47,7 @@ def modify_caption(caption: str) -> str:
 def run_example(image):
     image = Image.fromarray(image)
     task_prompt = "<DESCRIPTION>"
-    prompt = task_prompt + "As an AI image annotation expert, please provide accurate annotations for the image to enhance the T5 model understanding of the content. Accurately describe images and images in the form of natural language. Your description should include key elements such as the actions, clothing, hairstyle, facial expressions, environment, dressing style, etc. of the characters in the image, as well as background content and any other important information. If the image has a distinct special style or filter, it needs to be described, otherwise it is not necessary. Your description should be accurate and accurate, only describing the actual content of the image, without describing abstract feelings such as atmosphere or quality, and should not exceed three sentences. These descriptions will be used for image reconstruction, so the closer the similarity to the original image, the better the label quality. Special tags will receive a reward of $10 per image."
     # Ensure the image is in RGB mode
     if image.mode != "RGB":

 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+model = AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True).to("cuda").eval()
+processor = AutoProcessor.from_pretrained('microsoft/Florence-2-large', trust_remote_code=True)
 TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
 def run_example(image):
     image = Image.fromarray(image)
     task_prompt = "<DESCRIPTION>"
+    prompt = task_prompt + "As an AI image annotation expert, please provide accurate annotations for the image to enhance the T5 model understanding of the content. Accurately describe images and images in the form of natural language. Your description should include key elements such as the actions, clothing, hairstyle, facial expressions, environment, dressing style, etc. of the characters in the image, as well as background content and any other important information. If the image has a distinct special style or filter, it needs to be described, otherwise it is not necessary. Your description should be accurate and accurate, only describing the actual content of the image, without describing abstract feelings such as atmosphere or quality, and should not exceed five sentences. These descriptions will be used for image reconstruction, so the closer the similarity to the original image, the better the label quality. Special tags will receive a reward of $10 per image."
     # Ensure the image is in RGB mode
     if image.mode != "RGB":