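# NOTE: the captured page header read "Spaces: Runtime error / Runtime error".
# It appears to be a hosting-status banner, not part of the program.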
from __future__ import annotations

import logging
import os

import gradio as gr
import torch
from gradio_client import Client
# Markdown heading rendered at the top of the demo page.
DESCRIPTION = "# Comparing image captioning models"

# Markdown bullet list linking to the Spaces that the caption helpers call.
# The backslash after the opening quotes keeps the string from starting with
# a blank line.
ORIGINAL_SPACE_INFO = """\
- [GIT-large fine-tuned on COCO](https://huggingface.co/spaces/library-samples/image-captioning-with-git)
- [BLIP-large](https://huggingface.co/spaces/library-samples/image-captioning-with-blip)
- [BLIP-2 OPT 6.7B](https://huggingface.co/spaces/merve/BLIP2-with-transformers)
- [BLIP-2 T5-XXL](https://huggingface.co/spaces/hysts/BLIP2-with-transformers)
- [InstructBLIP](https://huggingface.co/spaces/library-samples/InstructBLIP)
- [Fuyu-8B](https://huggingface.co/spaces/adept/fuyu-8b-demo)
"""
# Sample images, keyed by the local file name they are saved under. The same
# file names are listed as examples in the UI below.
_EXAMPLE_IMAGE_URLS = {
    "cats.jpg": "http://images.cocodataset.org/val2017/000000039769.jpg",
    "stop_sign.png": "https://huggingface.co/datasets/nielsr/textcaps-sample/resolve/main/stop_sign.png",
    "astronaut.jpg": "https://cdn.openai.com/dall-e-2/demos/text2im/astronaut/horse/photo/0.jpg",
}

# Fetch each sample at import time, but skip files that are already present so
# a restart does not repeat the network round-trips.
# NOTE(review): assumes an existing file is a complete earlier download.
for _filename, _url in _EXAMPLE_IMAGE_URLS.items():
    if not os.path.exists(_filename):
        torch.hub.download_url_to_file(_url, _filename)
def generate_caption_git(image_path: str) -> str:
    """Caption an image via the ``library-samples/image-captioning-with-git`` Space.

    Args:
        image_path: Path of the image file forwarded to the Space.

    Returns:
        The value returned by the Space's ``/caption`` endpoint, or ``""``
        if connecting to the Space or running the prediction raises.
    """
    try:
        client = Client("library-samples/image-captioning-with-git")
        return client.predict(image_path, api_name="/caption")
    except Exception:
        # Best-effort boundary around a remote service: keep the UI alive, but
        # record the traceback so the real cause is not lost behind the
        # generic user-facing warning.
        logging.getLogger(__name__).exception("Caption request to the GIT-large Space failed")
        gr.Warning("The GIT-large Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_blip(image_path: str) -> str:
    """Caption an image via the ``library-samples/image-captioning-with-blip`` Space.

    The request is sent with the text prompt ``"A picture of"``.

    Args:
        image_path: Path of the image file forwarded to the Space.

    Returns:
        The value returned by the Space's ``/caption`` endpoint, or ``""``
        if connecting to the Space or running the prediction raises.
    """
    try:
        client = Client("library-samples/image-captioning-with-blip")
        return client.predict(image_path, "A picture of", api_name="/caption")
    except Exception:
        # Best-effort boundary around a remote service: keep the UI alive, but
        # record the traceback so the real cause is not lost behind the
        # generic user-facing warning.
        logging.getLogger(__name__).exception("Caption request to the BLIP-large Space failed")
        gr.Warning("The BLIP-large Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_blip2_opt(image_path: str) -> str:
    """Caption an image via the ``merve/BLIP2-with-transformers`` Space.

    Args:
        image_path: Path of the image file forwarded to the Space.

    Returns:
        The value returned by the Space's ``/caption`` endpoint, or ``""``
        if connecting to the Space or running the prediction raises.
    """
    try:
        client = Client("merve/BLIP2-with-transformers")
        return client.predict(
            image_path,
            "Beam search",
            1,  # temperature
            1,  # length penalty
            1.5,  # repetition penalty
            api_name="/caption",
        )
    except Exception:
        # Best-effort boundary around a remote service: keep the UI alive, but
        # record the traceback so the real cause is not lost behind the
        # generic user-facing warning.
        logging.getLogger(__name__).exception("Caption request to the BLIP-2 OPT 6.7B Space failed")
        gr.Warning("The BLIP2 OPT6.7B Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_blip2_t5xxl(image_path: str) -> str:
    """Caption an image via the ``hysts/BLIP2-with-transformers`` Space.

    Args:
        image_path: Path of the image file forwarded to the Space.

    Returns:
        The value returned by the Space's ``/caption`` endpoint, or ``""``
        if connecting to the Space or running the prediction raises.
    """
    try:
        client = Client("hysts/BLIP2-with-transformers")
        return client.predict(
            image_path,
            "Beam search",
            1,  # temperature
            1,  # length penalty
            1.5,  # repetition penalty
            50,  # max length
            1,  # min length
            5,  # number of beams
            0.9,  # top p
            api_name="/caption",
        )
    except Exception:
        # Best-effort boundary around a remote service: keep the UI alive, but
        # record the traceback so the real cause is not lost behind the
        # generic user-facing warning.
        logging.getLogger(__name__).exception("Caption request to the BLIP-2 T5-XXL Space failed")
        gr.Warning("The BLIP2 T5-XXL Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_instructblip(image_path: str) -> str:
    """Caption an image via the ``library-samples/InstructBLIP`` Space.

    The request is sent with the instruction ``"Describe the image."``.

    Args:
        image_path: Path of the image file forwarded to the Space.

    Returns:
        The value returned by the Space's ``/run`` endpoint, or ``""``
        if connecting to the Space or running the prediction raises.
    """
    try:
        client = Client("library-samples/InstructBLIP")
        return client.predict(
            image_path,
            "Describe the image.",
            "Beam search",
            5,  # beam size
            256,  # max length
            1,  # min length
            0.9,  # top p
            1.5,  # repetition penalty
            1.0,  # length penalty
            1.0,  # temperature
            api_name="/run",
        )
    except Exception:
        # Best-effort boundary around a remote service: keep the UI alive, but
        # record the traceback so the real cause is not lost behind the
        # generic user-facing warning.
        logging.getLogger(__name__).exception("Caption request to the InstructBLIP Space failed")
        gr.Warning("The InstructBLIP Space is currently unavailable. Please try again later.")
        return ""
def generate_caption_fuyu(image_path: str) -> str:
    """Caption an image via the ``adept/fuyu-8b-demo`` Space.

    The request is sent with the prompt ``"Generate a coco style caption."``.

    Args:
        image_path: Path of the image file forwarded to the Space.

    Returns:
        The value returned by the Space's prediction call, or ``""`` if
        connecting to the Space or running the prediction raises.
    """
    try:
        client = Client("adept/fuyu-8b-demo")
        # This Space is addressed by function index rather than by API name.
        return client.predict(image_path, "Generate a coco style caption.", fn_index=3)
    except Exception:
        # Best-effort boundary around a remote service: keep the UI alive, but
        # record the traceback so the real cause is not lost behind the
        # generic user-facing warning.
        logging.getLogger(__name__).exception("Caption request to the Fuyu-8B Space failed")
        gr.Warning("The Fuyu-8B Space is currently unavailable. Please try again later.")
        return ""
def generate_captions(image_path: str) -> tuple[str, str, str, str, str, str]:
    """Caption one image with every model, calling them one after another.

    Args:
        image_path: Path of the image file handed to each caption helper.

    Returns:
        Six results in this fixed order: GIT-large, BLIP-large,
        BLIP-2 OPT 6.7B, BLIP-2 T5-XXL, InstructBLIP, Fuyu-8B.
    """
    git_caption = generate_caption_git(image_path)
    blip_caption = generate_caption_blip(image_path)
    blip2_opt_caption = generate_caption_blip2_opt(image_path)
    blip2_t5xxl_caption = generate_caption_blip2_t5xxl(image_path)
    instructblip_caption = generate_caption_instructblip(image_path)
    fuyu_caption = generate_caption_fuyu(image_path)
    return (
        git_caption,
        blip_caption,
        blip2_opt_caption,
        blip2_t5xxl_caption,
        instructblip_caption,
        fuyu_caption,
    )
# Build the comparison UI: an image input with a button in one column and one
# text box per captioning model in the other.
# NOTE(review): the nesting below was reconstructed from a capture that had
# lost its indentation — confirm the layout against the deployed app.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # type="filepath" hands the callbacks a path string.
            input_image = gr.Image(type="filepath")
            run_button = gr.Button("Caption")
        with gr.Column():
            out_git = gr.Textbox(label="GIT-large fine-tuned on COCO")
            out_blip = gr.Textbox(label="BLIP-large")
            out_blip2_opt = gr.Textbox(label="BLIP-2 OPT 6.7B")
            out_blip2_t5xxl = gr.Textbox(label="BLIP-2 T5-XXL")
            out_instructblip = gr.Textbox(label="InstructBLIP")
            out_fuyu = gr.Textbox(label="Fuyu-8B")

    # Order matters: it must match the tuple returned by generate_captions.
    outputs = [
        out_git,
        out_blip,
        out_blip2_opt,
        out_blip2_t5xxl,
        out_instructblip,
        out_fuyu,
    ]

    gr.Examples(
        examples=[
            "cats.jpg",
            "stop_sign.png",
            "astronaut.jpg",
        ],
        inputs=input_image,
        outputs=outputs,
        fn=generate_captions,
        # Example caching is opt-in through the CACHE_EXAMPLES=1 environment variable.
        cache_examples=os.getenv("CACHE_EXAMPLES") == "1",
    )

    with gr.Accordion(label="The original Spaces can be found here:", open=False):
        gr.Markdown(ORIGINAL_SPACE_INFO)

    run_button.click(
        fn=generate_captions,
        inputs=input_image,
        outputs=outputs,
        api_name="caption",
    )
# Script entry point: turn on request queuing (at most 20 waiting requests),
# then start the server.
if __name__ == "__main__":
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch()