"""Minimal Gradio app: caption an image with a selectable captioning model."""

from functools import lru_cache

import torch  # noqa: F401 -- kept: transformers image-to-text pipelines need torch installed
import gradio as gr
from transformers import pipeline

# Registry of selectable models: dropdown label -> Hugging Face model id.
CAPTION_MODELS = {
    'blip-base': 'Salesforce/blip-image-captioning-base',
    'blip-large': 'Salesforce/blip-image-captioning-large',
    'vit-gpt2-coco-en': 'ydshieh/vit-gpt2-coco-en',
    'blip2-2.7b-fp16': 'Mediocreatmybest/blip2-opt-2.7b-fp16-sharded',
}


@lru_cache(maxsize=len(CAPTION_MODELS))
def _get_captioner(model_choice):
    """Build once and cache the image-to-text pipeline for *model_choice*.

    The original code constructed a fresh pipeline -- loading the full
    model from disk/network -- on every single request; caching by model
    key makes repeat requests fast. The key space is bounded by
    CAPTION_MODELS, so the cache cannot grow without limit.
    """
    return pipeline(
        task="image-to-text",
        model=CAPTION_MODELS[model_choice],
        max_new_tokens=30,
        device_map="cpu",
        use_fast=True,
    )


# Simple caption creation
def caption_image(model_choice, image_path):
    """Return a stripped caption string for the image at *image_path*.

    Parameters
    ----------
    model_choice : str
        A key of CAPTION_MODELS selecting which model to run.
    image_path : str
        Path (or URL) to the image; passed straight through to the
        transformers pipeline.
    """
    caption = _get_captioner(model_choice)(image_path)[0]['generated_text']
    return str(caption).strip()


def launch(model_choice, input):
    """Gradio callback: thin wrapper delegating to caption_image.

    NOTE(review): the parameter name `input` shadows the builtin; it is
    kept unchanged for interface compatibility with existing wiring.
    """
    return caption_image(model_choice, input)


# `gr.inputs.Dropdown` (Gradio 3.x deprecated namespace) was removed in
# Gradio 4.x; `gr.Dropdown` is valid on both 3.x and 4.x.
model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Model Choice')

iface = gr.Interface(launch, inputs=[model_dropdown, "text"], outputs="text")
iface.launch()