import gradio as gr
from diffusers import DiffusionPipeline
from transformers import pipeline

# Image-to-text pipeline used to caption the uploaded image.
get_caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

def captioner(image):
    output = get_caption(image)
    return output[0]['generated_text']

# Text-to-image pipeline used to generate a new image from the caption.
generate_pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

def generate(prompt):
    return generate_pipeline(prompt).images[0]

def caption_and_generate(image):
    # Caption the uploaded image, then feed the caption back in as a prompt.
    caption = captioner(image)
    image = generate(caption)
    return [caption, image]

with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game 🖍️")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_all = gr.Button("Caption and generate")
    caption = gr.Textbox(label="Generated caption")
    image_output = gr.Image(label="Generated Image")
    # One click runs both steps and fills both output components.
    btn_all.click(fn=caption_and_generate, inputs=[image_upload], outputs=[caption, image_output])

demo.launch()
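
# A minimal sketch of an alternative setup, assuming a CUDA-capable GPU is
# available (an assumption not made by the original script): loading the
# Stable Diffusion pipeline in half precision and moving it to the GPU is
# typically much faster than the CPU default above. If you adopt it, use these
# definitions in place of the generate_pipeline / generate lines earlier in
# the script rather than in addition to them.

# import torch
#
# generate_pipeline = DiffusionPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5",
#     torch_dtype=torch.float16,  # half precision reduces GPU memory use
# ).to("cuda")
#
# def generate(prompt):
#     # num_inference_steps trades image quality for latency; 25 is an
#     # illustrative value, not taken from the original snippet.
#     return generate_pipeline(prompt, num_inference_steps=25).images[0]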