Spaces:

bilgeyucel
/

captionate

Running

File size: 3,169 Bytes

aac5437
95efa40
 
f835f68
b8462d5
95efa40
b8462d5
 
95efa40
8334aa7
b96a262
 
cfadd82
dc74825
b96a262
 
 
1e53050
e833e6f
e3b148f
cfadd82
b8462d5
8334aa7
 
b8462d5
8606aa2
6cd4cbd
8334aa7
102b698
418a286
b8462d5
95efa40
b96a262
95efa40
102b698
1e53050
b8462d5
f835f68
1e53050
6bf02ba
b8462d5
 
 
 
102b698
b8462d5
 
102b698
 
95efa40
b96a262
4d2d71e
cfadd82
 
418a286
1e53050
f03924e
e3b148f
1e53050
 
b96a262
95efa40
cfadd82
85f65db
95efa40

import os
import gradio as gr

from haystack.components.generators import HuggingFaceAPIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack import Pipeline
from haystack.utils import Secret
from image_captioner import ImageCaptioner

# Markdown shown at the top of the page; rendered by gr.Markdown in the UI
# layout further down in this file.
description = """
# Captionate 📸
### Create Instagram captions for your pics!

* Upload your photo or select one from the examples
* Choose your model
* ✨ Captionate! ✨ 

It uses [Salesforce/blip-image-captioning-base](https://huggingface.co/Salesforce/blip-image-captioning-base) model for image-to-text caption generation task.

For Instagrammable captions, `mistralai/Mistral-7B-Instruct-v0.1` performs best, but try different models to see how they react to the same prompt.

Built by [Bilge Yucel](https://twitter.com/bilgeycl) using [Haystack 2.0](https://github.com/deepset-ai/haystack) 💙
"""

# Prompt sent to the text-generation model. ``{{caption}}`` is the template
# variable that receives the image description produced by the captioning
# step. The separator before "Descriptive text" is a plain blank line: the
# previous whitespace-only line added padding to every request for no benefit.
prompt_template = """
You will receive a descriptive text of a photo.
Try to generate a nice Instagram caption with a phrase rhyming with the text. Include emojis in the caption.

Descriptive text: {{caption}};
Instagram Caption:
"""

# Hugging Face API token, read once at import time. A missing HF_API_KEY
# environment variable raises KeyError here, before the UI is built.
hf_api_key = os.environ["HF_API_KEY"]

def generate_caption(image_file_path, model_name):
    """Generate an Instagram-style caption for an image.

    Builds a three-step Haystack pipeline and runs it once:

    1. ``ImageCaptioner`` turns the image into a descriptive text.
    2. ``PromptBuilder`` inserts that text into ``prompt_template``.
    3. ``HuggingFaceAPIGenerator`` sends the prompt to ``model_name`` through
       the serverless inference API (at most 100 new tokens).

    Args:
        image_file_path: Path of the image to caption. May be ``None`` or
            empty when the user has not provided an image.
        model_name: Hugging Face model id used for the text-generation step.

    Returns:
        The first reply returned by the generator.

    Raises:
        gr.Error: If no image was provided or the generator returned no
            replies, so the UI can show a readable message.
    """
    # Fail early with a user-facing message instead of an opaque error from
    # deep inside the captioning component.
    if not image_file_path:
        raise gr.Error("Please upload a photo or pick one of the examples first.")

    # NOTE(review): the pipeline (including the captioner) is rebuilt on every
    # call. If ImageCaptioner loads model weights in its constructor, consider
    # caching one pipeline per model_name — confirm before changing.
    image_to_text = ImageCaptioner(model_name="Salesforce/blip-image-captioning-base")
    prompt_builder = PromptBuilder(template=prompt_template)
    generator = HuggingFaceAPIGenerator(
        api_type="serverless_inference_api",
        api_params={"model": model_name},
        token=Secret.from_token(hf_api_key),
        generation_kwargs={"max_new_tokens": 100},
    )

    captioning_pipeline = Pipeline()
    captioning_pipeline.add_component("image_to_text", image_to_text)
    captioning_pipeline.add_component("prompt_builder", prompt_builder)
    captioning_pipeline.add_component("generator", generator)

    # Image description -> prompt variable -> text generation.
    captioning_pipeline.connect("image_to_text.caption", "prompt_builder.caption")
    captioning_pipeline.connect("prompt_builder", "generator")

    result = captioning_pipeline.run(
        {"image_to_text": {"image_file_path": image_file_path}}
    )

    replies = result["generator"]["replies"]
    if not replies:
        # Avoid an IndexError when the model returns nothing.
        raise gr.Error("The model did not return a caption. Please try another model.")
    return replies[0]

# Text-generation models offered in the dropdown, and the one preselected.
_MODEL_CHOICES = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
    "tiiuae/falcon-7b-instruct",
    "tiiuae/falcon-7b",
    "HuggingFaceH4/starchat-beta",
    "bigscience/bloom",
    "google/flan-t5-xxl",
]
_DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"

# Sample images the user can click to fill the image input.
_EXAMPLE_IMAGES = ["./whale.png", "./rainbow.jpeg", "./selfie.png"]

# Page layout: intro text, then a row with the image input next to the model
# picker and examples, then the submit button and the caption output.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown(value=description)
    with gr.Row():
        image = gr.Image(type="filepath")
        with gr.Column():
            model_name = gr.Dropdown(
                choices=_MODEL_CHOICES,
                value=_DEFAULT_MODEL,
                label="Choose your model!",
            )
            gr.Examples(
                examples=_EXAMPLE_IMAGES,
                inputs=image,
                label="Click on any example",
            )
    submit_btn = gr.Button(value="✨ Captionate ✨")
    caption = gr.Textbox(label="Caption", show_copy_button=True)
    # Clicking the button runs the captioning pipeline on the selected image
    # and model, and writes the reply into the caption textbox.
    submit_btn.click(
        fn=generate_caption,
        inputs=[image, model_name],
        outputs=[caption],
    )

if __name__ == "__main__":
    demo.launch()