Spaces:

zforkash
/

Image-Transformer-Charater-Text

Sleeping

File size: 1,943 Bytes

fc17b4b
 
 
 
 
 
baa9321
 
 
 
 
 
 
 
fc17b4b
 
baa9321
 
 
 
 
 
 
fc17b4b
 
 
baa9321
fc17b4b
baa9321
 
 
 
 
fc17b4b
baa9321
 
 
 
 
fc17b4b
baa9321
 
fc17b4b
baa9321
fc17b4b
baa9321
fc17b4b
baa9321
 
 
fc17b4b
 
 
baa9321
 
 
 
fc17b4b
baa9321
 
 
fc17b4b

import streamlit as st
from transformers import pipeline
from huggingface_hub import InferenceClient
from PIL import Image
import os


def initialize():
    """Set up the Hugging Face inference client in Streamlit session state.

    The setup is guarded by the ``'initialized'`` key of ``st.session_state``
    so it runs only while that key is absent. It stores:

    * ``'api_key'``: the value of the ``HUGGINGFACE_TOKEN`` environment
      variable (``None`` when the variable is unset).
    * ``'client'``: an ``InferenceClient`` constructed with that key.
    * ``'initialized'``: ``True`` once the entries above are in place.
    """
    if 'initialized' in st.session_state:
        return

    print("Initializing...")
    api_key = os.getenv("HUGGINGFACE_TOKEN")
    st.session_state['api_key'] = api_key
    st.session_state['client'] = InferenceClient(api_key=api_key)
    # Set the flag last: if building the client raises, the next rerun must
    # retry the setup instead of skipping it and then failing on a missing
    # 'client' entry.
    st.session_state['initialized'] = True


@st.cache_resource
def _load_captioner():
    """Build the BLIP image-captioning pipeline once and reuse it on reruns."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


def _collect_stream(stream):
    """Concatenate the text deltas of a streamed chat completion.

    Chunks with no choices, or whose delta carries no text (``content`` is
    ``None`` or empty), are skipped rather than concatenated.
    """
    pieces = []
    for chunk in stream:
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)
    return "".join(pieces)


def main():
    """Render the page: caption an uploaded image, then restyle the caption.

    Flow:
      1. Ensure the inference client exists in session state.
      2. Let the user pick a character and upload an image.
      3. Caption the image with the BLIP image-to-text pipeline.
      4. Ask the chat model to rewrite the caption in the chosen character's
         voice and display the result.

    Nothing past step 2 runs until a file has been uploaded.
    """
    initialize()
    st.header("Character Captions")
    st.write("Have a character caption any image you upload!")
    character = st.selectbox("Choose a character", ["artist", "elmo", "unintelligible", "goku"])

    uploaded_img = st.file_uploader("Upload an image here")
    if uploaded_img is None:
        return

    try:
        image = Image.open(uploaded_img)
    except OSError:
        # PIL's "cannot identify image file" error is an OSError subclass;
        # report it on the page instead of crashing the script run.
        st.error("The uploaded file could not be read as an image.")
        return
    st.image(image)

    # The pipeline is cached, so the model is not rebuilt on every rerun.
    image_captioner = _load_captioner()
    caption = image_captioner(image)[0]['generated_text']

    character_prompts = {
        "artist": f"Describe this caption like you're a artist: {caption}.",
        "elmo": f"Describe this caption like you're elmo: {caption}.",
        "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
        "goku": f"Describe this caption like you're goku: {caption}."
    }
    messages = [
        {"role": "user", "content": character_prompts[character]}
    ]

    stream = st.session_state['client'].chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True
    )

    st.write(_collect_stream(stream))



# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()