# Image-to-Story app: detect objects in (or caption) an uploaded image,
# then stream an LLM-generated short story through a Gradio interface.
from collections import Counter

from langchain_core.runnables import RunnableLambda
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image
from transformers import pipeline

# Object-detection pipeline: returns a list of {'score', 'label', 'box'} dicts.
pipe1 = pipeline("object-detection", model="facebook/detr-resnet-50")
# Image-captioning pipeline: returns [{'generated_text': ...}] — used as fallback.
pipe2 = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"


# Hosted LLM endpoint for story generation; streaming=True enables
# token-by-token output via chain.astream() below.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    temperature=0.5,
    streaming=True
)
def reduce_add(a, threshold=0.89):
    """Summarize object-detection results as a human-readable count string.

    Parameters
    ----------
    a : iterable of dict
        Detection dicts with at least 'score' (float) and 'label' (str) keys.
    threshold : float, optional
        Minimum confidence score for a detection to be counted
        (default 0.89, the value previously hard-coded).

    Returns
    -------
    str
        "there are \n" followed by ', \n'-joined "<count> <label>" entries
        in first-seen label order; just "there are \n" when no detection
        clears the threshold (callers use that as the "nothing found" signal).
    """
    # Counter is a dict subclass, so it preserves first-insertion order,
    # matching the original manual accumulation.
    counts = Counter(det['label'] for det in a if det['score'] > threshold)
    return "there are \n" + ', \n'.join(
        f"{n} {label}" for label, n in counts.items()
    )

class Passs(Exception):
    """Raised when a tool finds nothing usable, triggering the runnable fallback.

    Fix: `Passs` was raised below but never defined anywhere in the file, so
    the empty-result path actually raised NameError (which only accidentally
    triggered the with_fallbacks() handler).
    """


def image_segmentation_tool(image: str):
    """Detect objects in *image* and describe them as a count string.

    Parameters
    ----------
    image : PIL image (or path-like accepted by the transformers pipeline).

    Returns
    -------
    str
        Human-readable summary from reduce_add, e.g. "there are \n2 cat".

    Raises
    ------
    Passs
        When no object clears the confidence threshold, so the
        with_fallbacks() wrapper can try captioning instead.
    """
    detections = pipe1(image)
    # Compute the summary once (original called reduce_add twice).
    summary = reduce_add(detections)
    if summary == "there are \n":
        raise Passs("no confident detections; falling back to captioning")
    return summary

def image_caption_tool(image: str):
    """Generate a caption for *image* (fallback when detection finds nothing).

    Parameters
    ----------
    image : PIL image (or path-like accepted by the transformers pipeline).

    Returns
    -------
    str
        The caption text from the vit-gpt2 captioning pipeline.

    Raises
    ------
    ValueError
        If the pipeline returns an empty caption. (The original raised the
        undefined name `Passs`, which surfaced as a NameError.)
    """
    caption_results = pipe2(image)  # renamed: these are captions, not segmentations
    caption = caption_results[0]["generated_text"]
    if caption == "":
        raise ValueError("no result found use different image to create story")
    return caption

from langchain_core.prompts import PromptTemplate


def story_generation_tool(segmentation_results):
    """Render a short story from an image description via the hosted LLM.

    The input (a detection summary or caption string) is interpolated into
    a storyteller prompt and run through the Mistral endpoint.
    """
    template_text = """
    You are a storyteller. Based on the following segmentation results, create a story:
    {segmentation_results}

    Story:
    """
    story_chain = PromptTemplate.from_template(template_text) | llm
    return story_chain.invoke(input={"segmentation_results": segmentation_results})

# def translation_tool(english_text):
#     prompt_template = """
#     You are a translator. Translate the following English text to Hindi:
#     {english_text}

#     Translation:
#     """
#     prompt = PromptTemplate.from_template(prompt_template)
#     translation = prompt | llm
#     return translation.invoke(input={"english_text": english_text})


# Primary path: object detection; if it raises (no confident detections),
# fall back to image captioning for a textual description.
runnable = RunnableLambda(image_segmentation_tool).with_fallbacks([RunnableLambda(image_caption_tool)])
runnable2 = RunnableLambda(story_generation_tool)
# runnable3 = RunnableLambda(translation_tool)

# Full pipeline: image -> textual description -> streamed story text.
chain = runnable | runnable2

import gradio as gr

# Gradio UI copy shown above the interface.
title = "Image to short Story Generator"
description = """
Upload an image, and this app will generate a short story based on the image.
"""


async def sepia(input_img):
    """Gradio handler: stream the generated story for the uploaded image.

    Yields the accumulated story text after each streamed chunk so the
    output textbox updates incrementally. (The name "sepia" is a leftover
    from the Gradio image-filter template; kept because gr.Interface below
    references it.)
    """
    collected = []
    async for piece in chain.astream(input_img):
        collected.append(piece)
        yield "".join(collected)

# Wire the streaming handler into a simple interface: PIL image in, text out.
# live=True re-runs the handler as the input changes.
demo = gr.Interface(sepia, gr.Image(type='pil'),"textarea",title=title,
    description=description,live=True
)
if __name__ == "__main__":
    demo.launch()