Spaces:
Sleeping
Sleeping
import os | |
from PIL import Image | |
from gtts import gTTS | |
import torch | |
import gradio as gr | |
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize | |
from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer | |
# Lazily created BLIP captioning pipeline, shared by every request so the
# model weights are loaded once per process instead of once per call.
_CAPTIONER = None


def _get_captioner():
    """Return the shared image-captioning pipeline, building it on first use."""
    global _CAPTIONER
    if _CAPTIONER is None:
        _CAPTIONER = pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-base",
        )
    return _CAPTIONER


def describe_photo(image):
    """Generate a one-sentence caption for an uploaded image.

    Args:
        image: Array-like object exposing ``astype`` (the Gradio ``"image"``
            input hands over a NumPy array; presumably height x width x 3
            RGB -- confirm against the Gradio version in use).

    Returns:
        The ``generated_text`` of the first caption produced by the model.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing was uploaded).
    """
    if image is None:
        # Fail with a readable message instead of an AttributeError on
        # ``None.astype``.
        raise ValueError("No image was provided.")

    pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
    results = _get_captioner()(pil_image)
    text = results[0]['generated_text']
    print(f"Image caption is: {text}")
    return text
# Lazily created (model, tokenizer) pair for GPT-2, shared across requests so
# the weights are not reloaded on every call.
_STORY_COMPONENTS = None


def _get_story_components():
    """Return the shared GPT-2 ``(model, tokenizer)`` pair, loading on first use."""
    global _STORY_COMPONENTS
    if _STORY_COMPONENTS is None:
        model = GPT2LMHeadModel.from_pretrained("gpt2")
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        _STORY_COMPONENTS = (model, tokenizer)
    return _STORY_COMPONENTS


def generate_story(description):
    """Continue an image caption into a short story with GPT-2.

    Args:
        description: Caption text used as the start of the prompt.

    Returns:
        The decoded model output. The text still begins with the prompt
        (caption plus the story instruction), since the full generated
        sequence is decoded.
    """
    model, tokenizer = _get_story_components()
    prompt = description + " [SEP] A funny and friendly story:"

    # Calling the tokenizer (rather than ``encode``) also yields the
    # attention mask, which ``generate`` otherwise has to guess.
    encoded = tokenizer(prompt, return_tensors='pt')

    outputs = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=200,  # counts prompt tokens as well as generated ones
        num_return_sequences=1,
        # ``temperature`` only takes effect when sampling is enabled; without
        # ``do_sample=True`` decoding is greedy and the value is ignored.
        do_sample=True,
        temperature=0.7,
        no_repeat_ngram_size=2,
        # GPT-2 defines no pad token; reuse EOS so generation does not have
        # to fall back to an implicit default.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def convert_to_audio(text):
    """Synthesize ``text`` to speech with gTTS and save it as an MP3.

    Args:
        text: The text to be spoken.

    Returns:
        The path of the written file, always ``"audio.mp3"`` relative to the
        current working directory (each call overwrites the previous file).
    """
    output_path = "audio.mp3"
    gTTS(text).save(output_path)
    return output_path
# Lazily created Whisper speech-recognition pipeline, shared across requests
# so the large checkpoint is loaded once per process.
_TRANSCRIBER = None


def _get_transcriber():
    """Return the shared speech-recognition pipeline, building it on first use."""
    global _TRANSCRIBER
    if _TRANSCRIBER is None:
        _TRANSCRIBER = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-large-v2",
            # Whisper works on 30-second windows; chunking lets the pipeline
            # transcribe longer clips instead of only the first window.
            chunk_length_s=30,
        )
    return _TRANSCRIBER


def audio_to_text(audio_file_path):
    """Transcribe an audio file to text with Whisper.

    Args:
        audio_file_path: Path of the audio file to transcribe. The path is
            honoured as given (previously a fixed ``"audio.mp3"`` was read
            regardless of this argument).

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    result = _get_transcriber()(audio_file_path)
    print(result)
    return result['text']
# Lazily created sentiment pipeline, shared across requests so the classifier
# is not rebuilt on every call.
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return the shared sentiment-analysis pipeline, building it on first use."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        # No model is named, so the library's default checkpoint for this
        # task is used.
        _SENTIMENT_ANALYZER = pipeline("sentiment-analysis")
    return _SENTIMENT_ANALYZER


def sentiment_analysis(text):
    """Classify the sentiment of ``text``.

    Args:
        text: The text to classify.

    Returns:
        The raw pipeline output, unmodified (presumably a list of
        ``{"label": ..., "score": ...}`` dicts -- confirm against the
        installed Transformers version).
    """
    result = _get_sentiment_analyzer()(text)
    print(result)
    return result
def app(image):
    """Run the full image -> caption -> story -> speech -> text -> sentiment chain.

    Args:
        image: The uploaded image, passed straight to ``describe_photo``.

    Returns:
        A 4-tuple ``(caption, audio_path, transcript, sentiment)`` in the
        order expected by the interface outputs.
    """
    caption = describe_photo(image)
    # The story itself is not shown; it is only spoken and then transcribed.
    speech_path = convert_to_audio(generate_story(caption))
    transcript = audio_to_text(speech_path)
    return caption, speech_path, transcript, sentiment_analysis(transcript)
# Gradio front end: one image in; caption, spoken story, transcript and
# sentiment out (matching the 4-tuple returned by ``app``).
ui = gr.Interface(
    app,
    inputs="image",
    outputs=["text", "audio", "text", "text"],
    title="Diego's Story Telling Multimodel LLM Gen AI",
)

# Start the web server as soon as the script is executed.
ui.launch()