# Spaces: Sleeping  (page-header residue from the hosting site; not part of the program)
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline
def generate_caption(image):
    """Describe an image in one sentence using the BLIP captioning model.

    Args:
        image: What to caption. A value without a ``read`` attribute (for
            example a path/URL string or a PIL image) is handed to the
            pipeline unchanged. A binary file-like object, such as the value
            returned by ``st.file_uploader``, is first written to a temporary
            file, because the pipeline takes a path/URL or PIL image rather
            than a raw stream.

    Returns:
        The ``generated_text`` of the first caption the pipeline produces.
    """
    # Load the image captioning model
    caption_model = pipeline("image-to-text", model="facebook/blip-image-captioning-base")

    if not hasattr(image, "read"):
        return caption_model(image)[0]["generated_text"]

    # getvalue() yields the whole buffer regardless of the current read
    # position, so an upload that was already consumed elsewhere still works.
    raw_bytes = image.getvalue() if hasattr(image, "getvalue") else image.read()

    # Spool the bytes to disk and caption the file by path; the image format
    # is expected to be detected from the content, so no suffix is needed.
    fd, temp_path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "wb") as temp_file:
            temp_file.write(raw_bytes)
        return caption_model(temp_path)[0]["generated_text"]
    finally:
        # Always remove the temporary copy, even if captioning failed.
        os.remove(temp_path)
def generate_story(caption):
    """Expand a caption into a story with the GPT-2 text-generation pipeline.

    Args:
        caption: Prompt text the model continues from.

    Returns:
        The ``generated_text`` of the single sequence requested from the
        pipeline (generation is capped with ``max_length=200``).
    """
    storyteller = pipeline("text-generation", model="gpt2")

    # Exactly one sequence is requested, so the result list has one entry.
    candidates = storyteller(caption, max_length=200, num_return_sequences=1)
    first_candidate = candidates[0]
    return first_candidate["generated_text"]
def convert_to_audio(story, output_path="story_audio.mp3"):
    """Synthesize *story* as English speech with gTTS and save it as an MP3.

    Args:
        story: Text to speak.
        output_path: Where to write the MP3. Defaults to
            ``"story_audio.mp3"`` in the current working directory, which is
            the location this function wrote to before the parameter existed.

    Returns:
        ``output_path``, so callers can open the file that was written.
    """
    # Convert the story to audio using gTTS
    tts = gTTS(text=story, lang="en")
    tts.save(output_path)
    return output_path
def main():
    """Render the storytelling page.

    Shows a title and an image uploader. Once an image is uploaded it is
    displayed, captioned with ``generate_caption``, expanded into a story
    with ``generate_story``, converted to speech with ``convert_to_audio``,
    and the resulting MP3 is offered in an audio player.
    """
    st.title("Storytelling Application")

    # File uploader for the image
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        # Nothing to process until the user picks a file.
        return

    # Display the uploaded image
    st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)

    # Generate the caption for the image
    caption = generate_caption(uploaded_image)
    st.subheader("Generated Caption:")
    st.write(caption)

    # Generate the story based on the caption
    story = generate_story(caption)
    st.subheader("Generated Story:")
    st.write(story)

    # Convert the story to audio
    convert_to_audio(story)

    # Read the MP3 that convert_to_audio writes by default. The `with` block
    # closes the file handle as soon as the bytes are in memory instead of
    # leaving it open for the rest of the run.
    with open("story_audio.mp3", "rb") as audio_file:
        audio_bytes = audio_file.read()

    # Display the audio player
    st.audio(audio_bytes, format="audio/mp3")
# Build the page only when this file is executed as a script, so importing
# the module for its helper functions does not render anything.
if __name__ == "__main__":
    main()