import streamlit as st
from transformers import pipeline
from gtts import gTTS
# ---- Helper functions ----

# img2text: caption the uploaded image with a BLIP image-captioning model
def img2text(url):
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    text = image_to_text_model(url)[0]["generated_text"]
    return text
# text2story: turn the image caption into a short, kid-friendly story
def text2story(text):
    prompt = (
        "Generate a fun, engaging, and kid-friendly story for children aged 3-10. "
        "Based on the following scenario, create a simple, imaginative narrative: " + text
    )
    story_generator = pipeline("text-generation", model="aspis/gpt2-genre-story-generation")
    # Pass the full prompt (not just the raw caption) to the generator
    story_text = story_generator(prompt, max_length=100, num_return_sequences=1)
    return story_text[0]["generated_text"]
# text2audio: convert the story to speech using gTTS
def text2audio(story_text, filename="output.mp3"):
    tts = gTTS(text=story_text, lang='en')
    tts.save(filename)
    return filename
# ---- Main app ----
def main():
    st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
    st.header("Turn Your Image to Audio Story")

    # Cache results in session state so Streamlit reruns don't repeat the pipelines
    if "scenario" not in st.session_state:
        st.session_state.scenario = None
    if "story" not in st.session_state:
        st.session_state.story = None
    if "audio_file" not in st.session_state:
        st.session_state.audio_file = None

    uploaded_file = st.file_uploader("Select an Image...")

    if uploaded_file is not None and st.session_state.scenario is None:
        # Save the upload to disk so the captioning pipeline can read it by path
        bytes_data = uploaded_file.getvalue()
        with open(uploaded_file.name, "wb") as file:
            file.write(bytes_data)
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

        # Stage 1: Image to Text
        st.text('Processing img2text...')
        st.session_state.scenario = img2text(uploaded_file.name)
        st.write(st.session_state.scenario)

        # Stage 2: Text to Story
        st.text('Generating a story...')
        st.session_state.story = text2story(st.session_state.scenario)
        st.write(st.session_state.story)

        # Stage 3: Story to Audio File
        st.text('Generating audio...')
        st.session_state.audio_file = text2audio(st.session_state.story)

    elif st.session_state.scenario:
        # Results already generated: show them again without reprocessing
        if uploaded_file is not None:
            st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
        st.write("Image Caption: ", st.session_state.scenario)
        st.write("Generated Story: ", st.session_state.story)

    # Play button (no reprocessing)
    if st.session_state.audio_file and st.button("Play Audio"):
        st.audio(st.session_state.audio_file, format="audio/mp3")


if __name__ == "__main__":
    main()
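
# A minimal sketch of how this app might be run locally, assuming the file is saved
# as app.py (the filename and the exact dependency list are assumptions, not taken
# from the repo's own requirements):
#
#   pip install streamlit transformers torch gtts
#   streamlit run app.py
#
# The pipelines are constructed inside img2text/text2story, so the first upload
# triggers the BLIP and GPT-2 model downloads from the Hugging Face Hub and the
# initial caption/story generation can take noticeably longer than later runs.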