File size: 2,719 Bytes
193d60a
 
839ccc8
 
193d60a
 
0815464
 
 
 
193d60a
 
0815464
 
f4c33fa
 
 
 
b736fc0
853006d
b736fc0
0815464
839ccc8
 
 
 
 
b736fc0
 
c5af96c
b736fc0
c5af96c
b736fc0
 
 
 
 
839ccc8
 
b736fc0
c5af96c
b736fc0
 
c5af96c
 
 
 
b736fc0
44dd12d
b736fc0
 
c5af96c
b736fc0
 
 
 
c5af96c
b736fc0
 
 
839ccc8
 
 
 
b736fc0
 
44dd12d
b736fc0
 
c5af96c
44dd12d
839ccc8
 
c5af96c
 
b736fc0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import hashlib
import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline

# function part
# img2text
@st.cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once and share it across reruns.

    Streamlit re-executes the script on every interaction; caching the
    pipeline as a resource avoids reloading the model weights each time.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Return a generated caption for an image.

    Args:
        url: Image reference handed straight to the image-to-text pipeline
            (the caller in this file passes a local file path).

    Returns:
        The ``generated_text`` field of the pipeline's first result.
    """
    captioner = _load_captioner()
    return captioner(url)[0]["generated_text"]

# text2story
@st.cache_resource
def _load_story_generator():
    """Build the story text-generation pipeline once and share it across reruns."""
    return pipeline("text-generation", model="aspis/gpt2-genre-story-generation")


def text2story(text):
    """Generate a short story that continues from ``text``.

    The scenario text is used directly as the seed for generation. No
    instruction prompt is prepended: the returned ``generated_text`` begins
    with whatever input was supplied, so any wrapper text would show up in
    the story itself.

    Args:
        text: Scenario/caption string used as the start of the story.

    Returns:
        The ``generated_text`` of the single returned sequence (generation is
        capped with ``max_length=100``).
    """
    story_generator = _load_story_generator()
    sequences = story_generator(text, max_length=100, num_return_sequences=1)
    return sequences[0]["generated_text"]

# text2audio using gTTS
def text2audio(story_text, filename="output.mp3"):
    """Synthesize English speech for ``story_text`` and save it to ``filename``.

    Args:
        story_text: Text to be spoken.
        filename: Destination path for the audio file written by gTTS.

    Returns:
        The ``filename`` that was written, unchanged.
    """
    gTTS(text=story_text, lang='en').save(filename)
    return filename

# Main part
def main():
    """Render the page: upload an image, caption it, write a story, narrate it.

    Results are kept in ``st.session_state`` so that widget interactions (for
    example pressing "Play Audio") re-render the page without re-running the
    models. The cached results are tied to a hash of the uploaded bytes, so a
    different image triggers fresh processing and removing the image clears
    the previous results.
    """
    st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
    st.header("Turn Your Image to Audio Story")

    state_keys = ("scenario", "story", "audio_file", "source_id")
    for key in state_keys:
        if key not in st.session_state:
            st.session_state[key] = None

    uploaded_file = st.file_uploader("Select an Image...")

    if uploaded_file is None:
        # Nothing to show; drop results that belonged to a removed image so
        # they are not displayed or played without their picture.
        for key in state_keys:
            st.session_state[key] = None
        return

    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    bytes_data = uploaded_file.getvalue()
    source_id = hashlib.sha256(bytes_data).hexdigest()

    if st.session_state.source_id != source_id:
        # Stage 1: Image to Text
        st.text('Processing img2text...')
        # Write the upload to a private temporary file rather than to a path
        # derived from the client-supplied name, which could overwrite files
        # in the working directory.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp.write(bytes_data)
            image_path = tmp.name
        try:
            scenario = img2text(image_path)
        finally:
            os.remove(image_path)
        st.write(scenario)

        # Stage 2: Text to Story
        st.text('Generating a story...')
        story = text2story(scenario)
        st.write(story)

        # Stage 3: Story to Audio File
        # NOTE: text2audio writes to its default "output.mp3", so concurrent
        # sessions share one audio file.
        st.text('Generating audio...')
        audio_filename = text2audio(story)

        # Commit everything together so a failure part-way through never
        # leaves a half-filled state behind for later reruns.
        st.session_state.scenario = scenario
        st.session_state.story = story
        st.session_state.audio_file = audio_filename
        st.session_state.source_id = source_id
    else:
        st.write("Image Caption: ", st.session_state.scenario)
        st.write("Generated Story: ", st.session_state.story)

    # Play button (No reprocessing)
    if st.session_state.audio_file and st.button("Play Audio"):
        st.audio(st.session_state.audio_file, format="audio/mp3")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()