"""StoryMagic: a Streamlit app that turns an uploaded picture into a narrated story.

Pipeline: image -> caption (BLIP) -> short story (GPT-2) -> speech (gTTS).
"""

import hashlib
import io
import os
import tempfile
from typing import Optional

import streamlit as st
import torch
from gtts import gTTS
from transformers import (
    AutoModel,
    AutoModelForSeq2SeqLM,
    AutoProcessor,
    AutoTokenizer,
    pipeline,
)


# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------

@st.cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once and reuse it across reruns."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


@st.cache_resource
def _load_story_generator():
    """Build the text-generation pipeline once and reuse it across reruns."""
    return pipeline("text-generation", model="gpt2")


# ---------------------------------------------------------------------------
# Processing stages
# ---------------------------------------------------------------------------

def img2text(url: str) -> str:
    """Return a caption for the image at ``url``.

    Args:
        url: Value handed straight to the image-to-text pipeline (the app
            passes a local file path).

    Returns:
        The ``generated_text`` of the first caption produced by the pipeline.
    """
    return _load_captioner()(url)[0]["generated_text"]


def text2story(text: str) -> str:
    """Generate a short story from a scene description.

    Args:
        text: Scene description (the app passes the image caption).

    Returns:
        The generated text with the prompt removed and surrounding
        whitespace stripped.
    """
    prompt = f"Create a short story about this scene: {text}\n\nStory:"
    generated = _load_story_generator()(
        prompt,
        max_length=100,
        num_return_sequences=1,
        temperature=0.7,
    )[0]["generated_text"]
    # The pipeline output includes the prompt; keep only the continuation.
    return generated.replace(prompt, "").strip()


def text2audio(text: str) -> Optional[dict]:
    """Synthesize English speech for ``text`` with gTTS.

    Args:
        text: Text to be spoken.

    Returns:
        ``{'audio': io.BytesIO, 'sampling_rate': 24000}`` with the buffer
        rewound to the start, or ``None`` if synthesis failed (the error is
        reported to the page via ``st.error``).
    """
    try:
        audio_buffer = io.BytesIO()
        gTTS(text=text, lang="en").write_to_fp(audio_buffer)
        audio_buffer.seek(0)  # rewind so consumers read from the start
        return {
            "audio": audio_buffer,
            # Retained so callers that read this key keep working; the player
            # below does not use it.
            "sampling_rate": 24000,
        }
    except Exception as e:  # UI boundary: report the failure, don't crash the page
        st.error(f"Error in audio generation: {str(e)}")
        return None


# ---------------------------------------------------------------------------
# App helpers
# ---------------------------------------------------------------------------

def _caption_upload(image_bytes: bytes) -> str:
    """Caption uploaded image bytes via a private temporary file.

    A generated temp path is used instead of the client-supplied filename so
    an upload can never overwrite or delete a file in the working directory,
    and the file is removed even when captioning raises.
    """
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(image_bytes)
        temp_path = tmp.name
    try:
        return img2text(temp_path)
    finally:
        os.remove(temp_path)


def _results_for_upload(image_bytes: bytes) -> dict:
    """Return the per-upload result store, reset whenever the image changes.

    Streamlit reruns the whole script on every widget interaction; keeping
    the caption/story/audio in session state stops a button click from
    regenerating (and thereby changing) the story.
    """
    digest = hashlib.sha256(image_bytes).hexdigest()
    if st.session_state.get("upload_digest") != digest:
        st.session_state["upload_digest"] = digest
        st.session_state["upload_results"] = {}
    return st.session_state["upload_results"]


def _stage_header(title: str) -> None:
    """Render the heading shown above a processing stage."""
    st.markdown(f"\n\n{title}\n\n", unsafe_allow_html=True)


# ---------------------------------------------------------------------------
# Page
# ---------------------------------------------------------------------------

# Apply custom CSS for modern, stylish kid-friendly UI
st.set_page_config(page_title="StoryMagic", page_icon="✨", layout="wide")
# NOTE(review): this string holds only whitespace, so no CSS is actually
# injected here -- confirm whether a <style> block is missing.
st.markdown(""" """, unsafe_allow_html=True)

# App header
st.title("✨ StoryMagic")
st.markdown(
    "\n\nUpload a picture and watch it transform into a magical story!\n\n",
    unsafe_allow_html=True,
)

# File uploader
with st.container():
    st.subheader("Choose a picture")
    # A real (hidden) label avoids Streamlit's empty-label warning.
    uploaded_file = st.file_uploader(
        "Choose a picture", key="upload", label_visibility="collapsed"
    )

if uploaded_file is not None:
    image_bytes = uploaded_file.getvalue()
    results = _results_for_upload(image_bytes)

    # Display image
    st.image(uploaded_file, use_column_width=True)

    # Stage 1: Image to Text
    with st.container():
        _stage_header("🔍 Image Analysis")
        if "scenario" not in results:
            results["scenario"] = _caption_upload(image_bytes)
        scenario = results["scenario"]
        st.text(scenario)

    # Stage 2: Text to Story
    with st.container():
        _stage_header("📝 Story Creation")
        if "story" not in results:
            results["story"] = text2story(scenario)
        story = results["story"]
        st.text(story)

    # Stage 3: Story to Audio data
    with st.container():
        _stage_header("🔊 Audio Narration")
        # A failed synthesis is stored as None, so it is retried on the next rerun.
        if results.get("audio") is None:
            results["audio"] = text2audio(story)
        audio_data = results["audio"]

        # Button right-aligned
        _, button_col = st.columns([3, 1])
        with button_col:
            if st.button("🔊 Play Story"):
                if audio_data:
                    # gTTS writes MP3 data; sample_rate applies only to raw
                    # numpy audio, so it is not passed here.
                    st.audio(
                        audio_data["audio"].getvalue(),
                        format="audio/mp3",
                        start_time=0,
                    )
                else:
                    st.error("Could not generate audio")
else:
    # Welcome message
    st.markdown("""

Welcome to StoryMagic!

Upload any picture, and our magic wizard will turn it into an exciting story just for you!

🚀 🦄 🔮 🌈
""", unsafe_allow_html=True)