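# NOTE: web-page residue captured together with the file; kept as comments
# so it no longer breaks parsing of the script.
# slliac's picture
# Update app.py
# aaf5130 verified
# raw
# history blame
# 8.11 kB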
import hashlib
import io
import os
import tempfile

import streamlit as st
import torch
from gtts import gTTS
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from transformers import AutoProcessor, AutoModel
# function part
# img2text
def _load_captioner():
    """Build the BLIP image-to-text pipeline (loads the model weights)."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(url):
    """Return a generated caption for an image.

    Args:
        url: Image reference handed unchanged to the ``image-to-text``
            pipeline (the app passes a local file path).

    Returns:
        The ``generated_text`` field of the pipeline's first result.
    """
    # Streamlit re-executes the whole script on every widget interaction, so
    # constructing the pipeline inline reloaded the model on each rerun.
    # st.cache_resource hands back one long-lived instance instead;
    # show_spinner=False keeps the rendered page identical to before.
    # NOTE(review): the cached pipeline is shared by every session of the app.
    image_to_text_model = st.cache_resource(show_spinner=False)(_load_captioner)()
    return image_to_text_model(url)[0]["generated_text"]
# text2story
def _load_story_generator():
    """Build the GPT-2 text-generation pipeline (loads the model weights)."""
    return pipeline('text-generation', model='gpt2')


def text2story(text):
    """Generate a short story from a scene description.

    Args:
        text: Scene description that is embedded in the generation prompt.

    Returns:
        The generated text with the prompt removed and surrounding
        whitespace stripped.
    """
    # Streamlit re-executes the whole script on every widget interaction, so
    # constructing the pipeline inline reloaded GPT-2 on each rerun.
    # st.cache_resource hands back one long-lived instance instead;
    # show_spinner=False keeps the rendered page identical to before.
    # NOTE(review): the cached pipeline is shared by every session of the app.
    generator = st.cache_resource(show_spinner=False)(_load_story_generator)()

    # Create a prompt for the story
    prompt = f"Create a short story about this scene: {text}\n\nStory:"

    # Generate the story (same generation arguments as before)
    outputs = generator(
        prompt,
        max_length=100,
        num_return_sequences=1,
        temperature=0.7,
    )
    story = outputs[0]['generated_text']

    # The pipeline output includes the prompt; drop it so only the story remains
    story = story.replace(prompt, "").strip()
    return story
def text2audio(text):
    """Synthesize English speech for ``text`` with gTTS.

    Args:
        text: The text to be spoken.

    Returns:
        A dict with the synthesized audio as an ``io.BytesIO`` rewound to its
        start under ``'audio'`` and ``24000`` under ``'sampling_rate'``, or
        ``None`` when synthesis fails (the failure is reported via
        ``st.error``).
    """
    try:
        # Render the speech straight into an in-memory buffer
        buffer = io.BytesIO()
        gTTS(text=text, lang='en').write_to_fp(buffer)
        buffer.seek(0)  # rewind so consumers read from the first byte
    except Exception as exc:
        st.error(f"Error in audio generation: {exc}")
        return None

    # NOTE(review): 24000 is labelled as gTTS's default rate in the original
    # code; that is not verifiable from this file -- confirm before relying on it.
    return {'audio': buffer, 'sampling_rate': 24000}
# Page configuration. Kept ahead of every other st.* call in the script;
# NOTE(review): older Streamlit releases reject set_page_config once another
# command has run -- confirm against the deployed version before moving it.
# The page icon is the sparkles emoji (the previous literal was mis-encoded).
st.set_page_config(page_title="StoryMagic", page_icon="✨", layout="wide")

# Custom CSS for the modern, stylish kid-friendly UI. The stylesheet text is
# sent to the browser verbatim, so its content is left exactly as it was.
_CUSTOM_CSS = """
<style>
/* Modern, stylish kid-friendly design */
@import url('https://fonts.googleapis.com/css2?family=Quicksand:wght@400;600;700&display=swap');
:root {
--primary-color: #6C63FF;
--secondary-color: #41B883;
--accent-color: #FF6B6B;
--background-light: #F7F9FC;
--text-dark: #2E3A59;
--shadow: 0 10px 20px rgba(0,0,0,0.08);
--border-radius: 16px;
}
.stApp {
background: linear-gradient(135deg, #F4F9FF, #EEFAFF);
font-family: 'Quicksand', sans-serif;
color: var(--text-dark);
}
.main .block-container {
max-width: 1000px;
padding-top: 2rem;
padding-bottom: 2rem;
}
/* Modern headers */
h1, h2, h3 {
font-family: 'Quicksand', sans-serif;
font-weight: 700;
color: var(--primary-color);
}
h1 {
font-size: 2.5rem;
text-align: center;
margin-bottom: 0;
}
h2 {
font-size: 1.8rem;
margin-bottom: 1rem;
}
h3 {
font-size: 1.4rem;
margin-bottom: 0.8rem;
}
/* Subtitle */
.subtitle {
text-align: center;
color: #6B7897;
font-size: 1.2rem;
margin-bottom: 2rem;
}
/* Card containers */
.stCard {
background: white;
border-radius: var(--border-radius);
padding: 1.5rem;
box-shadow: var(--shadow);
margin-bottom: 1.5rem;
}
/* Accent borders for stages */
.css-nahz7x, .css-ocqkz7, .css-4z1n4l {
border-left: 5px solid var(--primary-color) !important;
}
.css-1r6slb0, .css-1ubpcwi {
border-left: 5px solid var(--secondary-color) !important;
}
.css-pkbazv, .css-5rimss {
border-left: 5px solid var(--accent-color) !important;
}
/* Custom file uploader */
.stFileUploader > div > div {
background: var(--background-light);
border: 2px dashed #D0D8E6;
border-radius: 12px;
padding: 20px;
transition: all 0.3s ease;
}
.stFileUploader > div > div:hover {
border-color: var(--primary-color);
}
/* Uploaded image styling */
.stImage img {
border-radius: 12px;
box-shadow: var(--shadow);
}
/* Stage icons */
.stage-icon {
font-size: 1.6rem;
margin-right: 10px;
vertical-align: middle;
}
/* Response styling */
.stText {
font-size: 1.1rem;
line-height: 1.7;
background: var(--background-light);
padding: 1rem;
border-radius: 12px;
border-left: 4px solid var(--secondary-color);
margin: 1rem 0;
box-shadow: 0 5px 15px rgba(0,0,0,0.05);
}
/* Button styling */
.stButton > button {
background: var(--secondary-color) !important;
color: white !important;
border: none !important;
border-radius: 50px !important;
padding: 0.6rem 1.5rem !important;
font-size: 1.1rem !important;
font-weight: 600 !important;
font-family: 'Quicksand', sans-serif !important;
transition: all 0.3s ease !important;
box-shadow: 0 5px 15px rgba(65, 184, 131, 0.3) !important;
}
.stButton > button:hover {
background: #37A574 !important;
transform: translateY(-3px) !important;
box-shadow: 0 8px 20px rgba(65, 184, 131, 0.4) !important;
}
.stButton > button:active {
transform: translateY(0) !important;
}
/* Column styling for button positioning */
[data-testid="column"]:last-child .stButton {
display: flex;
justify-content: flex-end;
}
/* Audio player styling */
audio {
width: 100%;
border-radius: 50px;
height: 40px;
}
/* Emoji animation */
@keyframes bounce {
0%, 100% { transform: translateY(0); }
50% { transform: translateY(-15px); }
}
.emoji {
font-size: 1.8rem;
display: inline-block;
animation: bounce 2s infinite;
margin: 0 8px;
}
.emoji:nth-child(2) {
animation-delay: 0.2s;
}
.emoji:nth-child(3) {
animation-delay: 0.4s;
}
.emoji:nth-child(4) {
animation-delay: 0.6s;
}
/* Welcome message */
.welcome-message {
text-align: center;
padding: 3rem 1.5rem;
}
.welcome-icon {
font-size: 4rem;
margin-bottom: 1rem;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def _caption_upload(image_bytes):
    """Caption uploaded image bytes through a private temporary file.

    The bytes are deliberately not written under the uploader-supplied file
    name: that name comes from the client, and writing then deleting it in the
    working directory could overwrite and remove an existing file (an upload
    called "app.py", for example).

    Args:
        image_bytes: Raw content of the uploaded image.

    Returns:
        Whatever ``img2text`` returns for the temporary file's path.
    """
    fd, tmp_path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "wb") as tmp_file:
            tmp_file.write(image_bytes)
        return img2text(tmp_path)
    finally:
        # Runs even when captioning raises, so no temp file is left behind
        os.remove(tmp_path)


# App header
st.title("✨ StoryMagic")
st.markdown(
    "<p class='subtitle'>Upload a picture and watch it transform into a magical story!</p>",
    unsafe_allow_html=True,
)

# File uploader
with st.container():
    st.subheader("Choose a picture")
    # A real label, hidden visually, replaces the empty label Streamlit
    # discourages; the subheader above remains the visible caption.
    uploaded_file = st.file_uploader(
        "Choose a picture", key="upload", label_visibility="collapsed"
    )

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()

    # Display image
    st.image(uploaded_file, use_column_width=True)

    # Streamlit reruns this script on every widget interaction, including a
    # click on the play button below. Results are kept in session state, keyed
    # by the upload's content, so a rerun reuses them instead of redoing the
    # captioning, story generation and speech synthesis.
    digest = hashlib.sha256(bytes_data).hexdigest()
    results = st.session_state.get("story_results")
    if results is None or results.get("digest") != digest:
        results = {"digest": digest}
        st.session_state["story_results"] = results

    # Stage 1: Image to Text
    with st.container():
        st.markdown(
            "<h3><span class='stage-icon'>🔍</span> Image Analysis</h3>",
            unsafe_allow_html=True,
        )
        if "scenario" not in results:
            results["scenario"] = _caption_upload(bytes_data)
        scenario = results["scenario"]
        st.text(scenario)

    # Stage 2: Text to Story
    with st.container():
        st.markdown(
            "<h3><span class='stage-icon'>📝</span> Story Creation</h3>",
            unsafe_allow_html=True,
        )
        if "story" not in results:
            results["story"] = text2story(scenario)
        story = results["story"]
        st.text(story)

    # Stage 3: Story to Audio data
    with st.container():
        st.markdown(
            "<h3><span class='stage-icon'>🔊</span> Audio Narration</h3>",
            unsafe_allow_html=True,
        )
        audio_bytes = results.get("audio")
        if audio_bytes is None:
            # Failures are not cached, so the next rerun tries again
            audio_data = text2audio(story)
            if audio_data:
                # Store plain bytes so replay never depends on a buffer position
                audio_bytes = audio_data['audio'].getvalue()
                results["audio"] = audio_bytes

        # Button right-aligned
        _, button_col = st.columns([3, 1])
        with button_col:
            if st.button("🔊 Play Story"):
                if audio_bytes:
                    # gTTS produces MP3 data, so it is served as audio/mpeg.
                    # sample_rate is omitted: it applies to raw sample arrays,
                    # not to already-encoded audio bytes.
                    st.audio(audio_bytes, format="audio/mpeg", start_time=0)
                else:
                    st.error("Could not generate audio")
else:
    # Welcome message
    st.markdown("""
<div class="welcome-message">
<div class="welcome-icon">✨</div>
<h2>Welcome to StoryMagic!</h2>
<p style="font-size: 1.2rem; color: #6B7897; max-width: 500px; margin: 0 auto 30px;">
Upload any picture, and our magic wizard will turn it into an exciting story just for you!
</p>
<div>
<span class="emoji">🚀</span>
<span class="emoji">🦄</span>
<span class="emoji">🔮</span>
<span class="emoji">🌈</span>
</div>
</div>
""", unsafe_allow_html=True)