Spaces:

slliac
/

5240-indiv-assignment

Sleeping

App Files Files Community

5240-indiv-assignment / app.py

slliac

Update app.py

ea507cd verified 23 days ago

raw

history blame

17.2 kB

	import streamlit as st
	from transformers import pipeline
	import torch
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	from transformers import AutoProcessor, AutoModel
	import edge_tts
	import asyncio
	import os
	import io
	import tempfile


	# Seed st.session_state with a starting value for every slot the app reads.
	# A slot that already holds a value (from an earlier run of this script in
	# the same session) is left untouched.
	_SESSION_DEFAULTS = (
	    ('scenario', None),             # English caption of the uploaded image
	    ('scenario_zh', None),          # Chinese translation of the caption
	    ('story', None),                # English story
	    ('story_zh', None),             # Chinese translation of the story
	    ('audio_generated_zh', False),  # whether Cantonese audio was produced
	    ('audio_path_zh', None),        # path of the Cantonese audio file
	    ('audio_generated_en', False),  # whether English audio was produced
	    ('audio_path_en', None),        # path of the English audio file
	)
	for _slot, _initial in _SESSION_DEFAULTS:
	    if _slot not in st.session_state:
	        st.session_state[_slot] = _initial


	# function part
	# img2text
	@st.cache_resource
	def _load_captioning_pipeline():
	    """Build the BLIP image-captioning pipeline once and hand back the shared copy.

	    Wrapped in ``st.cache_resource`` so the model is not loaded again each
	    time ``img2text`` is called.
	    """
	    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


	def img2text(url):
	    """Return the caption generated for an image.

	    Args:
	        url: Image reference handed straight to the image-to-text pipeline
	            (the caller in this file passes a local file path).

	    Returns:
	        The ``generated_text`` field of the first result the pipeline returns.
	    """
	    captioner = _load_captioning_pipeline()
	    return captioner(url)[0]["generated_text"]


	# Translation function EN to ZH
	@st.cache_resource
	def _load_translation_pipeline():
	    """Build the English-to-Chinese translation pipeline once and share it.

	    ``translate_to_chinese`` is called for both the caption and the story, so
	    caching through ``st.cache_resource`` avoids loading the model twice for
	    a single upload.
	    """
	    return pipeline("translation", model="steve-tong/opus-mt-en-zh-hk")


	def translate_to_chinese(text):
	    """Translate English ``text`` with the ``steve-tong/opus-mt-en-zh-hk`` model.

	    Args:
	        text: English text to translate.

	    Returns:
	        The ``translation_text`` field of the first result the pipeline returns.
	    """
	    translator = _load_translation_pipeline()
	    return translator(text)[0]["translation_text"]


	# text2story - using mosaicml/mpt-7b-storywriter model for better stories
	def _limit_story_length(story, max_chars=500, max_sentences=5):
	    """Cut an over-long story down to its first ``max_sentences`` sentences.

	    A story of at most ``max_chars`` characters is returned unchanged. A
	    longer one is split on '.', the leading pieces are joined back together,
	    and a closing period is appended only when the result does not already
	    end in sentence punctuation, so the output never ends in "..".
	    """
	    if len(story) <= max_chars:
	        return story
	    trimmed = '.'.join(story.split('.')[:max_sentences]).rstrip()
	    if not trimmed.endswith(('.', '!', '?')):
	        trimmed += '.'
	    return trimmed


	def text2story(text):
	    """Generate a short children's story from an image caption.

	    The story is first requested from ``mosaicml/mpt-7b-storywriter``. If
	    anything in that path raises, the error is shown with ``st.error`` and
	    the story is generated with ``gpt2`` instead.

	    Args:
	        text: Scene description that is embedded in the generation prompt.

	    Returns:
	        The generated text with the prompt removed and surrounding
	        whitespace stripped. Output of the primary model is additionally
	        limited to five sentences once it exceeds 500 characters.

	    Raises:
	        Exception: Whatever the ``gpt2`` fallback raises; that path is not
	            guarded.
	    """
	    try:
	        # Initialize the improved story generation pipeline
	        generator = pipeline("text-generation", model="mosaicml/mpt-7b-storywriter", trust_remote_code=True)

	        # Create a prompt for the story
	        prompt = f"Write a short children's story about this scene: {text}\n\nStory: "

	        # Generate the story - limit to a smaller max_length due to model size.
	        # do_sample=True is required for temperature to have any effect;
	        # without it generation is greedy and the setting is ignored.
	        outputs = generator(
	            prompt,
	            max_length=150,
	            num_return_sequences=1,
	            do_sample=True,
	            temperature=0.7,
	            repetition_penalty=1.2,
	        )
	        story = outputs[0]['generated_text']

	        # The pipeline echoes the prompt back; keep only the continuation.
	        story = story.replace(prompt, "").strip()

	        return _limit_story_length(story)
	    except Exception as e:
	        st.error(f"故事生成出問題: {str(e)}")
	        # Fallback to simpler model if the advanced one fails
	        fallback_generator = pipeline('text-generation', model='gpt2')
	        fallback_prompt = f"Create a short story about this scene: {text}\n\nStory:"
	        fallback_story = fallback_generator(fallback_prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
	        return fallback_story.replace(fallback_prompt, "").strip()


	# Text to audio using edge_tts for Cantonese audio
	async def text2audio_cantonese(text):
	    """Synthesize ``text`` as Cantonese speech into a temporary MP3 file.

	    Args:
	        text: Text to be spoken.

	    Returns:
	        ``{'path': <mp3 path>, 'success': True}`` when the audio was saved.
	        On any error the message is shown with ``st.error``, the partly
	        written temp file is removed, and ``{'path': None, 'success': False}``
	        is returned. The caller owns (and must delete) a returned file.
	    """
	    # Use Cantonese voice from edge-tts
	    voice = "zh-HK-HiuMaanNeural"  # Female Cantonese voice
	    # Alternative: "zh-HK-WanLungNeural" for male voice

	    audio_path = None
	    try:
	        # Only the file name is needed: create the file, then close it right
	        # away so edge-tts can write to that path itself.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
	            audio_path = temp_file.name

	        # Configure edge-tts to save to the file path
	        communicate = edge_tts.Communicate(text, voice)
	        await communicate.save(audio_path)

	        # Return the path to the audio file
	        return {
	            'path': audio_path,
	            'success': True
	        }
	    except Exception as e:
	        # The file was created with delete=False, so nothing else will remove
	        # it when synthesis fails.
	        if audio_path is not None and os.path.exists(audio_path):
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass  # best effort; the failure is still reported below
	        st.error(f"中文音頻製作出左問題: {str(e)}")
	        return {
	            'path': None,
	            'success': False
	        }


	# Text to audio using edge_tts for English audio
	async def text2audio_english(text):
	    """Synthesize ``text`` as English speech into a temporary MP3 file.

	    Args:
	        text: Text to be spoken.

	    Returns:
	        ``{'path': <mp3 path>, 'success': True}`` when the audio was saved.
	        On any error the message is shown with ``st.error``, the partly
	        written temp file is removed, and ``{'path': None, 'success': False}``
	        is returned. The caller owns (and must delete) a returned file.
	    """
	    # Use English voice from edge-tts
	    voice = "en-US-AriaNeural"  # Female English voice
	    # Alternative: "en-US-GuyNeural" for male voice

	    audio_path = None
	    try:
	        # Only the file name is needed: create the file, then close it right
	        # away so edge-tts can write to that path itself.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
	            audio_path = temp_file.name

	        # Configure edge-tts to save to the file path
	        communicate = edge_tts.Communicate(text, voice)
	        await communicate.save(audio_path)

	        # Return the path to the audio file
	        return {
	            'path': audio_path,
	            'success': True
	        }
	    except Exception as e:
	        # The file was created with delete=False, so nothing else will remove
	        # it when synthesis fails.
	        if audio_path is not None and os.path.exists(audio_path):
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass  # best effort; the failure is still reported below
	        st.error(f"English audio generation error: {str(e)}")
	        return {
	            'path': None,
	            'success': False
	        }


	# Configure the browser tab (title and icon) and use the wide page layout.
	st.set_page_config(page_title="故事魔法", page_icon="✨", layout="wide")

	# Inject the app's stylesheet: fonts, colour variables, cards, buttons, the
	# audio player and the bouncing-emoji animation used by the welcome screen.
	# NOTE(review): the .css-* selectors below look like auto-generated class
	# names; confirm they still match the elements they are meant to style.
	st.markdown("""
	<style>
	/* Modern, stylish kid-friendly design */
	@import url('https://fonts.googleapis.com/css2?family=Quicksand:wght@400;600;700&display=swap');
	@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+HK:wght@400;500;700&display=swap');

	:root {
	--primary-color: #6C63FF;
	--secondary-color: #41B883;
	--accent-color: #FF6B6B;
	--background-light: #F7F9FC;
	--text-dark: #2E3A59;
	--shadow: 0 10px 20px rgba(0,0,0,0.08);
	--border-radius: 16px;
	}

	.stApp {
	background: linear-gradient(135deg, #F4F9FF, #EEFAFF);
	font-family: 'Noto Sans HK', sans-serif;
	color: var(--text-dark);
	}

	.main .block-container {
	max-width: 1000px;
	padding-top: 2rem;
	padding-bottom: 2rem;
	}

	/* Modern headers */
	h1, h2, h3 {
	font-family: 'Noto Sans HK', sans-serif;
	font-weight: 700;
	color: var(--primary-color);
	}

	h1 {
	font-size: 2.5rem;
	text-align: center;
	margin-bottom: 0;
	}

	h2 {
	font-size: 1.8rem;
	margin-bottom: 1rem;
	}

	h3 {
	font-size: 1.4rem;
	margin-bottom: 0.8rem;
	}

	/* Subtitle */
	.subtitle {
	text-align: center;
	color: #6B7897;
	font-size: 1.2rem;
	margin-bottom: 2rem;
	}

	/* Card containers */
	.stCard {
	background: white;
	border-radius: var(--border-radius);
	padding: 1.5rem;
	box-shadow: var(--shadow);
	margin-bottom: 1.5rem;
	}

	/* Accent borders for stages */
	.css-nahz7x, .css-ocqkz7, .css-4z1n4l {
	border-left: 5px solid var(--primary-color) !important;
	}

	.css-1r6slb0, .css-1ubpcwi {
	border-left: 5px solid var(--secondary-color) !important;
	}

	.css-pkbazv, .css-5rimss {
	border-left: 5px solid var(--accent-color) !important;
	}

	/* Custom file uploader */
	.stFileUploader > div > div {
	background: var(--background-light);
	border: 2px dashed #D0D8E6;
	border-radius: 12px;
	padding: 20px;
	transition: all 0.3s ease;
	}

	.stFileUploader > div > div:hover {
	border-color: var(--primary-color);
	}

	/* Uploaded image styling */
	.stImage img {
	border-radius: 12px;
	box-shadow: var(--shadow);
	}

	/* Stage icons */
	.stage-icon {
	font-size: 1.6rem;
	margin-right: 10px;
	vertical-align: middle;
	}

	/* Response styling */
	.stText {
	font-size: 1.1rem;
	line-height: 1.7;
	background: var(--background-light);
	padding: 1rem;
	border-radius: 12px;
	border-left: 4px solid var(--secondary-color);
	margin: 1rem 0;
	box-shadow: 0 5px 15px rgba(0,0,0,0.05);
	}

	/* Button styling */
	.stButton > button {
	background: var(--secondary-color) !important;
	color: white !important;
	border: none !important;
	border-radius: 50px !important;
	padding: 0.6rem 1.5rem !important;
	font-size: 1.1rem !important;
	font-weight: 600 !important;
	font-family: 'Noto Sans HK', sans-serif !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 5px 15px rgba(65, 184, 131, 0.3) !important;
	}

	.stButton > button:hover {
	background: #37A574 !important;
	transform: translateY(-3px) !important;
	box-shadow: 0 8px 20px rgba(65, 184, 131, 0.4) !important;
	}

	.stButton > button:active {
	transform: translateY(0) !important;
	}

	/* Audio player styling */
	audio {
	width: 100%;
	border-radius: 50px;
	height: 40px;
	}

	/* Emoji animation */
	@keyframes bounce {
	0%, 100% { transform: translateY(0); }
	50% { transform: translateY(-15px); }
	}

	.emoji {
	font-size: 1.8rem;
	display: inline-block;
	animation: bounce 2s infinite;
	margin: 0 8px;
	}

	.emoji:nth-child(2) {
	animation-delay: 0.2s;
	}

	.emoji:nth-child(3) {
	animation-delay: 0.4s;
	}

	.emoji:nth-child(4) {
	animation-delay: 0.6s;
	}

	/* Welcome message */
	.welcome-message {
	text-align: center;
	padding: 3rem 1.5rem;
	}

	.welcome-icon {
	font-size: 4rem;
	margin-bottom: 1rem;
	}

	/* Audio player container */
	.audio-container {
	background: white;
	padding: 1rem;
	border-radius: 12px;
	margin-bottom: 1rem;
	box-shadow: var(--shadow);
	}

	.audio-title {
	font-weight: 600;
	margin-bottom: 0.5rem;
	color: var(--primary-color);
	}
	</style>
	""", unsafe_allow_html=True)

	# Page banner: the Cantonese title followed by a one-line tagline.
	st.title("✨ 故事魔法")
	subtitle_html = "<p class='subtitle'>上載一張圖片，睇下佢點變成一個神奇嘅故事！</p>"
	st.markdown(subtitle_html, unsafe_allow_html=True)

	# Reserve a slot near the top of the page; the processing stages further
	# down fill it with status text and a progress bar, then clear it.
	progress_placeholder = st.empty()

	# Upload area: Cantonese heading plus the file picker. The picker keeps an
	# empty label because the heading above already serves as its caption.
	upload_area = st.container()
	with upload_area:
	    st.subheader("揀一張靚相啦！")
	    uploaded_file = st.file_uploader("", key="upload")

	# Static markup shared by the "generate" and "show saved results" paths.
	_CAPTION_HEADING = "<h3><span class='stage-icon'>🔍</span> 圖片解讀中</h3>"
	_STORY_HEADING = "<h3><span class='stage-icon'>📝</span> 故事創作中</h3>"
	_AUDIO_HEADING = "<h3><span class='stage-icon'>🔊</span> 故事準備朗讀中</h3>"

	# Session slots that must all be filled before saved results can be shown.
	_RESULT_KEYS = ("scenario", "scenario_zh", "story", "story_zh")

	# Welcome message in Cantonese, shown while no file is uploaded.
	_WELCOME_HTML = """
	<div class="welcome-message">
	<div class="welcome-icon">✨</div>
	<h2>歡迎嚟到故事魔法！</h2>
	<p style="font-size: 1.2rem; color: #6B7897; max-width: 500px; margin: 0 auto 30px;">
	上載一張你鍾意嘅相片，我哋嘅魔法師會幫你變出一個好好玩嘅故事！
	</p>
	<div>
	<span class="emoji">🚀</span>
	<span class="emoji">🦄</span>
	<span class="emoji">🔮</span>
	<span class="emoji">🌈</span>
	</div>
	</div>
	"""


	def _discard_file(path):
	    """Delete ``path`` if it is set and exists; OS-level failures are ignored."""
	    if path and os.path.exists(path):
	        try:
	            os.remove(path)
	        except OSError:
	            # Best-effort cleanup: a leftover temp file must not break the page.
	            pass


	def _reset_story_state():
	    """Delete the cached audio files and clear every generated result."""
	    _discard_file(st.session_state.audio_path_zh)
	    _discard_file(st.session_state.audio_path_en)

	    st.session_state.scenario = None
	    st.session_state.scenario_zh = None
	    st.session_state.story = None
	    st.session_state.story_zh = None
	    st.session_state.audio_generated_zh = False
	    st.session_state.audio_path_zh = None
	    st.session_state.audio_generated_en = False
	    st.session_state.audio_path_en = None


	def _show_progress(message, percent):
	    """Redraw the progress slot with a status line and a bar at ``percent``."""
	    with progress_placeholder.container():
	        st.write(message)
	        st.progress(percent)


	def _caption_image_bytes(image_bytes, original_name):
	    """Caption an uploaded image via a private temp file that is always removed.

	    Only the extension of ``original_name`` is reused. The client-supplied
	    name itself is never used as a path, so an upload cannot overwrite a file
	    in the working directory or collide with another session's upload.
	    """
	    suffix = os.path.splitext(original_name)[1]
	    image_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
	    try:
	        with image_file:
	            image_file.write(image_bytes)
	        return img2text(image_file.name)
	    finally:
	        _discard_file(image_file.name)


	def _render_story_audio(*, button_label, spinner_text, synthesize, story_text,
	                        path_key, flag_key, title, failure_text):
	    """Draw one "play story" button and, when it is clicked, the audio player.

	    Args:
	        button_label: Text shown on the button.
	        spinner_text: Text shown while the audio is being synthesized.
	        synthesize: Coroutine function that takes the story text and returns
	            a dict with ``'path'`` and ``'success'`` entries.
	        story_text: Text to be spoken.
	        path_key: Session-state key holding the audio file path.
	        flag_key: Session-state key holding the "already generated" flag.
	        title: Caption rendered above the player.
	        failure_text: Message shown when no playable file is available.
	    """
	    if not st.button(button_label):
	        return

	    # Reuse earlier audio only while its file is still on disk; otherwise
	    # synthesize again instead of failing on every click.
	    cached_path = st.session_state[path_key]
	    audio_ready = bool(
	        st.session_state[flag_key] and cached_path and os.path.exists(cached_path)
	    )
	    if not audio_ready:
	        with st.spinner(spinner_text):
	            # The synthesizer is a coroutine, so run it to completion here.
	            audio_result = asyncio.run(synthesize(story_text))
	        st.session_state[path_key] = audio_result['path']
	        st.session_state[flag_key] = audio_result['success']

	    # Play the audio
	    audio_path = st.session_state[path_key]
	    if audio_path and os.path.exists(audio_path):
	        with open(audio_path, "rb") as audio_file:
	            audio_bytes = audio_file.read()
	        st.markdown(
	            f"<div class='audio-container'><div class='audio-title'>{title}</div>",
	            unsafe_allow_html=True,
	        )
	        st.audio(audio_bytes, format="audio/mp3")
	        st.markdown("</div>", unsafe_allow_html=True)
	    else:
	        st.error(failure_text)


	if uploaded_file is not None:
	    bytes_data = uploaded_file.getvalue()

	    # Remember which upload the stored results belong to (file name plus byte
	    # length). When the user swaps in a different picture the old caption,
	    # story and audio are dropped instead of being shown for the new image.
	    upload_signature = (uploaded_file.name, len(bytes_data))
	    if st.session_state.get("upload_signature") != upload_signature:
	        _reset_story_state()
	        st.session_state.upload_signature = upload_signature

	    # Display image
	    st.image(uploaded_file, use_column_width=True)

	    # Regenerate unless every result is present; a run that failed midway
	    # leaves later slots empty and must not reach the saved-results branch.
	    results_ready = all(st.session_state[key] is not None for key in _RESULT_KEYS)

	    if not results_ready:
	        # Stage 1: Image to Text
	        with st.container():
	            st.markdown(_CAPTION_HEADING, unsafe_allow_html=True)

	            _show_progress("正在分析圖片...", 0)
	            st.session_state.scenario = _caption_image_bytes(bytes_data, uploaded_file.name)

	            # Display English caption
	            st.text("英文描述: " + st.session_state.scenario)

	            # Translate the caption to Chinese
	            _show_progress("正在翻譯...", 33)
	            st.session_state.scenario_zh = translate_to_chinese(st.session_state.scenario)

	            # Display Chinese caption
	            st.text("中文描述: " + st.session_state.scenario_zh)

	        # Stage 2: Text to Story
	        with st.container():
	            st.markdown(_STORY_HEADING, unsafe_allow_html=True)

	            _show_progress("正在創作故事...", 66)
	            st.session_state.story = text2story(st.session_state.scenario)

	            # Display English story
	            st.text("英文故事: " + st.session_state.story)

	            # Translate the story to Chinese
	            _show_progress("正在翻譯故事...", 85)
	            st.session_state.story_zh = translate_to_chinese(st.session_state.story)

	            # Display Chinese story
	            st.text("中文故事: " + st.session_state.story_zh)

	        # Clear progress indicator
	        progress_placeholder.empty()

	    else:
	        # Display saved results from session state
	        with st.container():
	            st.markdown(_CAPTION_HEADING, unsafe_allow_html=True)
	            st.text("英文描述: " + st.session_state.scenario)
	            st.text("中文描述: " + st.session_state.scenario_zh)

	        with st.container():
	            st.markdown(_STORY_HEADING, unsafe_allow_html=True)
	            st.text("英文故事: " + st.session_state.story)
	            st.text("中文故事: " + st.session_state.story_zh)

	    # Stage 3: Story to Audio data
	    with st.container():
	        st.markdown(_AUDIO_HEADING, unsafe_allow_html=True)

	        # Create two columns for English and Cantonese buttons
	        col1, col2 = st.columns(2)

	        # English audio button
	        with col1:
	            _render_story_audio(
	                button_label="🔊 Play Story in English",
	                spinner_text="Generating English audio...",
	                synthesize=text2audio_english,
	                story_text=st.session_state.story,
	                path_key="audio_path_en",
	                flag_key="audio_generated_en",
	                title="English Story",
	                failure_text="Sorry! Please try again.",
	            )

	        # Cantonese audio button
	        with col2:
	            _render_story_audio(
	                button_label="🔊 播放廣東話故事",
	                spinner_text="正在準備廣東話語音...",
	                synthesize=text2audio_cantonese,
	                story_text=st.session_state.story_zh,
	                path_key="audio_path_zh",
	                flag_key="audio_generated_zh",
	                title="廣東話故事",
	                failure_text="哎呀！再試多次啦！",
	            )
	else:
	    # No file is selected: delete any temporary audio, clear the stored
	    # results and forget which upload they belonged to.
	    _reset_story_state()
	    st.session_state.upload_signature = None

	    st.markdown(_WELCOME_HTML, unsafe_allow_html=True)