Spaces:
Sleeping
Sleeping
File size: 13,267 Bytes
41836cf fe073b9 2252c2a fc8a45b 3096151 8da5beb fb4c1af c67a79d 2c87af3 ea507cd 2c87af3 fb4c1af c67a79d 3f8c2b6 e13fdef 3f8c2b6 beaeb40 fb4c1af ea507cd beaeb40 2252c2a 89d0588 2252c2a beaeb40 6256080 fb4c1af 61eea33 c67a79d ea507cd fc8a45b f125c89 fc8a45b 8da5beb fc8a45b 8da5beb fc8a45b 8da5beb f125c89 8da5beb fc8a45b f125c89 ea507cd fc8a45b 8da5beb fc8a45b fb4c1af c67a79d aaf5130 6767be7 c67a79d 61eea33 c67a79d 3f8c2b6 2dd1a6b 6ac31c1 ea507cd 3f8c2b6 aaf5130 0d1ef14 f2e4210 41836cf 2c87af3 41836cf 2c87af3 41836cf aaf5130 da51bc8 c67a79d 2c87af3 8fef723 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 3ce06e2 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 ea507cd 2c87af3 124f255 2c87af3 124f255 2c87af3 c67a79d aaf5130 124f255 3f8c2b6 ea507cd 8b47931 ea507cd 124f255 ea507cd 2c87af3 ea507cd f125c89 2c87af3 c67a79d 2c87af3 8da5beb ea507cd 8da5beb ea507cd 8da5beb 2c87af3 ea507cd 2dd1a6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 |
import streamlit as st
from transformers import pipeline
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline , AutoModelForCausalLM , AutoProcessor, AutoModel , AutoModelForSequenceClassification , TextGenerationPipeline, GPT2LMHeadModel
import edge_tts
import asyncio
import os
import io
import tempfile
# Initialize session state for storing data
if 'scenario' not in st.session_state:
st.session_state.scenario = None
if 'scenario_zh' not in st.session_state:
st.session_state.scenario_zh = None
if 'story' not in st.session_state:
st.session_state.story = None
if 'story_zh' not in st.session_state:
st.session_state.story_zh = None
if 'audio_generated_zh' not in st.session_state:
st.session_state.audio_generated_zh = False
if 'audio_path_zh' not in st.session_state:
st.session_state.audio_path_zh = None
if 'audio_generated_en' not in st.session_state:
st.session_state.audio_generated_en = False
if 'audio_path_en' not in st.session_state:
st.session_state.audio_path_en = None
# function part
# img2text
def img2text(url):
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
text = image_to_text_model(url)[0]["generated_text"]
return text
# Translation function EN to ZH
def translate_to_chinese(text):
translator = pipeline("translation", model="steve-tong/opus-mt-en-zh-hk")
translation = translator(text)[0]["translation_text"]
return translation
# Generate story acccording to text
def text2story(text):
try:
#text2story - using aspis/gpt2-genre-story-generation for generated better stories
model_name = "aspis/gpt2-genre-story-generation"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
# Input should be of format "<BOS> <Genre token> Optional starter text"
input_prompt = f"<BOS> <adventure> {text}"
story = generator(input_prompt, max_length=100, do_sample=True,
repetition_penalty=1.5, temperature=1.2,
top_p=0.95, top_k=50)
return story[0]['generated_text'].strip('<BOS> <adventure>')
except Exception as e:
# just Fallback only used openai-community/gpt2 if the advanced one fails
fallback_generator = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
fallback_prompt = f"{text}"
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
inputs = tokenizer(fallback_prompt, return_tensors="pt")
fallback_story = fallback_generator.generate(
inputs.input_ids,
min_length=50,
max_new_tokens=100,
temperature=0.7,
top_p=0.9,
top_k=40,
repetition_penalty=1.2,
do_sample=True,
pad_token_id=tokenizer.eos_token_id
)
fallback_story = tokenizer.decode(fallback_story[0], skip_special_tokens=True)
return fallback_story
def load_css(css_file):
with open(css_file) as f:
st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
# Text to audio using edge_tts for Cantonese audio
async def text2audio_cantonese(text):
try:
# Use Cantonese voice from edge-tts
voice = "zh-HK-HiuMaanNeural" # Female Cantonese voice
# Alternative: "zh-HK-WanLungNeural" for male voice
# Create a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
temp_file.close()
# Configure edge-tts to save to the file path
communicate = edge_tts.Communicate(text, voice)
await communicate.save(temp_file.name)
# Return the path to the audio file
return {
'path': temp_file.name,
'success': True
}
except Exception as e:
st.error(f"中文音頻製作出左問題: {str(e)}")
return {
'path': None,
'success': False
}
# Text to audio using edge_tts for English audio
async def text2audio_english(text):
try:
# Use English voice from edge-tts
voice = "en-US-AriaNeural" # Female English voice
# Alternative: "en-US-GuyNeural" for male voice
# Create a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
temp_file.close()
# Configure edge-tts to save to the file path
communicate = edge_tts.Communicate(text, voice)
await communicate.save(temp_file.name)
# Return the path to the audio file
return {
'path': temp_file.name,
'success': True
}
except Exception as e:
st.error(f"English audio generation error: {str(e)}")
return {
'path': None,
'success': False
}
# Apply custom CSS for modern, stylish kid-friendly UI
st.set_page_config(page_title="歡迎嚟到 ISOM 5240 - 故事魔法師!", page_icon="✨", layout="wide")
load_css('styles.css')
# App header with Cantonese
st.title("")
st.markdown("<div class='welcome-message'> <div class='banner-container'><div class='magician-banner'><div class='magic-hat'>🎩</div><div class='magic-elements wand-left'>🪄</div><div class='magic-elements wand-right'>🪄</div><span class='sparkle spark1'>✨</span><span class='sparkle spark2'>✨</span><span class='sparkle spark3'>✨</span><span class='sparkle spark4'>✨</span><span class='sparkle spark5'>✨</span><h1 class='title' style='font-color: white !important;'>歡迎嚟到 ISOM 5240 - 故事魔法師!</h1></div></div><p style='font-size: 1.2rem; color: #6B7897; max-width: 500px; margin: 0 auto 30px;'>上載一張你鍾意嘅相片,我哋嘅魔法師會幫你變出一個好好玩嘅故事!</p><div><span class='emoji'>🚀</span><span class='emoji'>🦄</span> <span class='emoji'>🔮</span><span class='emoji'>🌈</span></div></div>", unsafe_allow_html=True)
# Add a progress indicator for model loading
progress_placeholder = st.empty()
# File uploader with Cantonese
with st.container():
uploaded_file = st.file_uploader(
"",
type=["jpg", "jpeg", "png"], # Limit file types
key="upload"
)
if uploaded_file is not None:
st.success("上載成功!")
else:
st.info("請選擇一張畫作上載,格式必須係 JPG、JPEG 或 PNG! 最大 200 MB !")
if uploaded_file is not None:
# Save uploaded file
bytes_data = uploaded_file.getvalue()
temp_file_path = uploaded_file.name
with open(temp_file_path, "wb") as file:
file.write(bytes_data)
# Display image
st.image(uploaded_file, use_container_width=True)
# Reset session state if a new file is uploaded (detect by checking if there's no scenario yet)
if st.session_state.scenario is None:
# Stage 1: Image to Text
with st.container():
st.markdown("<h3><span class='stage-icon'>🔍</span> 以下係我哋嘅魔術師覺得你嘅畫作係關於 : </h3>", unsafe_allow_html=True)
with progress_placeholder.container():
st.write("正在分析圖片...")
progress_bar = st.progress(0)
# Generate caption if not already done
st.session_state.scenario = img2text(temp_file_path)
progress_bar.progress(33)
# Display English caption
st.text("英文描述: " + st.session_state.scenario)
# Translate the caption to Chinese
with progress_placeholder.container():
st.write("正在翻譯...")
st.session_state.scenario_zh = translate_to_chinese(st.session_state.scenario)
progress_bar.progress(66)
# Display Chinese caption
st.text("中文描述: " + st.session_state.scenario_zh)
# Stage 2: Text to Story
with st.container():
st.markdown("<h3><span class='stage-icon'>📝</span> 以下係我哋嘅魔術師正根據你嘅畫作嘅故事: </h3>", unsafe_allow_html=True)
with progress_placeholder.container():
st.write("正在創作故事...")
# Generate story if not already done
st.session_state.story = text2story(st.session_state.scenario)
progress_bar.progress(85)
# Display English story
st.text("英文故事: " + st.session_state.story)
# Translate the story to Chinese
with progress_placeholder.container():
st.write("正在翻譯故事...")
st.session_state.story_zh = translate_to_chinese(st.session_state.story)
progress_bar.progress(100)
# Display Chinese story
st.text("中文故事: " + st.session_state.story_zh)
# Clear progress indicator
progress_placeholder.empty()
else:
# Display saved results from session state
with st.container():
st.markdown("<h3><span class='stage-icon'>🔍</span> 以下係我哋嘅魔術師覺得你嘅畫作係關於 : </h3>", unsafe_allow_html=True)
st.text("英文描述: " + st.session_state.scenario)
st.text("中文描述: " + st.session_state.scenario_zh)
with st.container():
st.markdown("<h3><span class='stage-icon'>📝</span> 以下係我哋嘅魔術師正根據你嘅畫作嘅故事: </h3>", unsafe_allow_html=True)
st.text("英文故事: " + st.session_state.story)
st.text("中文故事: " + st.session_state.story_zh)
# Stage 3: Story to Audio data
with st.container():
st.markdown("<h3><span class='stage-icon'>🔊</span> 你嘅故事準備好俾我哋嘅魔術師讀出嚟喇!</h3>", unsafe_allow_html=True)
# Create two columns for English and Cantonese buttons
col1, col2 = st.columns(2)
# English audio button
with col1:
if st.button("🔊 Play Story in English that read aloud by our magician !"):
# Only generate audio if not already done
if not st.session_state.audio_generated_en:
# Need to run async function with asyncio
audio_result = asyncio.run(text2audio_english(st.session_state.story))
st.session_state.audio_path_en = audio_result['path']
st.session_state.audio_generated_en = audio_result['success']
# Play the audio
if st.session_state.audio_path_en and os.path.exists(st.session_state.audio_path_en):
with open(st.session_state.audio_path_en, "rb") as audio_file:
audio_bytes = audio_file.read()
st.audio(audio_bytes, format="audio/mp3")
else:
st.error("Sorry! Please try again.")
# Cantonese audio button
with col2:
if st.button("🔊 你嘅故事已經準備好俾我哋嘅魔術師用廣東話讀出嚟喇!"):
# Only generate audio if not already done
if not st.session_state.audio_generated_zh:
# Need to run async function with asyncio
audio_result = asyncio.run(text2audio_cantonese(st.session_state.story_zh))
st.session_state.audio_path_zh = audio_result['path']
st.session_state.audio_generated_zh = audio_result['success']
# Play the audio
if st.session_state.audio_path_zh and os.path.exists(st.session_state.audio_path_zh):
with open(st.session_state.audio_path_zh, "rb") as audio_file:
audio_bytes = audio_file.read()
st.audio(audio_bytes, format="audio/mp3")
else:
st.error("哎呀!再試多次啦!")
# Cleanup: Remove the temporary file when the user is done
if os.path.exists(temp_file_path):
os.remove(temp_file_path)
else:
# Clear session state when no file is uploaded
# Also clean up any temporary audio files
if st.session_state.audio_path_zh and os.path.exists(st.session_state.audio_path_zh):
try:
os.remove(st.session_state.audio_path_zh)
except:
pass
if st.session_state.audio_path_en and os.path.exists(st.session_state.audio_path_en):
try:
os.remove(st.session_state.audio_path_en)
except:
pass
st.session_state.scenario = None
st.session_state.scenario_zh = None
st.session_state.story = None
st.session_state.story_zh = None
st.session_state.audio_generated_zh = False
st.session_state.audio_path_zh = None
st.session_state.audio_generated_en = False
st.session_state.audio_path_en = None
|