Spaces:

slliac
/

5240-indiv-assignment

Running

App Files Files Community

slliac committed 20 days ago

Commit

ea507cd

verified ·

1 Parent(s): 8da5beb

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -41

app.py CHANGED Viewed

@@ -19,10 +19,14 @@ if 'story' not in st.session_state:
     st.session_state.story = None
 if 'story_zh' not in st.session_state:
     st.session_state.story_zh = None
-if 'audio_generated' not in st.session_state:
-    st.session_state.audio_generated = False
-if 'audio_path' not in st.session_state:
-    st.session_state.audio_path = None
 # function part
@@ -40,26 +44,42 @@ def translate_to_chinese(text):
     return translation
-# text2story
 def text2story(text):
-    # Initialize the text generation pipeline
-    generator = pipeline('text-generation', model='gpt2')
-    # Create a prompt for the story
-    prompt = f"Create a short story about this scene: {text}\n\nStory:"
-    # Generate the story
-    story = generator(prompt,
-                      max_length=100,
-                      num_return_sequences=1,
-                      temperature=0.7)[0]['generated_text']
-    # Clean up the story by removing the prompt
-    story = story.replace(prompt, "").strip()
-    return story
-# Text to audio using edge_tts for Cantonese support
 async def text2audio_cantonese(text):
     try:
         # Use Cantonese voice from edge-tts
@@ -80,7 +100,35 @@ async def text2audio_cantonese(text):
             'success': True
         }
     except Exception as e:
-        st.error(f"音頻製作出左問題: {str(e)}")
         return {
             'path': None,
             'success': False
@@ -275,6 +323,21 @@ st.markdown("""
     font-size: 4rem;
     margin-bottom: 1rem;
   }
 </style>
 """, unsafe_allow_html=True)
@@ -283,6 +346,9 @@ st.title("✨ 故事魔法")
 st.markdown("<p class='subtitle'>上載一張圖片，睇下佢點變成一個神奇嘅故事！</p>",
             unsafe_allow_html=True)
 # File uploader with Cantonese
 with st.container():
     st.subheader("揀一張靚相啦！")
@@ -304,14 +370,22 @@ if uploaded_file is not None:
         with st.container():
             st.markdown("<h3><span class='stage-icon'>🔍</span> 圖片解讀中</h3>", unsafe_allow_html=True)
             # Generate caption if not already done
             st.session_state.scenario = img2text(temp_file_path)
             # Display English caption
             st.text("英文描述: " + st.session_state.scenario)
             # Translate the caption to Chinese
             st.session_state.scenario_zh = translate_to_chinese(st.session_state.scenario)
             # Display Chinese caption
             st.text("中文描述: " + st.session_state.scenario_zh)
@@ -320,17 +394,28 @@ if uploaded_file is not None:
         with st.container():
             st.markdown("<h3><span class='stage-icon'>📝</span> 故事創作中</h3>", unsafe_allow_html=True)
             # Generate story if not already done
             st.session_state.story = text2story(st.session_state.scenario)
             # Display English story
             st.text("英文故事: " + st.session_state.story)
             # Translate the story to Chinese
             st.session_state.story_zh = translate_to_chinese(st.session_state.story)
             # Display Chinese story
             st.text("中文故事: " + st.session_state.story_zh)
     else:
         # Display saved results from session state
         with st.container():
@@ -347,22 +432,50 @@ if uploaded_file is not None:
     with st.container():
         st.markdown("<h3><span class='stage-icon'>🔊</span> 故事準備朗讀中</h3>", unsafe_allow_html=True)
-        # Play button with Cantonese text
-        if st.button("🔊 播放故事"):
-            # Only generate audio if not already done
-            if not st.session_state.audio_generated:
-                # Need to run async function with asyncio
-                audio_result = asyncio.run(text2audio_cantonese(st.session_state.story_zh))
-                st.session_state.audio_path = audio_result['path']
-                st.session_state.audio_generated = audio_result['success']
-            # Play the audio
-            if st.session_state.audio_path and os.path.exists(st.session_state.audio_path):
-                with open(st.session_state.audio_path, "rb") as audio_file:
-                    audio_bytes = audio_file.read()
-                st.audio(audio_bytes, format="audio/mp3")
-            else:
-                st.error("哎呀！再試多次啦！")
     # Cleanup: Remove the temporary file when the user is done
     if os.path.exists(temp_file_path):
@@ -370,9 +483,15 @@ if uploaded_file is not None:
 else:
     # Clear session state when no file is uploaded
     # Also clean up any temporary audio files
-    if st.session_state.audio_path and os.path.exists(st.session_state.audio_path):
         try:
-            os.remove(st.session_state.audio_path)
         except:
             pass
@@ -380,8 +499,10 @@ else:
     st.session_state.scenario_zh = None
     st.session_state.story = None
     st.session_state.story_zh = None
-    st.session_state.audio_generated = False
-    st.session_state.audio_path = None
     # Welcome message in Cantonese
     st.markdown("""

     st.session_state.story = None
 if 'story_zh' not in st.session_state:
     st.session_state.story_zh = None
+if 'audio_generated_zh' not in st.session_state:
+    st.session_state.audio_generated_zh = False
+if 'audio_path_zh' not in st.session_state:
+    st.session_state.audio_path_zh = None
+if 'audio_generated_en' not in st.session_state:
+    st.session_state.audio_generated_en = False
+if 'audio_path_en' not in st.session_state:
+    st.session_state.audio_path_en = None
 # function part
     return translation
+# text2story - using mosaicml/mpt-7b-storywriter model for better stories
 def text2story(text):
+    try:
+        # Initialize the improved story generation pipeline
+        generator = pipeline("text-generation", model="mosaicml/mpt-7b-storywriter", trust_remote_code=True)
+        # Create a prompt for the story
+        prompt = f"Write a short children's story about this scene: {text}\n\nStory: "
+        # Generate the story - limit to a smaller max_length due to model size
+        story = generator(prompt,
+                         max_length=150,
+                         num_return_sequences=1,
+                         temperature=0.7,
+                         repetition_penalty=1.2)[0]['generated_text']
+        # Clean up the story by removing the prompt
+        story = story.replace(prompt, "").strip()
+        # Trim to a reasonable length if needed
+        if len(story) > 500:
+            sentences = story.split('.')
+            trimmed_story = '.'.join(sentences[:5]) + '.'
+            return trimmed_story
+        return story
+    except Exception as e:
+        st.error(f"故事生成出問題: {str(e)}")
+        # Fallback to simpler model if the advanced one fails
+        fallback_generator = pipeline('text-generation', model='gpt2')
+        fallback_prompt = f"Create a short story about this scene: {text}\n\nStory:"
+        fallback_story = fallback_generator(fallback_prompt, max_length=100, num_return_sequences=1)[0]['generated_text']
+        return fallback_story.replace(fallback_prompt, "").strip()
+# Text to audio using edge_tts for Cantonese audio
 async def text2audio_cantonese(text):
     try:
         # Use Cantonese voice from edge-tts
             'success': True
         }
     except Exception as e:
+        st.error(f"中文音頻製作出左問題: {str(e)}")
+        return {
+            'path': None,
+            'success': False
+        }
+# Text to audio using edge_tts for English audio
+async def text2audio_english(text):
+    try:
+        # Use English voice from edge-tts
+        voice = "en-US-AriaNeural"  # Female English voice
+        # Alternative: "en-US-GuyNeural" for male voice
+        # Create a temporary file
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+        temp_file.close()
+        # Configure edge-tts to save to the file path
+        communicate = edge_tts.Communicate(text, voice)
+        await communicate.save(temp_file.name)
+        # Return the path to the audio file
+        return {
+            'path': temp_file.name,
+            'success': True
+        }
+    except Exception as e:
+        st.error(f"English audio generation error: {str(e)}")
         return {
             'path': None,
             'success': False
     font-size: 4rem;
     margin-bottom: 1rem;
   }
+  /* Audio player container */
+  .audio-container {
+    background: white;
+    padding: 1rem;
+    border-radius: 12px;
+    margin-bottom: 1rem;
+    box-shadow: var(--shadow);
+  }
+  .audio-title {
+    font-weight: 600;
+    margin-bottom: 0.5rem;
+    color: var(--primary-color);
+  }
 </style>
 """, unsafe_allow_html=True)
 st.markdown("<p class='subtitle'>上載一張圖片，睇下佢點變成一個神奇嘅故事！</p>",
             unsafe_allow_html=True)
+# Add a progress indicator for model loading
+progress_placeholder = st.empty()
 # File uploader with Cantonese
 with st.container():
     st.subheader("揀一張靚相啦！")
         with st.container():
             st.markdown("<h3><span class='stage-icon'>🔍</span> 圖片解讀中</h3>", unsafe_allow_html=True)
+            with progress_placeholder.container():
+                st.write("正在分析圖片...")
+                progress_bar = st.progress(0)
             # Generate caption if not already done
             st.session_state.scenario = img2text(temp_file_path)
+            progress_bar.progress(33)
             # Display English caption
             st.text("英文描述: " + st.session_state.scenario)
             # Translate the caption to Chinese
+            with progress_placeholder.container():
+                st.write("正在翻譯...")
             st.session_state.scenario_zh = translate_to_chinese(st.session_state.scenario)
+            progress_bar.progress(66)
             # Display Chinese caption
             st.text("中文描述: " + st.session_state.scenario_zh)
         with st.container():
             st.markdown("<h3><span class='stage-icon'>📝</span> 故事創作中</h3>", unsafe_allow_html=True)
+            with progress_placeholder.container():
+                st.write("正在創作故事...")
             # Generate story if not already done
             st.session_state.story = text2story(st.session_state.scenario)
+            progress_bar.progress(85)
             # Display English story
             st.text("英文故事: " + st.session_state.story)
             # Translate the story to Chinese
+            with progress_placeholder.container():
+                st.write("正在翻譯故事...")
             st.session_state.story_zh = translate_to_chinese(st.session_state.story)
+            progress_bar.progress(100)
             # Display Chinese story
             st.text("中文故事: " + st.session_state.story_zh)
+            # Clear progress indicator
+            progress_placeholder.empty()
     else:
         # Display saved results from session state
         with st.container():
     with st.container():
         st.markdown("<h3><span class='stage-icon'>🔊</span> 故事準備朗讀中</h3>", unsafe_allow_html=True)
+        # Create two columns for English and Cantonese buttons
+        col1, col2 = st.columns(2)
+        # English audio button
+        with col1:
+            if st.button("🔊 Play Story in English"):
+                # Only generate audio if not already done
+                if not st.session_state.audio_generated_en:
+                    with st.spinner("Generating English audio..."):
+                        # Need to run async function with asyncio
+                        audio_result = asyncio.run(text2audio_english(st.session_state.story))
+                        st.session_state.audio_path_en = audio_result['path']
+                        st.session_state.audio_generated_en = audio_result['success']
+                # Play the audio
+                if st.session_state.audio_path_en and os.path.exists(st.session_state.audio_path_en):
+                    with open(st.session_state.audio_path_en, "rb") as audio_file:
+                        audio_bytes = audio_file.read()
+                    st.markdown("<div class='audio-container'><div class='audio-title'>English Story</div>", unsafe_allow_html=True)
+                    st.audio(audio_bytes, format="audio/mp3")
+                    st.markdown("</div>", unsafe_allow_html=True)
+                else:
+                    st.error("Sorry! Please try again.")
+        # Cantonese audio button
+        with col2:
+            if st.button("🔊 播放廣東話故事"):
+                # Only generate audio if not already done
+                if not st.session_state.audio_generated_zh:
+                    with st.spinner("正在準備廣東話語音..."):
+                        # Need to run async function with asyncio
+                        audio_result = asyncio.run(text2audio_cantonese(st.session_state.story_zh))
+                        st.session_state.audio_path_zh = audio_result['path']
+                        st.session_state.audio_generated_zh = audio_result['success']
+                # Play the audio
+                if st.session_state.audio_path_zh and os.path.exists(st.session_state.audio_path_zh):
+                    with open(st.session_state.audio_path_zh, "rb") as audio_file:
+                        audio_bytes = audio_file.read()
+                    st.markdown("<div class='audio-container'><div class='audio-title'>廣東話故事</div>", unsafe_allow_html=True)
+                    st.audio(audio_bytes, format="audio/mp3")
+                    st.markdown("</div>", unsafe_allow_html=True)
+                else:
+                    st.error("哎呀！再試多次啦！")
     # Cleanup: Remove the temporary file when the user is done
     if os.path.exists(temp_file_path):
 else:
     # Clear session state when no file is uploaded
     # Also clean up any temporary audio files
+    if st.session_state.audio_path_zh and os.path.exists(st.session_state.audio_path_zh):
+        try:
+            os.remove(st.session_state.audio_path_zh)
+        except:
+            pass
+    if st.session_state.audio_path_en and os.path.exists(st.session_state.audio_path_en):
         try:
+            os.remove(st.session_state.audio_path_en)
         except:
             pass
     st.session_state.scenario_zh = None
     st.session_state.story = None
     st.session_state.story_zh = None
+    st.session_state.audio_generated_zh = False
+    st.session_state.audio_path_zh = None
+    st.session_state.audio_generated_en = False
+    st.session_state.audio_path_en = None
     # Welcome message in Cantonese
     st.markdown("""