Spaces:

saq1b
/

podcastgen

Running

App Files Community

saq1b committed on Mar 7

Commit

aa19ef5

verified ·

1 Parent(s): 38a4215

Upload app.py

Browse files

Files changed (1) hide show

app.py +133 -57

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import gradio as gr
 from pydub import AudioSegment
 from google import genai
 from google.genai import types
@@ -284,7 +284,7 @@ Follow this example structure:
         try:
             if progress:
-                progress(0.3, "Generating podcast script...")
             # Add timeout to the API call
             response = await asyncio.wait_for(
@@ -306,19 +306,19 @@ Follow this example structure:
                 timeout=60  # 60 seconds timeout
             )
         except asyncio.TimeoutError:
-            raise gr.Error("The script generation request timed out. Please try again later.")
         except Exception as e:
             if "API key not valid" in str(e):
-                raise gr.Error("Invalid API key. Please provide a valid Gemini API key.")
             elif "rate limit" in str(e).lower():
-                raise gr.Error("Rate limit exceeded for the API key. Please try again later or provide your own Gemini API key.")
             else:
-                raise gr.Error(f"Failed to generate podcast script: {e}")
         print(f"Generated podcast script:\n{response.text}")
         if progress:
-            progress(0.4, "Script generated successfully!")
         return json.loads(response.text)
@@ -327,7 +327,7 @@ Follow this example structure:
         # Check file size before reading
         file_size = os.path.getsize(file_obj.name)
         if file_size > MAX_FILE_SIZE_BYTES:
-            raise gr.Error(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")
         async with aiofiles.open(file_obj.name, 'rb') as f:
             return await f.read()
@@ -356,7 +356,7 @@ Follow this example structure:
         except asyncio.TimeoutError:
             if os.path.exists(temp_filename):
                 os.remove(temp_filename)
-            raise gr.Error("Text-to-speech generation timed out. Please try with a shorter text.")
         except Exception as e:
             if os.path.exists(temp_filename):
                 os.remove(temp_filename)
@@ -364,7 +364,7 @@ Follow this example structure:
     async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
         if progress:
-            progress(0.9, "Combining audio files...")
         combined_audio = AudioSegment.empty()
         for audio_file in audio_files:
@@ -375,14 +375,14 @@ Follow this example structure:
         combined_audio.export(output_filename, format="wav")
         if progress:
-            progress(1.0, "Podcast generated successfully!")
         return output_filename
     async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
         try:
             if progress:
-                progress(0.1, "Starting podcast generation...")
             # Set overall timeout for the entire process
             return await asyncio.wait_for(
@@ -390,18 +390,18 @@ Follow this example structure:
                 timeout=600  # 10 minutes total timeout
             )
         except asyncio.TimeoutError:
-            raise gr.Error("The podcast generation process timed out. Please try with shorter text or try again later.")
         except Exception as e:
-            raise gr.Error(f"Error generating podcast: {str(e)}")
     async def _generate_podcast_internal(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
         if progress:
-            progress(0.2, "Generating podcast script...")
         podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
         if progress:
-            progress(0.5, "Converting text to speech...")
         # Process TTS in batches to prevent overwhelming the system
         audio_files = []
@@ -410,7 +410,7 @@ Follow this example structure:
         for i, item in enumerate(podcast_json['podcast']):
             if progress:
                 current_progress = 0.5 + (0.4 * (i / total_lines))
-                progress(current_progress, f"Processing speech {i+1}/{total_lines}...")
             try:
                 audio_file = await self.tts_generate(item['line'], item['speaker'], speaker1, speaker2)
@@ -420,12 +420,12 @@ Follow this example structure:
                 for file in audio_files:
                     if os.path.exists(file):
                         os.remove(file)
-                raise gr.Error(f"Error generating speech for line {i+1}: {str(e)}")
         combined_audio = await self.combine_audio_files(audio_files, progress)
         return combined_audio
-async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "", progress=gr.Progress()) -> str:
     start_time = time.time()
     voice_names = {
@@ -443,12 +443,13 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
     speaker2 = voice_names[speaker2]
     try:
-        progress(0.05, "Processing input...")
         if not api_key:
             api_key = os.getenv("GENAI_API_KEY")
             if not api_key:
-                raise gr.Error("No API key provided. Please provide a Gemini API key.")
         podcast_generator = PodcastGenerator()
         podcast = await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2, api_key, input_file, progress)
@@ -461,18 +462,25 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
         # Ensure we show a user-friendly error
         error_msg = str(e)
         if "rate limit" in error_msg.lower():
-            raise gr.Error("Rate limit exceeded. Please try again later or use your own API key.")
         elif "timeout" in error_msg.lower():
-            raise gr.Error("The request timed out. This could be due to server load or the length of your input. Please try again with shorter text.")
         else:
-            raise gr.Error(f"Error: {error_msg}")
-iface = gr.Interface(
-    fn=process_input,
-    inputs=[
-        gr.Textbox(label="Input Text"),
-        gr.File(label="Or Upload a PDF or TXT file"),
-        gr.Dropdown(label="Language", choices=[
             "Auto Detect",
             "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
             "Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
@@ -487,20 +495,10 @@ iface = gr.Interface(
             "Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
             "Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
             "Uzbek", "Vietnamese", "Welsh", "Zulu"
-        ],
-        value="Auto Detect"),
-        gr.Dropdown(label="Speaker 1 Voice", choices=[
-            "Andrew - English (United States)",
-            "Ava - English (United States)",
-            "Brian - English (United States)",
-            "Emma - English (United States)",
-            "Florian - German (Germany)",
-            "Seraphina - German (Germany)",
-            "Remy - French (France)",
-            "Vivienne - French (France)"
-        ],
-        value="Andrew - English (United States)"),
-        gr.Dropdown(label="Speaker 2 Voice", choices=[
             "Andrew - English (United States)",
             "Ava - English (United States)",
             "Brian - English (United States)",
@@ -509,17 +507,95 @@ iface = gr.Interface(
             "Seraphina - German (Germany)",
             "Remy - French (France)",
             "Vivienne - French (France)"
-        ],
-        value="Ava - English (United States)"),
-        gr.Textbox(label="Your Gemini API Key (Optional) - In case you are getting rate limited"),
-    ],
-    outputs=[
-        gr.Audio(label="Generated Podcast Audio")
-    ],
-    title="PodcastGen 🎙️",
-    description="Generate a 2-speaker podcast from text input or documents!",
-    allow_flagging="never",
-)
 if __name__ == "__main__":
-    iface.launch()

+import streamlit as st
 from pydub import AudioSegment
 from google import genai
 from google.genai import types
         try:
             if progress:
+                progress.progress(0.3, "Generating podcast script...")
             # Add timeout to the API call
             response = await asyncio.wait_for(
                 timeout=60  # 60 seconds timeout
             )
         except asyncio.TimeoutError:
+            raise Exception("The script generation request timed out. Please try again later.")
         except Exception as e:
             if "API key not valid" in str(e):
+                raise Exception("Invalid API key. Please provide a valid Gemini API key.")
             elif "rate limit" in str(e).lower():
+                raise Exception("Rate limit exceeded for the API key. Please try again later or provide your own Gemini API key.")
             else:
+                raise Exception(f"Failed to generate podcast script: {e}")
         print(f"Generated podcast script:\n{response.text}")
         if progress:
+            progress.progress(0.4, "Script generated successfully!")
         return json.loads(response.text)
         # Check file size before reading
         file_size = os.path.getsize(file_obj.name)
         if file_size > MAX_FILE_SIZE_BYTES:
+            raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")
         async with aiofiles.open(file_obj.name, 'rb') as f:
             return await f.read()
         except asyncio.TimeoutError:
             if os.path.exists(temp_filename):
                 os.remove(temp_filename)
+            raise Exception("Text-to-speech generation timed out. Please try with a shorter text.")
         except Exception as e:
             if os.path.exists(temp_filename):
                 os.remove(temp_filename)
     async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
         if progress:
+            progress.progress(0.9, "Combining audio files...")
         combined_audio = AudioSegment.empty()
         for audio_file in audio_files:
         combined_audio.export(output_filename, format="wav")
         if progress:
+            progress.progress(1.0, "Podcast generated successfully!")
         return output_filename
     async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
         try:
             if progress:
+                progress.progress(0.1, "Starting podcast generation...")
             # Set overall timeout for the entire process
             return await asyncio.wait_for(
                 timeout=600  # 10 minutes total timeout
             )
         except asyncio.TimeoutError:
+            raise Exception("The podcast generation process timed out. Please try with shorter text or try again later.")
         except Exception as e:
+            raise Exception(f"Error generating podcast: {str(e)}")
     async def _generate_podcast_internal(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
         if progress:
+            progress.progress(0.2, "Generating podcast script...")
         podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
         if progress:
+            progress.progress(0.5, "Converting text to speech...")
         # Process TTS in batches to prevent overwhelming the system
         audio_files = []
         for i, item in enumerate(podcast_json['podcast']):
             if progress:
                 current_progress = 0.5 + (0.4 * (i / total_lines))
+                progress.progress(current_progress, f"Processing speech {i+1}/{total_lines}...")
             try:
                 audio_file = await self.tts_generate(item['line'], item['speaker'], speaker1, speaker2)
                 for file in audio_files:
                     if os.path.exists(file):
                         os.remove(file)
+                raise Exception(f"Error generating speech for line {i+1}: {str(e)}")
         combined_audio = await self.combine_audio_files(audio_files, progress)
         return combined_audio
+async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "", progress=None) -> str:
     start_time = time.time()
     voice_names = {
     speaker2 = voice_names[speaker2]
     try:
+        if progress:
+            progress.progress(0.05, "Processing input...")
         if not api_key:
             api_key = os.getenv("GENAI_API_KEY")
             if not api_key:
+                raise Exception("No API key provided. Please provide a Gemini API key.")
         podcast_generator = PodcastGenerator()
         podcast = await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2, api_key, input_file, progress)
         # Ensure we show a user-friendly error
         error_msg = str(e)
         if "rate limit" in error_msg.lower():
+            raise Exception("Rate limit exceeded. Please try again later or use your own API key.")
         elif "timeout" in error_msg.lower():
+            raise Exception("The request timed out. This could be due to server load or the length of your input. Please try again with shorter text.")
         else:
+            raise Exception(f"Error: {error_msg}")
+# Streamlit UI
+def main():
+    st.set_page_config(page_title="PodcastGen 🎙️", page_icon="🎙️", layout="wide")
+    st.title("PodcastGen 🎙️")
+    st.write("Generate a 2-speaker podcast from text input or documents!")
+    with st.sidebar:
+        st.header("Configuration")
+        api_key = st.text_input("Your Gemini API Key (Optional)", type="password",
+                              help="In case you are getting rate limited")
+        language_options = [
             "Auto Detect",
             "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
             "Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
             "Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
             "Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
             "Uzbek", "Vietnamese", "Welsh", "Zulu"
+        ]
+        language = st.selectbox("Language", language_options, index=0)
+        voice_options = [
             "Andrew - English (United States)",
             "Ava - English (United States)",
             "Brian - English (United States)",
             "Seraphina - German (Germany)",
             "Remy - French (France)",
             "Vivienne - French (France)"
+        ]
+        speaker1 = st.selectbox("Speaker 1 Voice", voice_options, index=0)
+        speaker2 = st.selectbox("Speaker 2 Voice", voice_options, index=1)
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        input_text = st.text_area("Input Text", height=250)
+    with col2:
+        uploaded_file = st.file_uploader("Or Upload a PDF or TXT file", type=["pdf", "txt"])
+    if st.button("Generate Podcast"):
+        if not input_text and not uploaded_file:
+            st.error("Please provide either input text or upload a file.")
+            return
+        # Create a progress bar for the async operation
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        # Create a progress wrapper for compatibility with the existing code
+        class StreamlitProgress:
+            def progress(self, value, text=None):
+                progress_bar.progress(value)
+                if text:
+                    status_text.text(text)
+        try:
+            # Prepare file if uploaded
+            file_obj = None
+            if uploaded_file:
+                # Save the uploaded file to a temporary location
+                file_path = f"temp_upload_{uuid.uuid4()}{os.path.splitext(uploaded_file.name)[1]}"
+                with open(file_path, "wb") as f:
+                    f.write(uploaded_file.getbuffer())
+                class FileWrapper:
+                    def __init__(self, path, name):
+                        self.name = name
+                        self.path = path
+                    @property
+                    def name(self):
+                        return self._name
+                    @name.setter
+                    def name(self, value):
+                        self._name = value
+                file_obj = FileWrapper(file_path, uploaded_file.name)
+                file_obj.name = file_path  # Set the path as the name for proper file reading
+            # Run the async function in a new event loop
+            progress_wrapper = StreamlitProgress()
+            audio_file = asyncio.run(process_input(
+                input_text,
+                file_obj,
+                language,
+                speaker1,
+                speaker2,
+                api_key,
+                progress_wrapper
+            ))
+            # Display the audio
+            st.subheader("Generated Podcast")
+            st.audio(audio_file, format="audio/wav")
+            # Provide a download button
+            with open(audio_file, "rb") as f:
+                audio_bytes = f.read()
+            st.download_button(
+                label="Download Podcast",
+                data=audio_bytes,
+                file_name="podcast.wav",
+                mime="audio/wav"
+            )
+            # Clean up the temporary file
+            if file_obj:
+                try:
+                    os.remove(file_path)
+                except:
+                    pass
+        except Exception as e:
+            st.error(str(e))
 if __name__ == "__main__":
+    main()