single-podcast

Sleeping

App Files Files Community

aheedsajid committed on Nov 22, 2024

Commit

e19b042

verified ·

1 Parent(s): d4d0839

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -108

app.py CHANGED Viewed

@@ -10,14 +10,11 @@ import uuid
 from dotenv import load_dotenv
 import re
 load_dotenv()
 def sanitize_filename(filename):
     """Convert a string to a safe filename by removing special characters and spaces"""
     safe_filename = re.sub(r'[^a-zA-Z0-9_-]', '', filename.replace(' ', '_'))
     return safe_filename.lower()[:50]
 async def get_voices():
@@ -28,7 +25,6 @@ async def get_voices():
         if voice["Locale"].startswith(("en-US", "en-GB", "en-AU", "en-CA", "en-IN"))
     ]
     formatted_voices = [
         f"{voice['ShortName']} ({voice['Gender']}, {voice['Locale']})"
         for voice in english_voices
@@ -44,87 +40,54 @@ async def generate_audio(text, voice, filename):
     communicate = edge_tts.Communicate(text, extract_voice_name(voice))
     await communicate.save(filename)
-async def create_podcast_versions(data, speaker1_name, speaker2_name, speaker1_voice, speaker2_voice, title):
     session_id = str(uuid.uuid4())
     temp_dir = f'temp_{session_id}'
     safe_title = sanitize_filename(title)
     if not os.path.exists(temp_dir):
         os.makedirs(temp_dir)
     try:
-        speaker1_version = AudioSegment.empty()
-        speaker2_version = AudioSegment.empty()
         combined_version = AudioSegment.empty()
         for i, entry in enumerate(data['conversation']):
-            if 'speaker1text' in entry:
-                temp_file = f'{temp_dir}/speaker1_{i}.mp3'
-                await generate_audio(entry['speaker1text'], speaker1_voice, temp_file)
                 audio = AudioSegment.from_file(temp_file)
-                speaker1_version += audio
-                speaker2_version += AudioSegment.silent(duration=len(audio))
                 combined_version += audio
                 os.remove(temp_file)
-            if 'speaker2text' in entry:
-                temp_file = f'{temp_dir}/speaker2_{i}.mp3'
-                await generate_audio(entry['speaker2text'], speaker2_voice, temp_file)
-                audio = AudioSegment.from_file(temp_file)
-                speaker2_version += audio
-                speaker1_version += AudioSegment.silent(duration=len(audio))
-                combined_version += audio
-                os.remove(temp_file)
-        speaker1_path = f"{safe_title}_{speaker1_name.lower()}_only.mp3"
-        speaker2_path = f"{safe_title}_{speaker2_name.lower()}_only.mp3"
-        combined_path = f"{safe_title}_combined.mp3"
-        speaker1_version.export(speaker1_path, format="mp3")
-        speaker2_version.export(speaker2_path, format="mp3")
-        combined_version.export(combined_path, format="mp3")
-        return speaker1_path, speaker2_path, combined_path, temp_dir
     except Exception as e:
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
         raise e
-def generate_podcast(title, channel_name, speaker1_name, speaker2_name, speaker1_voice, speaker2_voice):
     try:
-        if not all([title, channel_name, speaker1_name, speaker2_name, speaker1_voice, speaker2_voice]):
             raise ValueError("All fields must be filled out")
         client = Client(os.getenv('API_URL'))
         result = client.predict(
             message=f"""{os.getenv('API_MESSAGE')} {{
               "title": "{title}",
               "channel": "{channel_name}",
-              "speaker1": "{speaker1_name}",
-              "speaker2": "{speaker2_name}",
               "conversation": [
                 {{
-                  "speaker1text": ""
-                }},
-                {{
-                  "speaker2text": ""
                 }}
               ]
             }}
-            give 36 sentences for both.
             """,
             request=os.getenv('API_REQUEST'),
             param_3=0.5,
@@ -134,12 +97,9 @@ def generate_podcast(title, channel_name, speaker1_name, speaker2_name, speaker1
             api_name="/chat"
         )
         try:
             podcast_data = json.loads(result)
         except json.JSONDecodeError:
             json_start = result.find('```') + 3
             json_end = result.rfind('```')
@@ -151,44 +111,28 @@ def generate_podcast(title, channel_name, speaker1_name, speaker2_name, speaker1
             else:
                 raise ValueError("Could not parse JSON from response")
-        speaker1_path, speaker2_path, combined_path, temp_dir = asyncio.run(
-            create_podcast_versions(
                 podcast_data,
-                speaker1_name,
-                speaker2_name,
-                speaker1_voice,
-                speaker2_voice,
                 title
             )
         )
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
-        return [
-            speaker1_path,
-            speaker2_path,
-            combined_path,
-            podcast_data
-        ]
     except Exception as e:
-        return [
-            None,
-            None,
-            None,
-            f"Error: {str(e)}"
-        ]
 with gr.Blocks(theme=gr.themes.Soft()) as interface:
     available_voices = asyncio.run(get_voices())
     gr.Markdown("# Easy Podcast")
-    gr.Markdown("Generate a podcast conversation between two speakers on any topic. Choose voices and customize speaker details to create your perfect podcast.<br>To use elevelabs voices or cloned voices, or to automate the podcast video creation with avatar contact me at aheedsajid@gmail.com<br>Support me USDT (TRC-20) (TAe7hsSVWtMEYz3G5V1UiUdYPQVqm28bKx)")
     with gr.Row():
         with gr.Column():
@@ -203,43 +147,25 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
                 value="WeePakistan",
                 show_label=True
             )
         with gr.Column():
-            speaker1_name = gr.Textbox(
-                label="First Speaker Name",
                 placeholder="e.g., John",
                 value="Andrew",
                 show_label=True
             )
-            speaker2_name = gr.Textbox(
-                label="Second Speaker Name",
-                placeholder="e.g., Sarah",
-                value="Priya",
-                show_label=True
-            )
-    with gr.Row():
-        with gr.Column():
-            speaker1_voice = gr.Dropdown(
                 choices=available_voices,
                 value=next((v for v in available_voices if "Andrew" in v), available_voices[0]),
-                label="First Speaker Voice",
-                info="Select voice for the first speaker"
-            )
-        with gr.Column():
-            speaker2_voice = gr.Dropdown(
-                choices=available_voices,
-                value=next((v for v in available_voices if "Ava" in v), available_voices[0]),
-                label="Second Speaker Voice",
-                info="Select voice for the second speaker"
             )
     generate_btn = gr.Button("Generate Podcast", variant="primary")
-    with gr.Row():
-        speaker1_audio = gr.Audio(label="First Speaker Audio")
-        speaker2_audio = gr.Audio(label="Second Speaker Audio")
-        combined_audio = gr.Audio(label="Combined Audio")
     conversation_json = gr.JSON(label="Generated Conversation")
     generate_btn.click(
@@ -247,15 +173,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as interface:
         inputs=[
             title,
             channel_name,
-            speaker1_name,
-            speaker2_name,
-            speaker1_voice,
-            speaker2_voice
         ],
         outputs=[
-            speaker1_audio,
-            speaker2_audio,
-            combined_audio,
             conversation_json
         ]
     )

 from dotenv import load_dotenv
 import re
 load_dotenv()
 def sanitize_filename(filename):
     """Convert a string to a safe filename by removing special characters and spaces"""
     safe_filename = re.sub(r'[^a-zA-Z0-9_-]', '', filename.replace(' ', '_'))
     return safe_filename.lower()[:50]
 async def get_voices():
         if voice["Locale"].startswith(("en-US", "en-GB", "en-AU", "en-CA", "en-IN"))
     ]
     formatted_voices = [
         f"{voice['ShortName']} ({voice['Gender']}, {voice['Locale']})"
         for voice in english_voices
     communicate = edge_tts.Communicate(text, extract_voice_name(voice))
     await communicate.save(filename)
+async def create_podcast_version(data, speaker_name, speaker_voice, title):
     session_id = str(uuid.uuid4())
     temp_dir = f'temp_{session_id}'
     safe_title = sanitize_filename(title)
     if not os.path.exists(temp_dir):
         os.makedirs(temp_dir)
     try:
         combined_version = AudioSegment.empty()
         for i, entry in enumerate(data['conversation']):
+            if 'speakertext' in entry:
+                temp_file = f'{temp_dir}/speaker_{i}.mp3'
+                await generate_audio(entry['speakertext'], speaker_voice, temp_file)
                 audio = AudioSegment.from_file(temp_file)
                 combined_version += audio
                 os.remove(temp_file)
+        output_path = f"{safe_title}_{speaker_name.lower()}.mp3"
+        combined_version.export(output_path, format="mp3")
+        return output_path, temp_dir
     except Exception as e:
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
         raise e
+def generate_podcast(title, channel_name, speaker_name, speaker_voice):
     try:
+        if not all([title, channel_name, speaker_name, speaker_voice]):
             raise ValueError("All fields must be filled out")
         client = Client(os.getenv('API_URL'))
         result = client.predict(
             message=f"""{os.getenv('API_MESSAGE')} {{
               "title": "{title}",
               "channel": "{channel_name}",
+              "speaker": "{speaker_name}",
               "conversation": [
                 {{
+                  "speakertext": ""
                 }}
               ]
             }}
+            give 36 sentences.
             """,
             request=os.getenv('API_REQUEST'),
             param_3=0.5,
             api_name="/chat"
         )
         try:
             podcast_data = json.loads(result)
         except json.JSONDecodeError:
             json_start = result.find('```') + 3
             json_end = result.rfind('```')
             else:
                 raise ValueError("Could not parse JSON from response")
+        audio_path, temp_dir = asyncio.run(
+            create_podcast_version(
                 podcast_data,
+                speaker_name,
+                speaker_voice,
                 title
             )
         )
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
+        return [audio_path, podcast_data]
     except Exception as e:
+        return [None, f"Error: {str(e)}"]
 with gr.Blocks(theme=gr.themes.Soft()) as interface:
     available_voices = asyncio.run(get_voices())
     gr.Markdown("# Easy Podcast")
+    gr.Markdown("Generate a podcast monologue on any topic. Choose a voice and customize speaker details to create your perfect podcast.<br>To use elevelabs voices or cloned voices, or to automate the podcast video creation with avatar contact me at aheedsajid@gmail.com<br>Support me USDT (TRC-20) (TAe7hsSVWtMEYz3G5V1UiUdYPQVqm28bKx)")
     with gr.Row():
         with gr.Column():
                 value="WeePakistan",
                 show_label=True
             )
+    with gr.Row():
         with gr.Column():
+            speaker_name = gr.Textbox(
+                label="Speaker Name",
                 placeholder="e.g., John",
                 value="Andrew",
                 show_label=True
             )
+            speaker_voice = gr.Dropdown(
                 choices=available_voices,
                 value=next((v for v in available_voices if "Andrew" in v), available_voices[0]),
+                label="Speaker Voice",
+                info="Select voice for the speaker"
             )
     generate_btn = gr.Button("Generate Podcast", variant="primary")
+    audio_output = gr.Audio(label="Generated Audio")
     conversation_json = gr.JSON(label="Generated Conversation")
     generate_btn.click(
         inputs=[
             title,
             channel_name,
+            speaker_name,
+            speaker_voice
         ],
         outputs=[
+            audio_output,
             conversation_json
         ]
     )