Spaces:

Pixeltable
/

Document-to-Audio-Synthesis

Sleeping

App Files Community

PierreBrunelle committed on Oct 23, 2024

Commit

096ae89

verified ·

1 Parent(s): bca3d41

Update src/interface.py

Browse files

Files changed (1) hide show

src/interface.py +50 -55

src/interface.py CHANGED Viewed

@@ -7,12 +7,7 @@ SYNTHESIS_MODES = {
         "styles": ["Technical", "Narrative", "Instructional", "Descriptive"],
         "default_temp": 0.7,
         "default_chunks": 300,
-        "system_prompt": """Convert this content into clear narration.
-            Format:
-            - Clear sentence structures
-            - Natural pauses (...)
-            - Term definitions when needed
-            - Proper transitions"""
     },
     "podcast": {
         "description": "Conversational style with engaging tone and dynamic pacing",
@@ -67,42 +62,42 @@ def create_interface():
         synthesis_mode = gr.State(SYNTHESIS_MODES["narration"])
-        # Main Input Row
         with gr.Row():
-            # Left Column - Core Inputs
-            with gr.Column(scale=1):
-                with gr.Row():
-                    api_key = gr.Textbox(
-                        label="🔑 OpenAI API Key",
-                        placeholder="sk-...",
-                        type="password",
-                        scale=2
                     )
-                    file_input = gr.File(
-                        label="📁 Input PDF",
-                        file_types=[".pdf"],
-                        scale=1
                     )
-            # Right Column - Mode Selection
-            with gr.Column(scale=1):
-                mode_select = gr.Radio(
-                    choices=list(SYNTHESIS_MODES.keys()),
-                    value="narration",
-                    label="🎭 Output Mode",
-                    info="Select output type"
-                )
-                mode_description = gr.Markdown(
-                    SYNTHESIS_MODES["narration"]["description"],
-                    elem_classes=["mode-description"]
-                )
-        # Parameters Row
         with gr.Row():
-            # Voice and Style Column
             with gr.Column():
-                with gr.Box():
-                    gr.Markdown("### 🎛️ Voice & Style")
                     voice_select = gr.Radio(
                         choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                         value="onyx",
@@ -116,10 +111,9 @@ def create_interface():
                         interactive=True
                     )
-            # Processing Parameters Column
             with gr.Column():
-                with gr.Box():
-                    gr.Markdown("### ⚙️ Processing")
                     with gr.Row():
                         chunk_size = gr.Slider(
                             minimum=100, maximum=1000,
@@ -145,29 +139,30 @@ def create_interface():
             process_btn = gr.Button("🚀 Generate Audio", variant="primary", scale=2)
             status_output = gr.Textbox(label="📋 Status", scale=1)
-        # Output Tabs Row
         with gr.Tabs():
-            with gr.Tab("📝 Content"):
                 output_table = gr.Dataframe(
                     headers=["🔍 Segment", "📄 Content", "🎭 Script"],
                     wrap=True
                 )
-            with gr.Tab("🎧 Audio"):
                 with gr.Row():
-                    audio_output = gr.Audio(
-                        label="🔊 Output",
-                        type="filepath",
-                        show_download_button=True
-                    )
-                    with gr.Column():
-                        gr.Markdown("""
-                            ### 📚 Quick Tips
-                            - 🎯 Lower temperature = more consistent output
-                            - 📏 Smaller chunks = more precise control
-                            - 🎙️ Try different voices for best results
-                        """)
-        # Footer
         gr.HTML(
             """
             <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">

         "styles": ["Technical", "Narrative", "Instructional", "Descriptive"],
         "default_temp": 0.7,
         "default_chunks": 300,
+        "system_prompt": """Convert this content into clear narration."""
     },
     "podcast": {
         "description": "Conversational style with engaging tone and dynamic pacing",
         synthesis_mode = gr.State(SYNTHESIS_MODES["narration"])
+        # Main Settings Row
         with gr.Row():
+            # Core Settings Column
+            with gr.Column():
+                with gr.Accordion("🔑 Core Settings", open=True):
+                    with gr.Row():
+                        api_key = gr.Textbox(
+                            label="OpenAI API Key",
+                            placeholder="sk-...",
+                            type="password",
+                            scale=2
+                        )
+                        file_input = gr.File(
+                            label="PDF Document",
+                            file_types=[".pdf"],
+                            scale=1
+                        )
+            # Mode Selection Column
+            with gr.Column():
+                with gr.Accordion("🎭 Output Mode", open=True):
+                    mode_select = gr.Radio(
+                        choices=list(SYNTHESIS_MODES.keys()),
+                        value="narration",
+                        label="Select Mode",
+                        info="Choose output style"
                     )
+                    mode_description = gr.Markdown(
+                        SYNTHESIS_MODES["narration"]["description"]
                     )
+        # Voice and Processing Settings Row
         with gr.Row():
+            # Voice Settings Column
             with gr.Column():
+                with gr.Accordion("🎛️ Voice & Style", open=True):
                     voice_select = gr.Radio(
                         choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                         value="onyx",
                         interactive=True
                     )
+            # Processing Settings Column
             with gr.Column():
+                with gr.Accordion("⚙️ Processing Parameters", open=True):
                     with gr.Row():
                         chunk_size = gr.Slider(
                             minimum=100, maximum=1000,
             process_btn = gr.Button("🚀 Generate Audio", variant="primary", scale=2)
             status_output = gr.Textbox(label="📋 Status", scale=1)
+        # Output Section
         with gr.Tabs():
+            with gr.TabItem("📝 Content"):
                 output_table = gr.Dataframe(
                     headers=["🔍 Segment", "📄 Content", "🎭 Script"],
                     wrap=True
                 )
+            with gr.TabItem("🎧 Audio"):
                 with gr.Row():
+                    with gr.Column(scale=2):
+                        audio_output = gr.Audio(
+                            label="🔊 Synthesized Audio",
+                            type="filepath",
+                            show_download_button=True
+                        )
+                    with gr.Column(scale=1):
+                        with gr.Accordion("📚 Quick Tips", open=True):
+                            gr.Markdown("""
+                                - 🎯 Lower temperature = more consistent
+                                - 📏 Smaller chunks = more precise
+                                - 🎙️ Try different voices for best fit
+                                - 💫 Match style to content type
+                            """)
         gr.HTML(
             """
             <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">