Spaces:

dwarkesh
/

transcriber

Running

App Files Community

dwarkesh committed 17 days ago

Commit

16d45b9

1 Parent(s): 6686395

better progress bars

Browse files

Files changed (1) hide show

app.py +46 -12

app.py CHANGED Viewed

@@ -258,7 +258,6 @@ def rename_speakers(text: str, speaker_map: dict) -> str:
 def process_audio(audio_file):
     try:
-        # Save uploaded file with a temporary name
         temp_path = Path("temp_audio")
         temp_path.mkdir(exist_ok=True)
         temp_file = temp_path / "temp_audio.mp3"
@@ -267,34 +266,48 @@ def process_audio(audio_file):
             with open(temp_file, "wb") as f:
                 f.write(audio_file)
             # Get transcript
             transcriber = Transcriber(os.getenv("ASSEMBLYAI_API_KEY"))
             utterances = transcriber.get_transcript(temp_file)
-            # Generate original transcript
             dialogues = list(group_utterances_by_speaker(utterances))
             original = format_chunk(dialogues, markdown=True)
-            # Show original transcript immediately
-            yield original, ""
             try:
-                # Enhance transcript
                 enhancer = Enhancer(os.getenv("GOOGLE_API_KEY"))
                 chunks = prepare_audio_chunks(temp_file, utterances)
                 enhanced = asyncio.run(enhancer.enhance_chunks(chunks))
-                # Format final transcript
                 merged = "\n\n".join(chunk.strip() for chunk in enhanced)
                 merged = apply_markdown_formatting(merged)
-                yield original, merged
             except Exception as e:
-                yield original, f"Error: {str(e)}"
         finally:
-            # Cleanup temp file
             if os.path.exists(temp_file):
                 os.remove(temp_file)
@@ -333,12 +346,33 @@ with gr.Blocks(title="Transcript Enhancer") as demo:
         with gr.Column():
             gr.Markdown("### Enhanced Transcript")
             enhanced_output = gr.Markdown()
     transcribe_btn.click(
         fn=process_audio,
         inputs=[audio_input],
-        outputs=[original_output, enhanced_output]
     )
 # Launch the app

 def process_audio(audio_file):
     try:
         temp_path = Path("temp_audio")
         temp_path.mkdir(exist_ok=True)
         temp_file = temp_path / "temp_audio.mp3"
             with open(temp_file, "wb") as f:
                 f.write(audio_file)
+            # Initial state - show generating message
+            yield (
+                gr.update(value="", visible=True),  # original transcript
+                gr.update(value="", visible=True),  # enhanced transcript
+                gr.update(value="🎯 Generating transcript...", visible=True)  # status
+            )
             # Get transcript
             transcriber = Transcriber(os.getenv("ASSEMBLYAI_API_KEY"))
             utterances = transcriber.get_transcript(temp_file)
             dialogues = list(group_utterances_by_speaker(utterances))
             original = format_chunk(dialogues, markdown=True)
+            # Show original and enhancing message
+            yield (
+                gr.update(value=original, visible=True),
+                gr.update(value="", visible=True),
+                gr.update(value="🔄 Enhancing transcript...", visible=True)
+            )
             try:
                 enhancer = Enhancer(os.getenv("GOOGLE_API_KEY"))
                 chunks = prepare_audio_chunks(temp_file, utterances)
                 enhanced = asyncio.run(enhancer.enhance_chunks(chunks))
                 merged = "\n\n".join(chunk.strip() for chunk in enhanced)
                 merged = apply_markdown_formatting(merged)
+                # Show final result
+                yield (
+                    gr.update(value=original, visible=True),
+                    gr.update(value=merged, visible=True),
+                    gr.update(visible=False)  # hide status
+                )
             except Exception as e:
+                yield (
+                    gr.update(value=original, visible=True),
+                    gr.update(value=f"Error: {str(e)}", visible=True),
+                    gr.update(visible=False)
+                )
         finally:
             if os.path.exists(temp_file):
                 os.remove(temp_file)
         with gr.Column():
             gr.Markdown("### Enhanced Transcript")
+            status = gr.Markdown(
+                visible=False,
+                elem_classes="status-message"
+            )
             enhanced_output = gr.Markdown()
+    # Add some CSS
+    gr.Markdown("""
+        <style>
+        .status-message {
+            padding: 8px 15px;
+            border-radius: 4px;
+            background-color: #f0f0f0;
+            margin-bottom: 10px;
+            display: inline-block;
+        }
+        </style>
+    """)
     transcribe_btn.click(
         fn=process_audio,
         inputs=[audio_input],
+        outputs=[
+            original_output,
+            enhanced_output,
+            status
+        ]
     )
 # Launch the app