Spaces:

dwarkesh
/

transcriber

Running

App Files Files Community

dwarkesh committed 17 days ago

Commit

fa0d8b7

1 Parent(s): 16d45b9

nice - downloads too

Browse files

Files changed (2) hide show

.gitignore +2 -1
app.py +52 -18

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 .venv/
 transcripts/
-temp_audio/

 .venv/
 transcripts/
+temp_audio/
+temp_downloads/

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from pydub import AudioSegment
 import asyncio
 import io
 from itertools import groupby
 prompt = '''
 You are an expert transcript editor. Your task is to enhance this transcript for maximum readability while maintaining the core message.
@@ -256,6 +257,23 @@ def rename_speakers(text: str, speaker_map: dict) -> str:
     return result
 def process_audio(audio_file):
     try:
         temp_path = Path("temp_audio")
@@ -266,11 +284,12 @@ def process_audio(audio_file):
             with open(temp_file, "wb") as f:
                 f.write(audio_file)
-            # Initial state - show generating message
             yield (
                 gr.update(value="", visible=True),  # original transcript
                 gr.update(value="", visible=True),  # enhanced transcript
-                gr.update(value="🎯 Generating transcript...", visible=True)  # status
             )
             # Get transcript
@@ -279,11 +298,15 @@ def process_audio(audio_file):
             dialogues = list(group_utterances_by_speaker(utterances))
             original = format_chunk(dialogues, markdown=True)
-            # Show original and enhancing message
             yield (
                 gr.update(value=original, visible=True),
                 gr.update(value="", visible=True),
-                gr.update(value="🔄 Enhancing transcript...", visible=True)
             )
             try:
@@ -293,21 +316,27 @@ def process_audio(audio_file):
                 merged = "\n\n".join(chunk.strip() for chunk in enhanced)
                 merged = apply_markdown_formatting(merged)
                 # Show final result
                 yield (
                     gr.update(value=original, visible=True),
                     gr.update(value=merged, visible=True),
-                    gr.update(visible=False)  # hide status
                 )
             except Exception as e:
                 yield (
                     gr.update(value=original, visible=True),
                     gr.update(value=f"Error: {str(e)}", visible=True),
-                    gr.update(visible=False)
                 )
         finally:
             if os.path.exists(temp_file):
                 os.remove(temp_file)
@@ -342,36 +371,41 @@ with gr.Blocks(title="Transcript Enhancer") as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Original Transcript")
             original_output = gr.Markdown()
         with gr.Column():
             gr.Markdown("### Enhanced Transcript")
-            status = gr.Markdown(
-                visible=False,
-                elem_classes="status-message"
             )
             enhanced_output = gr.Markdown()
-    # Add some CSS
     gr.Markdown("""
         <style>
-        .status-message {
-            padding: 8px 15px;
-            border-radius: 4px;
-            background-color: #f0f0f0;
-            margin-bottom: 10px;
-            display: inline-block;
         }
         </style>
     """)
     transcribe_btn.click(
         fn=process_audio,
         inputs=[audio_input],
         outputs=[
             original_output,
             enhanced_output,
-            status
         ]
     )

 import asyncio
 import io
 from itertools import groupby
+from datetime import datetime
 prompt = '''
 You are an expert transcript editor. Your task is to enhance this transcript for maximum readability while maintaining the core message.
     return result
+def create_downloadable_file(content: str, prefix: str) -> str:
+    """Create a temporary file with the content and return filepath"""
+    temp_dir = Path("temp_downloads")
+    temp_dir.mkdir(exist_ok=True)
+    # Create a unique filename
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"{prefix}_{timestamp}.md"
+    filepath = temp_dir / filename
+    # Write content to file
+    with open(filepath, "w", encoding="utf-8") as f:
+        f.write(content)
+    return str(filepath)
 def process_audio(audio_file):
     try:
         temp_path = Path("temp_audio")
             with open(temp_file, "wb") as f:
                 f.write(audio_file)
+            # Initial state - clear both transcripts
             yield (
                 gr.update(value="", visible=True),  # original transcript
                 gr.update(value="", visible=True),  # enhanced transcript
+                None,  # original download
+                None,  # enhanced download
             )
             # Get transcript
             dialogues = list(group_utterances_by_speaker(utterances))
             original = format_chunk(dialogues, markdown=True)
+            # Create downloadable file for original transcript
+            original_file = create_downloadable_file(original, "original_transcript")
+            # Show original transcript
             yield (
                 gr.update(value=original, visible=True),
                 gr.update(value="", visible=True),
+                original_file,
+                None,
             )
             try:
                 merged = "\n\n".join(chunk.strip() for chunk in enhanced)
                 merged = apply_markdown_formatting(merged)
+                # Create downloadable file for enhanced transcript
+                enhanced_file = create_downloadable_file(merged, "enhanced_transcript")
                 # Show final result
                 yield (
                     gr.update(value=original, visible=True),
                     gr.update(value=merged, visible=True),
+                    original_file,
+                    enhanced_file,
                 )
             except Exception as e:
                 yield (
                     gr.update(value=original, visible=True),
                     gr.update(value=f"Error: {str(e)}", visible=True),
+                    original_file,
+                    None,
                 )
         finally:
+            # Cleanup temp files
             if os.path.exists(temp_file):
                 os.remove(temp_file)
     with gr.Row():
         with gr.Column():
             gr.Markdown("### Original Transcript")
+            original_download = gr.File(
+                label="Download as Markdown",
+                file_count="single",
+                visible=True,
+                interactive=False,
+            )
             original_output = gr.Markdown()
         with gr.Column():
             gr.Markdown("### Enhanced Transcript")
+            enhanced_download = gr.File(
+                label="Download as Markdown",
+                file_count="single",
+                visible=True,
+                interactive=False,
             )
             enhanced_output = gr.Markdown()
+    # Add some CSS to style the download buttons
     gr.Markdown("""
         <style>
+        .download-button {
+            margin-top: 10px;
         }
         </style>
     """)
     transcribe_btn.click(
         fn=process_audio,
         inputs=[audio_input],
         outputs=[
             original_output,
             enhanced_output,
+            original_download,
+            enhanced_download
         ]
     )