dwarkesh committed on
Commit
16d45b9
·
1 Parent(s): 6686395

better progress bars

Browse files
Files changed (1) hide show
  1. app.py +46 -12
app.py CHANGED
@@ -258,7 +258,6 @@ def rename_speakers(text: str, speaker_map: dict) -> str:
258
 
259
  def process_audio(audio_file):
260
  try:
261
- # Save uploaded file with a temporary name
262
  temp_path = Path("temp_audio")
263
  temp_path.mkdir(exist_ok=True)
264
  temp_file = temp_path / "temp_audio.mp3"
@@ -267,34 +266,48 @@ def process_audio(audio_file):
267
  with open(temp_file, "wb") as f:
268
  f.write(audio_file)
269
 
 
 
 
 
 
 
 
270
  # Get transcript
271
  transcriber = Transcriber(os.getenv("ASSEMBLYAI_API_KEY"))
272
  utterances = transcriber.get_transcript(temp_file)
273
-
274
- # Generate original transcript
275
  dialogues = list(group_utterances_by_speaker(utterances))
276
  original = format_chunk(dialogues, markdown=True)
277
 
278
- # Show original transcript immediately
279
- yield original, ""
 
 
 
 
280
 
281
  try:
282
- # Enhance transcript
283
  enhancer = Enhancer(os.getenv("GOOGLE_API_KEY"))
284
  chunks = prepare_audio_chunks(temp_file, utterances)
285
  enhanced = asyncio.run(enhancer.enhance_chunks(chunks))
286
-
287
- # Format final transcript
288
  merged = "\n\n".join(chunk.strip() for chunk in enhanced)
289
  merged = apply_markdown_formatting(merged)
290
 
291
- yield original, merged
 
 
 
 
 
292
 
293
  except Exception as e:
294
- yield original, f"Error: {str(e)}"
 
 
 
 
295
 
296
  finally:
297
- # Cleanup temp file
298
  if os.path.exists(temp_file):
299
  os.remove(temp_file)
300
 
@@ -333,12 +346,33 @@ with gr.Blocks(title="Transcript Enhancer") as demo:
333
 
334
  with gr.Column():
335
  gr.Markdown("### Enhanced Transcript")
 
 
 
 
336
  enhanced_output = gr.Markdown()
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  transcribe_btn.click(
339
  fn=process_audio,
340
  inputs=[audio_input],
341
- outputs=[original_output, enhanced_output]
 
 
 
 
342
  )
343
 
344
  # Launch the app
 
258
 
259
  def process_audio(audio_file):
260
  try:
 
261
  temp_path = Path("temp_audio")
262
  temp_path.mkdir(exist_ok=True)
263
  temp_file = temp_path / "temp_audio.mp3"
 
266
  with open(temp_file, "wb") as f:
267
  f.write(audio_file)
268
 
269
+ # Initial state - show generating message
270
+ yield (
271
+ gr.update(value="", visible=True), # original transcript
272
+ gr.update(value="", visible=True), # enhanced transcript
273
+ gr.update(value="🎯 Generating transcript...", visible=True) # status
274
+ )
275
+
276
  # Get transcript
277
  transcriber = Transcriber(os.getenv("ASSEMBLYAI_API_KEY"))
278
  utterances = transcriber.get_transcript(temp_file)
 
 
279
  dialogues = list(group_utterances_by_speaker(utterances))
280
  original = format_chunk(dialogues, markdown=True)
281
 
282
+ # Show original and enhancing message
283
+ yield (
284
+ gr.update(value=original, visible=True),
285
+ gr.update(value="", visible=True),
286
+ gr.update(value="🔄 Enhancing transcript...", visible=True)
287
+ )
288
 
289
  try:
 
290
  enhancer = Enhancer(os.getenv("GOOGLE_API_KEY"))
291
  chunks = prepare_audio_chunks(temp_file, utterances)
292
  enhanced = asyncio.run(enhancer.enhance_chunks(chunks))
 
 
293
  merged = "\n\n".join(chunk.strip() for chunk in enhanced)
294
  merged = apply_markdown_formatting(merged)
295
 
296
+ # Show final result
297
+ yield (
298
+ gr.update(value=original, visible=True),
299
+ gr.update(value=merged, visible=True),
300
+ gr.update(visible=False) # hide status
301
+ )
302
 
303
  except Exception as e:
304
+ yield (
305
+ gr.update(value=original, visible=True),
306
+ gr.update(value=f"Error: {str(e)}", visible=True),
307
+ gr.update(visible=False)
308
+ )
309
 
310
  finally:
 
311
  if os.path.exists(temp_file):
312
  os.remove(temp_file)
313
 
 
346
 
347
  with gr.Column():
348
  gr.Markdown("### Enhanced Transcript")
349
+ status = gr.Markdown(
350
+ visible=False,
351
+ elem_classes="status-message"
352
+ )
353
  enhanced_output = gr.Markdown()
354
 
355
+ # Add some CSS
356
+ gr.Markdown("""
357
+ <style>
358
+ .status-message {
359
+ padding: 8px 15px;
360
+ border-radius: 4px;
361
+ background-color: #f0f0f0;
362
+ margin-bottom: 10px;
363
+ display: inline-block;
364
+ }
365
+ </style>
366
+ """)
367
+
368
  transcribe_btn.click(
369
  fn=process_audio,
370
  inputs=[audio_input],
371
+ outputs=[
372
+ original_output,
373
+ enhanced_output,
374
+ status
375
+ ]
376
  )
377
 
378
  # Launch the app