cstr committed on
Commit
8cc0029
·
verified ·
1 Parent(s): 00124b5
Files changed (1) hide show
  1. app.py +9 -12
app.py CHANGED
@@ -528,12 +528,12 @@ def get_model_options(pipeline_type):
528
  # Dictionary to store loaded models
529
  loaded_models = {}
530
 
531
- def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
532
  """
533
  Transcribes audio from a given source using the specified pipeline and model.
534
 
535
  Args:
536
- audio_upload (file): Uploaded audio file.
537
  audio_url (str): URL of audio.
538
  proxy_url (str): Proxy URL if needed.
539
  proxy_username (str): Proxy username.
@@ -567,14 +567,10 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
567
  audio_path = None
568
  is_temp_file = False
569
 
570
- if audio_upload is not None:
571
- if isinstance(audio_upload, dict) and 'name' in audio_upload:
572
- # audio_upload is a dict with file info
573
- audio_path = audio_upload['name']
574
- is_temp_file = False
575
- elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
576
- audio_path = audio_upload
577
- is_temp_file = False
578
  elif audio_url is not None and len(audio_url.strip()) > 0:
579
  # audio_url is provided
580
  audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
@@ -584,7 +580,7 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
584
  yield verbose_messages + error_msg, "", None
585
  return
586
  else:
587
- error_msg = "No audio source provided. Please upload an audio file or enter a URL."
588
  logging.error(error_msg)
589
  yield verbose_messages + error_msg, "", None
590
  return
@@ -696,6 +692,7 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
696
  if audio_path and is_temp_file and os.path.exists(audio_path):
697
  os.remove(audio_path)
698
 
 
699
  with gr.Blocks() as iface:
700
  gr.Markdown("# Audio Transcription")
701
  gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
@@ -785,7 +782,7 @@ with gr.Blocks() as iface:
785
  [None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
786
  [None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
787
  ],
788
- inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
789
  )
790
 
791
  iface.launch(share=False, debug=True)
 
528
  # Dictionary to store loaded models
529
  loaded_models = {}
530
 
531
+ def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
532
  """
533
  Transcribes audio from a given source using the specified pipeline and model.
534
 
535
  Args:
536
+ audio_input (str): Path to uploaded audio file or recorded audio.
537
  audio_url (str): URL of audio.
538
  proxy_url (str): Proxy URL if needed.
539
  proxy_username (str): Proxy username.
 
567
  audio_path = None
568
  is_temp_file = False
569
 
570
+ if audio_input is not None and len(audio_input) > 0:
571
+ # audio_input is a filepath to uploaded or recorded audio
572
+ audio_path = audio_input
573
+ is_temp_file = False
 
 
 
 
574
  elif audio_url is not None and len(audio_url.strip()) > 0:
575
  # audio_url is provided
576
  audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
 
580
  yield verbose_messages + error_msg, "", None
581
  return
582
  else:
583
+ error_msg = "No audio source provided. Please upload an audio file, record audio, or enter a URL."
584
  logging.error(error_msg)
585
  yield verbose_messages + error_msg, "", None
586
  return
 
692
  if audio_path and is_temp_file and os.path.exists(audio_path):
693
  os.remove(audio_path)
694
 
695
+
696
  with gr.Blocks() as iface:
697
  gr.Markdown("# Audio Transcription")
698
  gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
 
782
  [None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
783
  [None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
784
  ],
785
+ inputs=[audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
786
  )
787
 
788
  iface.launch(share=False, debug=True)