Spaces:
Running
Running
fix
Browse files
app.py
CHANGED
@@ -528,12 +528,12 @@ def get_model_options(pipeline_type):
|
|
528 |
# Dictionary to store loaded models
|
529 |
loaded_models = {}
|
530 |
|
531 |
-
def transcribe_audio(
|
532 |
"""
|
533 |
Transcribes audio from a given source using the specified pipeline and model.
|
534 |
|
535 |
Args:
|
536 |
-
|
537 |
audio_url (str): URL of audio.
|
538 |
proxy_url (str): Proxy URL if needed.
|
539 |
proxy_username (str): Proxy username.
|
@@ -567,14 +567,10 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
|
|
567 |
audio_path = None
|
568 |
is_temp_file = False
|
569 |
|
570 |
-
if
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
is_temp_file = False
|
575 |
-
elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
|
576 |
-
audio_path = audio_upload
|
577 |
-
is_temp_file = False
|
578 |
elif audio_url is not None and len(audio_url.strip()) > 0:
|
579 |
# audio_url is provided
|
580 |
audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
|
@@ -584,7 +580,7 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
|
|
584 |
yield verbose_messages + error_msg, "", None
|
585 |
return
|
586 |
else:
|
587 |
-
error_msg = "No audio source provided. Please upload an audio file or enter a URL."
|
588 |
logging.error(error_msg)
|
589 |
yield verbose_messages + error_msg, "", None
|
590 |
return
|
@@ -696,6 +692,7 @@ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_p
|
|
696 |
if audio_path and is_temp_file and os.path.exists(audio_path):
|
697 |
os.remove(audio_path)
|
698 |
|
|
|
699 |
with gr.Blocks() as iface:
|
700 |
gr.Markdown("# Audio Transcription")
|
701 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
@@ -785,7 +782,7 @@ with gr.Blocks() as iface:
|
|
785 |
[None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
|
786 |
[None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
|
787 |
],
|
788 |
-
inputs=[
|
789 |
)
|
790 |
|
791 |
iface.launch(share=False, debug=True)
|
|
|
528 |
# Dictionary to store loaded models
|
529 |
loaded_models = {}
|
530 |
|
531 |
+
def transcribe_audio(audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
|
532 |
"""
|
533 |
Transcribes audio from a given source using the specified pipeline and model.
|
534 |
|
535 |
Args:
|
536 |
+
audio_input (str): Path to uploaded audio file or recorded audio.
|
537 |
audio_url (str): URL of audio.
|
538 |
proxy_url (str): Proxy URL if needed.
|
539 |
proxy_username (str): Proxy username.
|
|
|
567 |
audio_path = None
|
568 |
is_temp_file = False
|
569 |
|
570 |
+
if audio_input is not None and len(audio_input) > 0:
|
571 |
+
# audio_input is a filepath to uploaded or recorded audio
|
572 |
+
audio_path = audio_input
|
573 |
+
is_temp_file = False
|
|
|
|
|
|
|
|
|
574 |
elif audio_url is not None and len(audio_url.strip()) > 0:
|
575 |
# audio_url is provided
|
576 |
audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
|
|
|
580 |
yield verbose_messages + error_msg, "", None
|
581 |
return
|
582 |
else:
|
583 |
+
error_msg = "No audio source provided. Please upload an audio file, record audio, or enter a URL."
|
584 |
logging.error(error_msg)
|
585 |
yield verbose_messages + error_msg, "", None
|
586 |
return
|
|
|
692 |
if audio_path and is_temp_file and os.path.exists(audio_path):
|
693 |
os.remove(audio_path)
|
694 |
|
695 |
+
|
696 |
with gr.Blocks() as iface:
|
697 |
gr.Markdown("# Audio Transcription")
|
698 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
|
|
782 |
[None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
|
783 |
[None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
|
784 |
],
|
785 |
+
inputs=[audio_input, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
|
786 |
)
|
787 |
|
788 |
iface.launch(share=False, debug=True)
|