Spaces:
Running
Running
+px +ux -tc
Browse files
app.py
CHANGED
@@ -45,13 +45,16 @@ from faster_whisper.transcribe import BatchedInferencePipeline
|
|
45 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
46 |
logging.info(f"Using device: {device}")
|
47 |
|
48 |
-
def download_audio(url, method_choice):
|
49 |
"""
|
50 |
-
Downloads audio from a given URL using the specified method.
|
51 |
|
52 |
Args:
|
53 |
url (str): The URL of the audio.
|
54 |
method_choice (str): The method to use for downloading audio.
|
|
|
|
|
|
|
55 |
|
56 |
Returns:
|
57 |
tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
|
@@ -60,19 +63,19 @@ def download_audio(url, method_choice):
|
|
60 |
logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
|
61 |
try:
|
62 |
if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
|
63 |
-
audio_file = download_youtube_audio(url, method_choice)
|
64 |
if not audio_file:
|
65 |
error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
|
66 |
logging.error(error_msg)
|
67 |
return None, False
|
68 |
elif parsed_url.scheme == 'rtsp':
|
69 |
-
audio_file = download_rtsp_audio(url)
|
70 |
if not audio_file:
|
71 |
error_msg = f"Failed to download RTSP audio from {url}"
|
72 |
logging.error(error_msg)
|
73 |
return None, False
|
74 |
else:
|
75 |
-
audio_file = download_direct_audio(url, method_choice)
|
76 |
if not audio_file:
|
77 |
error_msg = f"Failed to download audio from {url} using method {method_choice}"
|
78 |
logging.error(error_msg)
|
@@ -84,13 +87,16 @@ def download_audio(url, method_choice):
|
|
84 |
return None, False
|
85 |
|
86 |
|
87 |
-
def download_youtube_audio(url, method_choice):
|
88 |
"""
|
89 |
Downloads audio from a YouTube URL using the specified method.
|
90 |
|
91 |
Args:
|
92 |
url (str): The YouTube URL.
|
93 |
method_choice (str): The method to use for downloading.
|
|
|
|
|
|
|
94 |
|
95 |
Returns:
|
96 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -102,17 +108,20 @@ def download_youtube_audio(url, method_choice):
|
|
102 |
method = methods.get(method_choice, yt_dlp_method)
|
103 |
try:
|
104 |
logging.info(f"Attempting to download YouTube audio using {method_choice}")
|
105 |
-
return method(url)
|
106 |
except Exception as e:
|
107 |
logging.error(f"Error downloading using {method_choice}: {str(e)}")
|
108 |
return None
|
109 |
|
110 |
-
def yt_dlp_method(url):
|
111 |
"""
|
112 |
Downloads YouTube audio using yt-dlp and saves it to a temporary file.
|
113 |
|
114 |
Args:
|
115 |
url (str): The YouTube URL.
|
|
|
|
|
|
|
116 |
|
117 |
Returns:
|
118 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -133,6 +142,8 @@ def yt_dlp_method(url):
|
|
133 |
'logger': MyLogger(), # Use a custom logger to capture yt-dlp logs
|
134 |
'progress_hooks': [my_hook], # Hook to capture download progress and errors
|
135 |
}
|
|
|
|
|
136 |
try:
|
137 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
138 |
info = ydl.extract_info(url, download=True)
|
@@ -174,12 +185,15 @@ def my_hook(d):
|
|
174 |
elif d['status'] == 'error':
|
175 |
logging.error(f"Download error: {d['filename']}")
|
176 |
|
177 |
-
def pytube_method(url):
|
178 |
"""
|
179 |
Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
|
180 |
|
181 |
Args:
|
182 |
url (str): The YouTube URL.
|
|
|
|
|
|
|
183 |
|
184 |
Returns:
|
185 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -187,7 +201,13 @@ def pytube_method(url):
|
|
187 |
logging.info("Using pytube method")
|
188 |
from pytube import YouTube
|
189 |
try:
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
audio_stream = yt.streams.filter(only_audio=True).first()
|
192 |
if audio_stream is None:
|
193 |
error_msg = "No audio streams available with pytube."
|
@@ -205,12 +225,13 @@ def pytube_method(url):
|
|
205 |
return None
|
206 |
|
207 |
|
208 |
-
def download_rtsp_audio(url):
|
209 |
"""
|
210 |
Downloads audio from an RTSP URL using FFmpeg.
|
211 |
|
212 |
Args:
|
213 |
url (str): The RTSP URL.
|
|
|
214 |
|
215 |
Returns:
|
216 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -218,8 +239,12 @@ def download_rtsp_audio(url):
|
|
218 |
logging.info("Using FFmpeg to download RTSP stream")
|
219 |
output_file = tempfile.mktemp(suffix='.mp3')
|
220 |
command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
|
|
|
|
|
|
|
|
|
221 |
try:
|
222 |
-
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
223 |
logging.info(f"Downloaded RTSP audio to: {output_file}")
|
224 |
return output_file
|
225 |
except subprocess.CalledProcessError as e:
|
@@ -229,13 +254,16 @@ def download_rtsp_audio(url):
|
|
229 |
logging.error(f"Error downloading RTSP audio: {str(e)}")
|
230 |
return None
|
231 |
|
232 |
-
def download_direct_audio(url, method_choice):
|
233 |
"""
|
234 |
Downloads audio from a direct URL using the specified method.
|
235 |
|
236 |
Args:
|
237 |
url (str): The direct URL of the audio file.
|
238 |
method_choice (str): The method to use for downloading.
|
|
|
|
|
|
|
239 |
|
240 |
Returns:
|
241 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -250,7 +278,7 @@ def download_direct_audio(url, method_choice):
|
|
250 |
}
|
251 |
method = methods.get(method_choice, requests_method)
|
252 |
try:
|
253 |
-
audio_file = method(url)
|
254 |
if not audio_file or not os.path.exists(audio_file):
|
255 |
error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
|
256 |
logging.error(error_msg)
|
@@ -260,18 +288,30 @@ def download_direct_audio(url, method_choice):
|
|
260 |
logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
|
261 |
return None
|
262 |
|
263 |
-
def requests_method(url):
|
264 |
"""
|
265 |
Downloads audio using the requests library.
|
266 |
|
267 |
Args:
|
268 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
269 |
|
270 |
Returns:
|
271 |
str: Path to the downloaded audio file, or None if failed.
|
272 |
"""
|
273 |
try:
|
274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
if response.status_code == 200:
|
276 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
|
277 |
for chunk in response.iter_content(chunk_size=8192):
|
@@ -286,12 +326,15 @@ def requests_method(url):
|
|
286 |
logging.error(f"Error in requests_method: {str(e)}")
|
287 |
return None
|
288 |
|
289 |
-
def wget_method(url):
|
290 |
"""
|
291 |
Downloads audio using the wget command-line tool.
|
292 |
|
293 |
Args:
|
294 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
295 |
|
296 |
Returns:
|
297 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -299,8 +342,12 @@ def wget_method(url):
|
|
299 |
logging.info("Using wget method")
|
300 |
output_file = tempfile.mktemp(suffix='.mp3')
|
301 |
command = ['wget', '-O', output_file, url]
|
|
|
|
|
|
|
|
|
302 |
try:
|
303 |
-
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
304 |
logging.info(f"Downloaded audio to: {output_file}")
|
305 |
return output_file
|
306 |
except subprocess.CalledProcessError as e:
|
@@ -310,12 +357,15 @@ def wget_method(url):
|
|
310 |
logging.error(f"Error in wget_method: {str(e)}")
|
311 |
return None
|
312 |
|
313 |
-
def yt_dlp_direct_method(url):
|
314 |
"""
|
315 |
Downloads audio using yt-dlp (supports various protocols and sites).
|
316 |
|
317 |
Args:
|
318 |
url (str): The URL of the audio or webpage containing audio.
|
|
|
|
|
|
|
319 |
|
320 |
Returns:
|
321 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -333,6 +383,8 @@ def yt_dlp_direct_method(url):
|
|
333 |
'preferredquality': '192',
|
334 |
}],
|
335 |
}
|
|
|
|
|
336 |
try:
|
337 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
338 |
ydl.download([url])
|
@@ -342,12 +394,15 @@ def yt_dlp_direct_method(url):
|
|
342 |
logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
|
343 |
return None
|
344 |
|
345 |
-
def ffmpeg_method(url):
|
346 |
"""
|
347 |
Downloads audio using FFmpeg.
|
348 |
|
349 |
Args:
|
350 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
351 |
|
352 |
Returns:
|
353 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -355,8 +410,12 @@ def ffmpeg_method(url):
|
|
355 |
logging.info("Using ffmpeg method")
|
356 |
output_file = tempfile.mktemp(suffix='.mp3')
|
357 |
command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
|
|
|
|
|
|
|
|
|
358 |
try:
|
359 |
-
subprocess.run(command, check=True, capture_output=True, text=True)
|
360 |
logging.info(f"Downloaded and converted audio to: {output_file}")
|
361 |
return output_file
|
362 |
except subprocess.CalledProcessError as e:
|
@@ -366,12 +425,15 @@ def ffmpeg_method(url):
|
|
366 |
logging.error(f"Error in ffmpeg_method: {str(e)}")
|
367 |
return None
|
368 |
|
369 |
-
def aria2_method(url):
|
370 |
"""
|
371 |
Downloads audio using aria2.
|
372 |
|
373 |
Args:
|
374 |
url (str): The URL of the audio file.
|
|
|
|
|
|
|
375 |
|
376 |
Returns:
|
377 |
str: Path to the downloaded audio file, or None if failed.
|
@@ -379,6 +441,8 @@ def aria2_method(url):
|
|
379 |
logging.info("Using aria2 method")
|
380 |
output_file = tempfile.mktemp(suffix='.mp3')
|
381 |
command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
|
|
|
|
|
382 |
try:
|
383 |
subprocess.run(command, check=True, capture_output=True, text=True)
|
384 |
logging.info(f"Downloaded audio to: {output_file}")
|
@@ -402,8 +466,8 @@ def trim_audio(audio_path, start_time, end_time):
|
|
402 |
Returns:
|
403 |
str: Path to the trimmed audio file.
|
404 |
|
405 |
-
|
406 |
-
|
407 |
"""
|
408 |
try:
|
409 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
@@ -464,12 +528,16 @@ def get_model_options(pipeline_type):
|
|
464 |
# Dictionary to store loaded models
|
465 |
loaded_models = {}
|
466 |
|
467 |
-
def transcribe_audio(
|
468 |
"""
|
469 |
Transcribes audio from a given source using the specified pipeline and model.
|
470 |
|
471 |
Args:
|
472 |
-
|
|
|
|
|
|
|
|
|
473 |
pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
|
474 |
model_id (str): The ID of the model to use.
|
475 |
dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
|
@@ -478,6 +546,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
478 |
start_time (float, optional): Start time in seconds for trimming audio.
|
479 |
end_time (float, optional): End time in seconds for trimming audio.
|
480 |
verbose (bool, optional): Whether to output verbose logging.
|
|
|
481 |
|
482 |
Yields:
|
483 |
Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
|
@@ -494,29 +563,28 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
494 |
if verbose:
|
495 |
yield verbose_messages, "", None
|
496 |
|
497 |
-
# Determine
|
498 |
audio_path = None
|
499 |
is_temp_file = False
|
500 |
|
501 |
-
if
|
502 |
-
if
|
503 |
-
#
|
504 |
-
audio_path
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
yield verbose_messages + error_msg, "", None
|
509 |
-
return
|
510 |
-
elif os.path.exists(input_source):
|
511 |
-
# Input source is a local file path
|
512 |
-
audio_path = input_source
|
513 |
is_temp_file = False
|
514 |
-
elif
|
515 |
-
#
|
516 |
-
audio_path =
|
517 |
-
|
|
|
|
|
|
|
|
|
518 |
else:
|
519 |
-
error_msg = "No
|
520 |
logging.error(error_msg)
|
521 |
yield verbose_messages + error_msg, "", None
|
522 |
return
|
@@ -601,9 +669,15 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
601 |
|
602 |
for segment in segments:
|
603 |
if pipeline_type in ["faster-batched", "faster-sequenced"]:
|
604 |
-
|
|
|
|
|
|
|
605 |
else:
|
606 |
-
|
|
|
|
|
|
|
607 |
transcription += transcription_segment
|
608 |
if verbose:
|
609 |
yield verbose_messages + metrics_output, transcription, None
|
@@ -627,39 +701,49 @@ with gr.Blocks() as iface:
|
|
627 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
628 |
|
629 |
with gr.Row():
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
label="Pipeline Type",
|
634 |
-
value="faster-batched"
|
635 |
-
)
|
636 |
-
model_id = gr.Dropdown(
|
637 |
-
label="Model",
|
638 |
-
choices=get_model_options("faster-batched"),
|
639 |
-
value=get_model_options("faster-batched")[0]
|
640 |
-
)
|
641 |
-
|
642 |
-
with gr.Row():
|
643 |
-
dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
|
644 |
-
batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
|
645 |
-
download_method = gr.Dropdown(
|
646 |
-
choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
|
647 |
-
label="Download Method",
|
648 |
-
value="yt-dlp"
|
649 |
-
)
|
650 |
-
|
651 |
with gr.Row():
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
|
656 |
transcribe_button = gr.Button("Transcribe")
|
657 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
658 |
with gr.Row():
|
659 |
metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
|
660 |
transcription_output = gr.Textbox(label="Transcription", lines=10)
|
661 |
transcription_file = gr.File(label="Download Transcription")
|
662 |
-
|
663 |
def update_model_dropdown(pipeline_type):
|
664 |
"""
|
665 |
Updates the model dropdown choices based on the selected pipeline type.
|
@@ -681,7 +765,7 @@ with gr.Blocks() as iface:
|
|
681 |
logging.error(f"Error in update_model_dropdown: {str(e)}")
|
682 |
return gr.update(choices=["Error"], value="Error", visible=True)
|
683 |
|
684 |
-
#
|
685 |
pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
|
686 |
|
687 |
def transcribe_with_progress(*args):
|
@@ -690,17 +774,16 @@ with gr.Blocks() as iface:
|
|
690 |
|
691 |
transcribe_button.click(
|
692 |
transcribe_with_progress,
|
693 |
-
inputs=[
|
694 |
outputs=[metrics_output, transcription_output, transcription_file]
|
695 |
)
|
696 |
|
697 |
gr.Examples(
|
698 |
examples=[
|
699 |
-
["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None,
|
700 |
-
["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/
|
701 |
-
["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
|
702 |
],
|
703 |
-
inputs=[
|
704 |
)
|
705 |
|
706 |
iface.launch(share=False, debug=True)
|
|
|
45 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
46 |
logging.info(f"Using device: {device}")
|
47 |
|
48 |
+
def download_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
|
49 |
"""
|
50 |
+
Downloads audio from a given URL using the specified method and proxy settings.
|
51 |
|
52 |
Args:
|
53 |
url (str): The URL of the audio.
|
54 |
method_choice (str): The method to use for downloading audio.
|
55 |
+
proxy_url (str): Proxy URL if needed.
|
56 |
+
proxy_username (str): Proxy username.
|
57 |
+
proxy_password (str): Proxy password.
|
58 |
|
59 |
Returns:
|
60 |
tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
|
|
|
63 |
logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
|
64 |
try:
|
65 |
if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
|
66 |
+
audio_file = download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
|
67 |
if not audio_file:
|
68 |
error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
|
69 |
logging.error(error_msg)
|
70 |
return None, False
|
71 |
elif parsed_url.scheme == 'rtsp':
|
72 |
+
audio_file = download_rtsp_audio(url, proxy_url)
|
73 |
if not audio_file:
|
74 |
error_msg = f"Failed to download RTSP audio from {url}"
|
75 |
logging.error(error_msg)
|
76 |
return None, False
|
77 |
else:
|
78 |
+
audio_file = download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
|
79 |
if not audio_file:
|
80 |
error_msg = f"Failed to download audio from {url} using method {method_choice}"
|
81 |
logging.error(error_msg)
|
|
|
87 |
return None, False
|
88 |
|
89 |
|
90 |
+
def download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
|
91 |
"""
|
92 |
Downloads audio from a YouTube URL using the specified method.
|
93 |
|
94 |
Args:
|
95 |
url (str): The YouTube URL.
|
96 |
method_choice (str): The method to use for downloading.
|
97 |
+
proxy_url (str): Proxy URL if needed.
|
98 |
+
proxy_username (str): Proxy username.
|
99 |
+
proxy_password (str): Proxy password.
|
100 |
|
101 |
Returns:
|
102 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
108 |
method = methods.get(method_choice, yt_dlp_method)
|
109 |
try:
|
110 |
logging.info(f"Attempting to download YouTube audio using {method_choice}")
|
111 |
+
return method(url, proxy_url, proxy_username, proxy_password)
|
112 |
except Exception as e:
|
113 |
logging.error(f"Error downloading using {method_choice}: {str(e)}")
|
114 |
return None
|
115 |
|
116 |
+
def yt_dlp_method(url, proxy_url, proxy_username, proxy_password):
|
117 |
"""
|
118 |
Downloads YouTube audio using yt-dlp and saves it to a temporary file.
|
119 |
|
120 |
Args:
|
121 |
url (str): The YouTube URL.
|
122 |
+
proxy_url (str): Proxy URL if needed.
|
123 |
+
proxy_username (str): Proxy username.
|
124 |
+
proxy_password (str): Proxy password.
|
125 |
|
126 |
Returns:
|
127 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
142 |
'logger': MyLogger(), # Use a custom logger to capture yt-dlp logs
|
143 |
'progress_hooks': [my_hook], # Hook to capture download progress and errors
|
144 |
}
|
145 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
146 |
+
ydl_opts['proxy'] = proxy_url
|
147 |
try:
|
148 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
149 |
info = ydl.extract_info(url, download=True)
|
|
|
185 |
elif d['status'] == 'error':
|
186 |
logging.error(f"Download error: {d['filename']}")
|
187 |
|
188 |
+
def pytube_method(url, proxy_url, proxy_username, proxy_password):
|
189 |
"""
|
190 |
Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
|
191 |
|
192 |
Args:
|
193 |
url (str): The YouTube URL.
|
194 |
+
proxy_url (str): Proxy URL if needed.
|
195 |
+
proxy_username (str): Proxy username.
|
196 |
+
proxy_password (str): Proxy password.
|
197 |
|
198 |
Returns:
|
199 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
201 |
logging.info("Using pytube method")
|
202 |
from pytube import YouTube
|
203 |
try:
|
204 |
+
proxies = None
|
205 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
206 |
+
proxies = {
|
207 |
+
"http": proxy_url,
|
208 |
+
"https": proxy_url
|
209 |
+
}
|
210 |
+
yt = YouTube(url, proxies=proxies)
|
211 |
audio_stream = yt.streams.filter(only_audio=True).first()
|
212 |
if audio_stream is None:
|
213 |
error_msg = "No audio streams available with pytube."
|
|
|
225 |
return None
|
226 |
|
227 |
|
228 |
+
def download_rtsp_audio(url, proxy_url):
|
229 |
"""
|
230 |
Downloads audio from an RTSP URL using FFmpeg.
|
231 |
|
232 |
Args:
|
233 |
url (str): The RTSP URL.
|
234 |
+
proxy_url (str): Proxy URL if needed.
|
235 |
|
236 |
Returns:
|
237 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
239 |
logging.info("Using FFmpeg to download RTSP stream")
|
240 |
output_file = tempfile.mktemp(suffix='.mp3')
|
241 |
command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
|
242 |
+
env = os.environ.copy()
|
243 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
244 |
+
env['http_proxy'] = proxy_url
|
245 |
+
env['https_proxy'] = proxy_url
|
246 |
try:
|
247 |
+
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
|
248 |
logging.info(f"Downloaded RTSP audio to: {output_file}")
|
249 |
return output_file
|
250 |
except subprocess.CalledProcessError as e:
|
|
|
254 |
logging.error(f"Error downloading RTSP audio: {str(e)}")
|
255 |
return None
|
256 |
|
257 |
+
def download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
|
258 |
"""
|
259 |
Downloads audio from a direct URL using the specified method.
|
260 |
|
261 |
Args:
|
262 |
url (str): The direct URL of the audio file.
|
263 |
method_choice (str): The method to use for downloading.
|
264 |
+
proxy_url (str): Proxy URL if needed.
|
265 |
+
proxy_username (str): Proxy username.
|
266 |
+
proxy_password (str): Proxy password.
|
267 |
|
268 |
Returns:
|
269 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
278 |
}
|
279 |
method = methods.get(method_choice, requests_method)
|
280 |
try:
|
281 |
+
audio_file = method(url, proxy_url, proxy_username, proxy_password)
|
282 |
if not audio_file or not os.path.exists(audio_file):
|
283 |
error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
|
284 |
logging.error(error_msg)
|
|
|
288 |
logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
|
289 |
return None
|
290 |
|
291 |
+
def requests_method(url, proxy_url, proxy_username, proxy_password):
|
292 |
"""
|
293 |
Downloads audio using the requests library.
|
294 |
|
295 |
Args:
|
296 |
url (str): The URL of the audio file.
|
297 |
+
proxy_url (str): Proxy URL if needed.
|
298 |
+
proxy_username (str): Proxy username.
|
299 |
+
proxy_password (str): Proxy password.
|
300 |
|
301 |
Returns:
|
302 |
str: Path to the downloaded audio file, or None if failed.
|
303 |
"""
|
304 |
try:
|
305 |
+
proxies = None
|
306 |
+
auth = None
|
307 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
308 |
+
proxies = {
|
309 |
+
"http": proxy_url,
|
310 |
+
"https": proxy_url
|
311 |
+
}
|
312 |
+
if proxy_username and proxy_password:
|
313 |
+
auth = (proxy_username, proxy_password)
|
314 |
+
response = requests.get(url, stream=True, proxies=proxies, auth=auth)
|
315 |
if response.status_code == 200:
|
316 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
|
317 |
for chunk in response.iter_content(chunk_size=8192):
|
|
|
326 |
logging.error(f"Error in requests_method: {str(e)}")
|
327 |
return None
|
328 |
|
329 |
+
def wget_method(url, proxy_url, proxy_username, proxy_password):
|
330 |
"""
|
331 |
Downloads audio using the wget command-line tool.
|
332 |
|
333 |
Args:
|
334 |
url (str): The URL of the audio file.
|
335 |
+
proxy_url (str): Proxy URL if needed.
|
336 |
+
proxy_username (str): Proxy username.
|
337 |
+
proxy_password (str): Proxy password.
|
338 |
|
339 |
Returns:
|
340 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
342 |
logging.info("Using wget method")
|
343 |
output_file = tempfile.mktemp(suffix='.mp3')
|
344 |
command = ['wget', '-O', output_file, url]
|
345 |
+
env = os.environ.copy()
|
346 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
347 |
+
env['http_proxy'] = proxy_url
|
348 |
+
env['https_proxy'] = proxy_url
|
349 |
try:
|
350 |
+
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
|
351 |
logging.info(f"Downloaded audio to: {output_file}")
|
352 |
return output_file
|
353 |
except subprocess.CalledProcessError as e:
|
|
|
357 |
logging.error(f"Error in wget_method: {str(e)}")
|
358 |
return None
|
359 |
|
360 |
+
def yt_dlp_direct_method(url, proxy_url, proxy_username, proxy_password):
|
361 |
"""
|
362 |
Downloads audio using yt-dlp (supports various protocols and sites).
|
363 |
|
364 |
Args:
|
365 |
url (str): The URL of the audio or webpage containing audio.
|
366 |
+
proxy_url (str): Proxy URL if needed.
|
367 |
+
proxy_username (str): Proxy username.
|
368 |
+
proxy_password (str): Proxy password.
|
369 |
|
370 |
Returns:
|
371 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
383 |
'preferredquality': '192',
|
384 |
}],
|
385 |
}
|
386 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
387 |
+
ydl_opts['proxy'] = proxy_url
|
388 |
try:
|
389 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
390 |
ydl.download([url])
|
|
|
394 |
logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
|
395 |
return None
|
396 |
|
397 |
+
def ffmpeg_method(url, proxy_url, proxy_username, proxy_password):
|
398 |
"""
|
399 |
Downloads audio using FFmpeg.
|
400 |
|
401 |
Args:
|
402 |
url (str): The URL of the audio file.
|
403 |
+
proxy_url (str): Proxy URL if needed.
|
404 |
+
proxy_username (str): Proxy username.
|
405 |
+
proxy_password (str): Proxy password.
|
406 |
|
407 |
Returns:
|
408 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
410 |
logging.info("Using ffmpeg method")
|
411 |
output_file = tempfile.mktemp(suffix='.mp3')
|
412 |
command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
|
413 |
+
env = os.environ.copy()
|
414 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
415 |
+
env['http_proxy'] = proxy_url
|
416 |
+
env['https_proxy'] = proxy_url
|
417 |
try:
|
418 |
+
subprocess.run(command, check=True, capture_output=True, text=True, env=env)
|
419 |
logging.info(f"Downloaded and converted audio to: {output_file}")
|
420 |
return output_file
|
421 |
except subprocess.CalledProcessError as e:
|
|
|
425 |
logging.error(f"Error in ffmpeg_method: {str(e)}")
|
426 |
return None
|
427 |
|
428 |
+
def aria2_method(url, proxy_url, proxy_username, proxy_password):
|
429 |
"""
|
430 |
Downloads audio using aria2.
|
431 |
|
432 |
Args:
|
433 |
url (str): The URL of the audio file.
|
434 |
+
proxy_url (str): Proxy URL if needed.
|
435 |
+
proxy_username (str): Proxy username.
|
436 |
+
proxy_password (str): Proxy password.
|
437 |
|
438 |
Returns:
|
439 |
str: Path to the downloaded audio file, or None if failed.
|
|
|
441 |
logging.info("Using aria2 method")
|
442 |
output_file = tempfile.mktemp(suffix='.mp3')
|
443 |
command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
|
444 |
+
if proxy_url and len(proxy_url.strip()) > 0:
|
445 |
+
command.extend(['--all-proxy', proxy_url])
|
446 |
try:
|
447 |
subprocess.run(command, check=True, capture_output=True, text=True)
|
448 |
logging.info(f"Downloaded audio to: {output_file}")
|
|
|
466 |
Returns:
|
467 |
str: Path to the trimmed audio file.
|
468 |
|
469 |
+
Raises:
|
470 |
+
gr.Error: If invalid start or end times are provided.
|
471 |
"""
|
472 |
try:
|
473 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
|
|
528 |
# Dictionary to store loaded models
|
529 |
loaded_models = {}
|
530 |
|
531 |
+
def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
|
532 |
"""
|
533 |
Transcribes audio from a given source using the specified pipeline and model.
|
534 |
|
535 |
Args:
|
536 |
+
audio_upload (file): Uploaded audio file.
|
537 |
+
audio_url (str): URL of audio.
|
538 |
+
proxy_url (str): Proxy URL if needed.
|
539 |
+
proxy_username (str): Proxy username.
|
540 |
+
proxy_password (str): Proxy password.
|
541 |
pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
|
542 |
model_id (str): The ID of the model to use.
|
543 |
dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
|
|
|
546 |
start_time (float, optional): Start time in seconds for trimming audio.
|
547 |
end_time (float, optional): End time in seconds for trimming audio.
|
548 |
verbose (bool, optional): Whether to output verbose logging.
|
549 |
+
include_timecodes (bool, optional): Whether to include timecodes in the transcription.
|
550 |
|
551 |
Yields:
|
552 |
Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
|
|
|
563 |
if verbose:
|
564 |
yield verbose_messages, "", None
|
565 |
|
566 |
+
# Determine the audio source
|
567 |
audio_path = None
|
568 |
is_temp_file = False
|
569 |
|
570 |
+
if audio_upload is not None:
|
571 |
+
if isinstance(audio_upload, dict) and 'name' in audio_upload:
|
572 |
+
# audio_upload is a dict with file info
|
573 |
+
audio_path = audio_upload['name']
|
574 |
+
is_temp_file = False
|
575 |
+
elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
|
576 |
+
audio_path = audio_upload
|
|
|
|
|
|
|
|
|
|
|
577 |
is_temp_file = False
|
578 |
+
elif audio_url is not None and len(audio_url.strip()) > 0:
|
579 |
+
# audio_url is provided
|
580 |
+
audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
|
581 |
+
if not audio_path:
|
582 |
+
error_msg = f"Error downloading audio from {audio_url} using method {download_method}. Check logs for details."
|
583 |
+
logging.error(error_msg)
|
584 |
+
yield verbose_messages + error_msg, "", None
|
585 |
+
return
|
586 |
else:
|
587 |
+
error_msg = "No audio source provided. Please upload an audio file or enter a URL."
|
588 |
logging.error(error_msg)
|
589 |
yield verbose_messages + error_msg, "", None
|
590 |
return
|
|
|
669 |
|
670 |
for segment in segments:
|
671 |
if pipeline_type in ["faster-batched", "faster-sequenced"]:
|
672 |
+
if include_timecodes:
|
673 |
+
transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
|
674 |
+
else:
|
675 |
+
transcription_segment = f"{segment.text}\n"
|
676 |
else:
|
677 |
+
if include_timecodes:
|
678 |
+
transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
|
679 |
+
else:
|
680 |
+
transcription_segment = f"{segment['text']}\n"
|
681 |
transcription += transcription_segment
|
682 |
if verbose:
|
683 |
yield verbose_messages + metrics_output, transcription, None
|
|
|
701 |
gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
|
702 |
|
703 |
with gr.Row():
|
704 |
+
audio_upload = gr.Audio(label="Upload or Record Audio", source="upload")
|
705 |
+
audio_url = gr.Textbox(label="Or Enter URL of audio file or YouTube link")
|
706 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
707 |
with gr.Row():
|
708 |
+
proxy_url = gr.Textbox(label="Proxy URL", placeholder="Enter proxy URL if needed", value="", lines=1)
|
709 |
+
proxy_username = gr.Textbox(label="Proxy Username", placeholder="Proxy username (optional)", value="", lines=1)
|
710 |
+
proxy_password = gr.Textbox(label="Proxy Password", placeholder="Proxy password (optional)", value="", lines=1, type="password")
|
711 |
|
712 |
transcribe_button = gr.Button("Transcribe")
|
713 |
+
|
714 |
+
with gr.Accordion("Advanced Options", open=False):
|
715 |
+
with gr.Row():
|
716 |
+
pipeline_type = gr.Dropdown(
|
717 |
+
choices=["faster-batched", "faster-sequenced", "transformers"],
|
718 |
+
label="Pipeline Type",
|
719 |
+
value="faster-batched"
|
720 |
+
)
|
721 |
+
model_id = gr.Dropdown(
|
722 |
+
label="Model",
|
723 |
+
choices=get_model_options("faster-batched"),
|
724 |
+
value="cstr/whisper-large-v3-turbo-int8_float32"
|
725 |
+
)
|
726 |
+
|
727 |
+
with gr.Row():
|
728 |
+
dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
|
729 |
+
batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
|
730 |
+
download_method = gr.Dropdown(
|
731 |
+
choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
|
732 |
+
label="Download Method",
|
733 |
+
value="yt-dlp"
|
734 |
+
)
|
735 |
+
|
736 |
+
with gr.Row():
|
737 |
+
start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
|
738 |
+
end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
|
739 |
+
verbose = gr.Checkbox(label="Verbose Output", value=False)
|
740 |
+
include_timecodes = gr.Checkbox(label="Include timecodes in transcription", value=False)
|
741 |
+
|
742 |
with gr.Row():
|
743 |
metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
|
744 |
transcription_output = gr.Textbox(label="Transcription", lines=10)
|
745 |
transcription_file = gr.File(label="Download Transcription")
|
746 |
+
|
747 |
def update_model_dropdown(pipeline_type):
|
748 |
"""
|
749 |
Updates the model dropdown choices based on the selected pipeline type.
|
|
|
765 |
logging.error(f"Error in update_model_dropdown: {str(e)}")
|
766 |
return gr.update(choices=["Error"], value="Error", visible=True)
|
767 |
|
768 |
+
# Event handler for pipeline_type change
|
769 |
pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
|
770 |
|
771 |
def transcribe_with_progress(*args):
|
|
|
774 |
|
775 |
transcribe_button.click(
|
776 |
transcribe_with_progress,
|
777 |
+
inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
|
778 |
outputs=[metrics_output, transcription_output, transcription_file]
|
779 |
)
|
780 |
|
781 |
gr.Examples(
|
782 |
examples=[
|
783 |
+
[None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
|
784 |
+
[None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
|
|
|
785 |
],
|
786 |
+
inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
|
787 |
)
|
788 |
|
789 |
iface.launch(share=False, debug=True)
|