cstr committed on
Commit
d290706
·
verified ·
1 Parent(s): 60c0a37

+px +ux -tc

Browse files
Files changed (1) hide show
  1. app.py +161 -78
app.py CHANGED
@@ -45,13 +45,16 @@ from faster_whisper.transcribe import BatchedInferencePipeline
45
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
46
  logging.info(f"Using device: {device}")
47
 
48
- def download_audio(url, method_choice):
49
  """
50
- Downloads audio from a given URL using the specified method.
51
 
52
  Args:
53
  url (str): The URL of the audio.
54
  method_choice (str): The method to use for downloading audio.
 
 
 
55
 
56
  Returns:
57
  tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
@@ -60,19 +63,19 @@ def download_audio(url, method_choice):
60
  logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
61
  try:
62
  if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
63
- audio_file = download_youtube_audio(url, method_choice)
64
  if not audio_file:
65
  error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
66
  logging.error(error_msg)
67
  return None, False
68
  elif parsed_url.scheme == 'rtsp':
69
- audio_file = download_rtsp_audio(url)
70
  if not audio_file:
71
  error_msg = f"Failed to download RTSP audio from {url}"
72
  logging.error(error_msg)
73
  return None, False
74
  else:
75
- audio_file = download_direct_audio(url, method_choice)
76
  if not audio_file:
77
  error_msg = f"Failed to download audio from {url} using method {method_choice}"
78
  logging.error(error_msg)
@@ -84,13 +87,16 @@ def download_audio(url, method_choice):
84
  return None, False
85
 
86
 
87
- def download_youtube_audio(url, method_choice):
88
  """
89
  Downloads audio from a YouTube URL using the specified method.
90
 
91
  Args:
92
  url (str): The YouTube URL.
93
  method_choice (str): The method to use for downloading.
 
 
 
94
 
95
  Returns:
96
  str: Path to the downloaded audio file, or None if failed.
@@ -102,17 +108,20 @@ def download_youtube_audio(url, method_choice):
102
  method = methods.get(method_choice, yt_dlp_method)
103
  try:
104
  logging.info(f"Attempting to download YouTube audio using {method_choice}")
105
- return method(url)
106
  except Exception as e:
107
  logging.error(f"Error downloading using {method_choice}: {str(e)}")
108
  return None
109
 
110
- def yt_dlp_method(url):
111
  """
112
  Downloads YouTube audio using yt-dlp and saves it to a temporary file.
113
 
114
  Args:
115
  url (str): The YouTube URL.
 
 
 
116
 
117
  Returns:
118
  str: Path to the downloaded audio file, or None if failed.
@@ -133,6 +142,8 @@ def yt_dlp_method(url):
133
  'logger': MyLogger(), # Use a custom logger to capture yt-dlp logs
134
  'progress_hooks': [my_hook], # Hook to capture download progress and errors
135
  }
 
 
136
  try:
137
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
138
  info = ydl.extract_info(url, download=True)
@@ -174,12 +185,15 @@ def my_hook(d):
174
  elif d['status'] == 'error':
175
  logging.error(f"Download error: {d['filename']}")
176
 
177
- def pytube_method(url):
178
  """
179
  Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
180
 
181
  Args:
182
  url (str): The YouTube URL.
 
 
 
183
 
184
  Returns:
185
  str: Path to the downloaded audio file, or None if failed.
@@ -187,7 +201,13 @@ def pytube_method(url):
187
  logging.info("Using pytube method")
188
  from pytube import YouTube
189
  try:
190
- yt = YouTube(url)
 
 
 
 
 
 
191
  audio_stream = yt.streams.filter(only_audio=True).first()
192
  if audio_stream is None:
193
  error_msg = "No audio streams available with pytube."
@@ -205,12 +225,13 @@ def pytube_method(url):
205
  return None
206
 
207
 
208
- def download_rtsp_audio(url):
209
  """
210
  Downloads audio from an RTSP URL using FFmpeg.
211
 
212
  Args:
213
  url (str): The RTSP URL.
 
214
 
215
  Returns:
216
  str: Path to the downloaded audio file, or None if failed.
@@ -218,8 +239,12 @@ def download_rtsp_audio(url):
218
  logging.info("Using FFmpeg to download RTSP stream")
219
  output_file = tempfile.mktemp(suffix='.mp3')
220
  command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
 
 
 
 
221
  try:
222
- subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
223
  logging.info(f"Downloaded RTSP audio to: {output_file}")
224
  return output_file
225
  except subprocess.CalledProcessError as e:
@@ -229,13 +254,16 @@ def download_rtsp_audio(url):
229
  logging.error(f"Error downloading RTSP audio: {str(e)}")
230
  return None
231
 
232
- def download_direct_audio(url, method_choice):
233
  """
234
  Downloads audio from a direct URL using the specified method.
235
 
236
  Args:
237
  url (str): The direct URL of the audio file.
238
  method_choice (str): The method to use for downloading.
 
 
 
239
 
240
  Returns:
241
  str: Path to the downloaded audio file, or None if failed.
@@ -250,7 +278,7 @@ def download_direct_audio(url, method_choice):
250
  }
251
  method = methods.get(method_choice, requests_method)
252
  try:
253
- audio_file = method(url)
254
  if not audio_file or not os.path.exists(audio_file):
255
  error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
256
  logging.error(error_msg)
@@ -260,18 +288,30 @@ def download_direct_audio(url, method_choice):
260
  logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
261
  return None
262
 
263
- def requests_method(url):
264
  """
265
  Downloads audio using the requests library.
266
 
267
  Args:
268
  url (str): The URL of the audio file.
 
 
 
269
 
270
  Returns:
271
  str: Path to the downloaded audio file, or None if failed.
272
  """
273
  try:
274
- response = requests.get(url, stream=True)
 
 
 
 
 
 
 
 
 
275
  if response.status_code == 200:
276
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
277
  for chunk in response.iter_content(chunk_size=8192):
@@ -286,12 +326,15 @@ def requests_method(url):
286
  logging.error(f"Error in requests_method: {str(e)}")
287
  return None
288
 
289
- def wget_method(url):
290
  """
291
  Downloads audio using the wget command-line tool.
292
 
293
  Args:
294
  url (str): The URL of the audio file.
 
 
 
295
 
296
  Returns:
297
  str: Path to the downloaded audio file, or None if failed.
@@ -299,8 +342,12 @@ def wget_method(url):
299
  logging.info("Using wget method")
300
  output_file = tempfile.mktemp(suffix='.mp3')
301
  command = ['wget', '-O', output_file, url]
 
 
 
 
302
  try:
303
- subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
304
  logging.info(f"Downloaded audio to: {output_file}")
305
  return output_file
306
  except subprocess.CalledProcessError as e:
@@ -310,12 +357,15 @@ def wget_method(url):
310
  logging.error(f"Error in wget_method: {str(e)}")
311
  return None
312
 
313
- def yt_dlp_direct_method(url):
314
  """
315
  Downloads audio using yt-dlp (supports various protocols and sites).
316
 
317
  Args:
318
  url (str): The URL of the audio or webpage containing audio.
 
 
 
319
 
320
  Returns:
321
  str: Path to the downloaded audio file, or None if failed.
@@ -333,6 +383,8 @@ def yt_dlp_direct_method(url):
333
  'preferredquality': '192',
334
  }],
335
  }
 
 
336
  try:
337
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
338
  ydl.download([url])
@@ -342,12 +394,15 @@ def yt_dlp_direct_method(url):
342
  logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
343
  return None
344
 
345
- def ffmpeg_method(url):
346
  """
347
  Downloads audio using FFmpeg.
348
 
349
  Args:
350
  url (str): The URL of the audio file.
 
 
 
351
 
352
  Returns:
353
  str: Path to the downloaded audio file, or None if failed.
@@ -355,8 +410,12 @@ def ffmpeg_method(url):
355
  logging.info("Using ffmpeg method")
356
  output_file = tempfile.mktemp(suffix='.mp3')
357
  command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
 
 
 
 
358
  try:
359
- subprocess.run(command, check=True, capture_output=True, text=True)
360
  logging.info(f"Downloaded and converted audio to: {output_file}")
361
  return output_file
362
  except subprocess.CalledProcessError as e:
@@ -366,12 +425,15 @@ def ffmpeg_method(url):
366
  logging.error(f"Error in ffmpeg_method: {str(e)}")
367
  return None
368
 
369
- def aria2_method(url):
370
  """
371
  Downloads audio using aria2.
372
 
373
  Args:
374
  url (str): The URL of the audio file.
 
 
 
375
 
376
  Returns:
377
  str: Path to the downloaded audio file, or None if failed.
@@ -379,6 +441,8 @@ def aria2_method(url):
379
  logging.info("Using aria2 method")
380
  output_file = tempfile.mktemp(suffix='.mp3')
381
  command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
 
 
382
  try:
383
  subprocess.run(command, check=True, capture_output=True, text=True)
384
  logging.info(f"Downloaded audio to: {output_file}")
@@ -402,8 +466,8 @@ def trim_audio(audio_path, start_time, end_time):
402
  Returns:
403
  str: Path to the trimmed audio file.
404
 
405
- Raises:
406
- gr.Error: If invalid start or end times are provided.
407
  """
408
  try:
409
  logging.info(f"Trimming audio from {start_time} to {end_time}")
@@ -464,12 +528,16 @@ def get_model_options(pipeline_type):
464
  # Dictionary to store loaded models
465
  loaded_models = {}
466
 
467
- def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
468
  """
469
  Transcribes audio from a given source using the specified pipeline and model.
470
 
471
  Args:
472
- input_source (str or file): URL of audio, path to local file, or uploaded file object.
 
 
 
 
473
  pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
474
  model_id (str): The ID of the model to use.
475
  dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
@@ -478,6 +546,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
478
  start_time (float, optional): Start time in seconds for trimming audio.
479
  end_time (float, optional): End time in seconds for trimming audio.
480
  verbose (bool, optional): Whether to output verbose logging.
 
481
 
482
  Yields:
483
  Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
@@ -494,29 +563,28 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
494
  if verbose:
495
  yield verbose_messages, "", None
496
 
497
- # Determine if input_source is a URL, file path, or uploaded audio
498
  audio_path = None
499
  is_temp_file = False
500
 
501
- if isinstance(input_source, str):
502
- if input_source.startswith('http://') or input_source.startswith('https://'):
503
- # Input source is a URL
504
- audio_path, is_temp_file = download_audio(input_source, download_method)
505
- if not audio_path:
506
- error_msg = f"Error downloading audio from {input_source} using method {download_method}. Check logs for details."
507
- logging.error(error_msg)
508
- yield verbose_messages + error_msg, "", None
509
- return
510
- elif os.path.exists(input_source):
511
- # Input source is a local file path
512
- audio_path = input_source
513
  is_temp_file = False
514
- elif isinstance(input_source, dict) and 'path' in input_source:
515
- # Input source is an uploaded audio file
516
- audio_path = input_source['path']
517
- is_temp_file = False
 
 
 
 
518
  else:
519
- error_msg = "No valid audio source provided."
520
  logging.error(error_msg)
521
  yield verbose_messages + error_msg, "", None
522
  return
@@ -601,9 +669,15 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
601
 
602
  for segment in segments:
603
  if pipeline_type in ["faster-batched", "faster-sequenced"]:
604
- transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
 
 
 
605
  else:
606
- transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
 
 
 
607
  transcription += transcription_segment
608
  if verbose:
609
  yield verbose_messages + metrics_output, transcription, None
@@ -627,39 +701,49 @@ with gr.Blocks() as iface:
627
  gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
628
 
629
  with gr.Row():
630
- input_source = gr.Audio(label="Audio Source (Upload a file or enter a URL/YouTube URL)")
631
- pipeline_type = gr.Dropdown(
632
- choices=["faster-batched", "faster-sequenced", "transformers"],
633
- label="Pipeline Type",
634
- value="faster-batched"
635
- )
636
- model_id = gr.Dropdown(
637
- label="Model",
638
- choices=get_model_options("faster-batched"),
639
- value=get_model_options("faster-batched")[0]
640
- )
641
-
642
- with gr.Row():
643
- dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
644
- batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
645
- download_method = gr.Dropdown(
646
- choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
647
- label="Download Method",
648
- value="yt-dlp"
649
- )
650
-
651
  with gr.Row():
652
- start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
653
- end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
654
- verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
655
 
656
  transcribe_button = gr.Button("Transcribe")
657
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  with gr.Row():
659
  metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
660
  transcription_output = gr.Textbox(label="Transcription", lines=10)
661
  transcription_file = gr.File(label="Download Transcription")
662
-
663
  def update_model_dropdown(pipeline_type):
664
  """
665
  Updates the model dropdown choices based on the selected pipeline type.
@@ -681,7 +765,7 @@ with gr.Blocks() as iface:
681
  logging.error(f"Error in update_model_dropdown: {str(e)}")
682
  return gr.update(choices=["Error"], value="Error", visible=True)
683
 
684
- # event handler for pipeline_type change
685
  pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
686
 
687
  def transcribe_with_progress(*args):
@@ -690,17 +774,16 @@ with gr.Blocks() as iface:
690
 
691
  transcribe_button.click(
692
  transcribe_with_progress,
693
- inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
694
  outputs=[metrics_output, transcription_output, transcription_file]
695
  )
696
 
697
  gr.Examples(
698
  examples=[
699
- ["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, True],
700
- ["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
701
- ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
702
  ],
703
- inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
704
  )
705
 
706
  iface.launch(share=False, debug=True)
 
45
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
46
  logging.info(f"Using device: {device}")
47
 
48
+ def download_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
49
  """
50
+ Downloads audio from a given URL using the specified method and proxy settings.
51
 
52
  Args:
53
  url (str): The URL of the audio.
54
  method_choice (str): The method to use for downloading audio.
55
+ proxy_url (str): Proxy URL if needed.
56
+ proxy_username (str): Proxy username.
57
+ proxy_password (str): Proxy password.
58
 
59
  Returns:
60
  tuple: (path to the downloaded audio file, is_temp_file), or (None, False) if failed.
 
63
  logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
64
  try:
65
  if 'youtube.com' in parsed_url.netloc or 'youtu.be' in parsed_url.netloc:
66
+ audio_file = download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
67
  if not audio_file:
68
  error_msg = f"Failed to download audio from {url} using method {method_choice}. Ensure yt-dlp is up to date."
69
  logging.error(error_msg)
70
  return None, False
71
  elif parsed_url.scheme == 'rtsp':
72
+ audio_file = download_rtsp_audio(url, proxy_url)
73
  if not audio_file:
74
  error_msg = f"Failed to download RTSP audio from {url}"
75
  logging.error(error_msg)
76
  return None, False
77
  else:
78
+ audio_file = download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password)
79
  if not audio_file:
80
  error_msg = f"Failed to download audio from {url} using method {method_choice}"
81
  logging.error(error_msg)
 
87
  return None, False
88
 
89
 
90
+ def download_youtube_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
91
  """
92
  Downloads audio from a YouTube URL using the specified method.
93
 
94
  Args:
95
  url (str): The YouTube URL.
96
  method_choice (str): The method to use for downloading.
97
+ proxy_url (str): Proxy URL if needed.
98
+ proxy_username (str): Proxy username.
99
+ proxy_password (str): Proxy password.
100
 
101
  Returns:
102
  str: Path to the downloaded audio file, or None if failed.
 
108
  method = methods.get(method_choice, yt_dlp_method)
109
  try:
110
  logging.info(f"Attempting to download YouTube audio using {method_choice}")
111
+ return method(url, proxy_url, proxy_username, proxy_password)
112
  except Exception as e:
113
  logging.error(f"Error downloading using {method_choice}: {str(e)}")
114
  return None
115
 
116
+ def yt_dlp_method(url, proxy_url, proxy_username, proxy_password):
117
  """
118
  Downloads YouTube audio using yt-dlp and saves it to a temporary file.
119
 
120
  Args:
121
  url (str): The YouTube URL.
122
+ proxy_url (str): Proxy URL if needed.
123
+ proxy_username (str): Proxy username.
124
+ proxy_password (str): Proxy password.
125
 
126
  Returns:
127
  str: Path to the downloaded audio file, or None if failed.
 
142
  'logger': MyLogger(), # Use a custom logger to capture yt-dlp logs
143
  'progress_hooks': [my_hook], # Hook to capture download progress and errors
144
  }
145
+ if proxy_url and len(proxy_url.strip()) > 0:
146
+ ydl_opts['proxy'] = proxy_url
147
  try:
148
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
149
  info = ydl.extract_info(url, download=True)
 
185
  elif d['status'] == 'error':
186
  logging.error(f"Download error: {d['filename']}")
187
 
188
+ def pytube_method(url, proxy_url, proxy_username, proxy_password):
189
  """
190
  Downloads audio from a YouTube URL using pytube and saves it to a temporary file.
191
 
192
  Args:
193
  url (str): The YouTube URL.
194
+ proxy_url (str): Proxy URL if needed.
195
+ proxy_username (str): Proxy username.
196
+ proxy_password (str): Proxy password.
197
 
198
  Returns:
199
  str: Path to the downloaded audio file, or None if failed.
 
201
  logging.info("Using pytube method")
202
  from pytube import YouTube
203
  try:
204
+ proxies = None
205
+ if proxy_url and len(proxy_url.strip()) > 0:
206
+ proxies = {
207
+ "http": proxy_url,
208
+ "https": proxy_url
209
+ }
210
+ yt = YouTube(url, proxies=proxies)
211
  audio_stream = yt.streams.filter(only_audio=True).first()
212
  if audio_stream is None:
213
  error_msg = "No audio streams available with pytube."
 
225
  return None
226
 
227
 
228
+ def download_rtsp_audio(url, proxy_url):
229
  """
230
  Downloads audio from an RTSP URL using FFmpeg.
231
 
232
  Args:
233
  url (str): The RTSP URL.
234
+ proxy_url (str): Proxy URL if needed.
235
 
236
  Returns:
237
  str: Path to the downloaded audio file, or None if failed.
 
239
  logging.info("Using FFmpeg to download RTSP stream")
240
  output_file = tempfile.mktemp(suffix='.mp3')
241
  command = ['ffmpeg', '-i', url, '-acodec', 'libmp3lame', '-ab', '192k', '-y', output_file]
242
+ env = os.environ.copy()
243
+ if proxy_url and len(proxy_url.strip()) > 0:
244
+ env['http_proxy'] = proxy_url
245
+ env['https_proxy'] = proxy_url
246
  try:
247
+ subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
248
  logging.info(f"Downloaded RTSP audio to: {output_file}")
249
  return output_file
250
  except subprocess.CalledProcessError as e:
 
254
  logging.error(f"Error downloading RTSP audio: {str(e)}")
255
  return None
256
 
257
+ def download_direct_audio(url, method_choice, proxy_url, proxy_username, proxy_password):
258
  """
259
  Downloads audio from a direct URL using the specified method.
260
 
261
  Args:
262
  url (str): The direct URL of the audio file.
263
  method_choice (str): The method to use for downloading.
264
+ proxy_url (str): Proxy URL if needed.
265
+ proxy_username (str): Proxy username.
266
+ proxy_password (str): Proxy password.
267
 
268
  Returns:
269
  str: Path to the downloaded audio file, or None if failed.
 
278
  }
279
  method = methods.get(method_choice, requests_method)
280
  try:
281
+ audio_file = method(url, proxy_url, proxy_username, proxy_password)
282
  if not audio_file or not os.path.exists(audio_file):
283
  error_msg = f"Failed to download direct audio from {url} using method {method_choice}"
284
  logging.error(error_msg)
 
288
  logging.error(f"Error downloading direct audio with {method_choice}: {str(e)}")
289
  return None
290
 
291
+ def requests_method(url, proxy_url, proxy_username, proxy_password):
292
  """
293
  Downloads audio using the requests library.
294
 
295
  Args:
296
  url (str): The URL of the audio file.
297
+ proxy_url (str): Proxy URL if needed.
298
+ proxy_username (str): Proxy username.
299
+ proxy_password (str): Proxy password.
300
 
301
  Returns:
302
  str: Path to the downloaded audio file, or None if failed.
303
  """
304
  try:
305
+ proxies = None
306
+ auth = None
307
+ if proxy_url and len(proxy_url.strip()) > 0:
308
+ proxies = {
309
+ "http": proxy_url,
310
+ "https": proxy_url
311
+ }
312
+ if proxy_username and proxy_password:
313
+ auth = (proxy_username, proxy_password)
314
+ response = requests.get(url, stream=True, proxies=proxies, auth=auth)
315
  if response.status_code == 200:
316
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
317
  for chunk in response.iter_content(chunk_size=8192):
 
326
  logging.error(f"Error in requests_method: {str(e)}")
327
  return None
328
 
329
+ def wget_method(url, proxy_url, proxy_username, proxy_password):
330
  """
331
  Downloads audio using the wget command-line tool.
332
 
333
  Args:
334
  url (str): The URL of the audio file.
335
+ proxy_url (str): Proxy URL if needed.
336
+ proxy_username (str): Proxy username.
337
+ proxy_password (str): Proxy password.
338
 
339
  Returns:
340
  str: Path to the downloaded audio file, or None if failed.
 
342
  logging.info("Using wget method")
343
  output_file = tempfile.mktemp(suffix='.mp3')
344
  command = ['wget', '-O', output_file, url]
345
+ env = os.environ.copy()
346
+ if proxy_url and len(proxy_url.strip()) > 0:
347
+ env['http_proxy'] = proxy_url
348
+ env['https_proxy'] = proxy_url
349
  try:
350
+ subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
351
  logging.info(f"Downloaded audio to: {output_file}")
352
  return output_file
353
  except subprocess.CalledProcessError as e:
 
357
  logging.error(f"Error in wget_method: {str(e)}")
358
  return None
359
 
360
+ def yt_dlp_direct_method(url, proxy_url, proxy_username, proxy_password):
361
  """
362
  Downloads audio using yt-dlp (supports various protocols and sites).
363
 
364
  Args:
365
  url (str): The URL of the audio or webpage containing audio.
366
+ proxy_url (str): Proxy URL if needed.
367
+ proxy_username (str): Proxy username.
368
+ proxy_password (str): Proxy password.
369
 
370
  Returns:
371
  str: Path to the downloaded audio file, or None if failed.
 
383
  'preferredquality': '192',
384
  }],
385
  }
386
+ if proxy_url and len(proxy_url.strip()) > 0:
387
+ ydl_opts['proxy'] = proxy_url
388
  try:
389
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
390
  ydl.download([url])
 
394
  logging.error(f"Error in yt_dlp_direct_method: {str(e)}")
395
  return None
396
 
397
+ def ffmpeg_method(url, proxy_url, proxy_username, proxy_password):
398
  """
399
  Downloads audio using FFmpeg.
400
 
401
  Args:
402
  url (str): The URL of the audio file.
403
+ proxy_url (str): Proxy URL if needed.
404
+ proxy_username (str): Proxy username.
405
+ proxy_password (str): Proxy password.
406
 
407
  Returns:
408
  str: Path to the downloaded audio file, or None if failed.
 
410
  logging.info("Using ffmpeg method")
411
  output_file = tempfile.mktemp(suffix='.mp3')
412
  command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
413
+ env = os.environ.copy()
414
+ if proxy_url and len(proxy_url.strip()) > 0:
415
+ env['http_proxy'] = proxy_url
416
+ env['https_proxy'] = proxy_url
417
  try:
418
+ subprocess.run(command, check=True, capture_output=True, text=True, env=env)
419
  logging.info(f"Downloaded and converted audio to: {output_file}")
420
  return output_file
421
  except subprocess.CalledProcessError as e:
 
425
  logging.error(f"Error in ffmpeg_method: {str(e)}")
426
  return None
427
 
428
+ def aria2_method(url, proxy_url, proxy_username, proxy_password):
429
  """
430
  Downloads audio using aria2.
431
 
432
  Args:
433
  url (str): The URL of the audio file.
434
+ proxy_url (str): Proxy URL if needed.
435
+ proxy_username (str): Proxy username.
436
+ proxy_password (str): Proxy password.
437
 
438
  Returns:
439
  str: Path to the downloaded audio file, or None if failed.
 
441
  logging.info("Using aria2 method")
442
  output_file = tempfile.mktemp(suffix='.mp3')
443
  command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
444
+ if proxy_url and len(proxy_url.strip()) > 0:
445
+ command.extend(['--all-proxy', proxy_url])
446
  try:
447
  subprocess.run(command, check=True, capture_output=True, text=True)
448
  logging.info(f"Downloaded audio to: {output_file}")
 
466
  Returns:
467
  str: Path to the trimmed audio file.
468
 
469
+ Raises:
470
+ gr.Error: If invalid start or end times are provided.
471
  """
472
  try:
473
  logging.info(f"Trimming audio from {start_time} to {end_time}")
 
528
  # Dictionary to store loaded models
529
  loaded_models = {}
530
 
531
+ def transcribe_audio(audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False, include_timecodes=False):
532
  """
533
  Transcribes audio from a given source using the specified pipeline and model.
534
 
535
  Args:
536
+ audio_upload (file): Uploaded audio file.
537
+ audio_url (str): URL of audio.
538
+ proxy_url (str): Proxy URL if needed.
539
+ proxy_username (str): Proxy username.
540
+ proxy_password (str): Proxy password.
541
  pipeline_type (str): Type of pipeline to use ('faster-batched', 'faster-sequenced', or 'transformers').
542
  model_id (str): The ID of the model to use.
543
  dtype (str): Data type for model computations ('int8', 'float16', or 'float32').
 
546
  start_time (float, optional): Start time in seconds for trimming audio.
547
  end_time (float, optional): End time in seconds for trimming audio.
548
  verbose (bool, optional): Whether to output verbose logging.
549
+ include_timecodes (bool, optional): Whether to include timecodes in the transcription.
550
 
551
  Yields:
552
  Tuple[str, str, str or None]: Metrics and messages, transcription text, path to transcription file.
 
563
  if verbose:
564
  yield verbose_messages, "", None
565
 
566
+ # Determine the audio source
567
  audio_path = None
568
  is_temp_file = False
569
 
570
+ if audio_upload is not None:
571
+ if isinstance(audio_upload, dict) and 'name' in audio_upload:
572
+ # audio_upload is a dict with file info
573
+ audio_path = audio_upload['name']
574
+ is_temp_file = False
575
+ elif isinstance(audio_upload, str) and os.path.exists(audio_upload):
576
+ audio_path = audio_upload
 
 
 
 
 
577
  is_temp_file = False
578
+ elif audio_url is not None and len(audio_url.strip()) > 0:
579
+ # audio_url is provided
580
+ audio_path, is_temp_file = download_audio(audio_url, download_method, proxy_url, proxy_username, proxy_password)
581
+ if not audio_path:
582
+ error_msg = f"Error downloading audio from {audio_url} using method {download_method}. Check logs for details."
583
+ logging.error(error_msg)
584
+ yield verbose_messages + error_msg, "", None
585
+ return
586
  else:
587
+ error_msg = "No audio source provided. Please upload an audio file or enter a URL."
588
  logging.error(error_msg)
589
  yield verbose_messages + error_msg, "", None
590
  return
 
669
 
670
  for segment in segments:
671
  if pipeline_type in ["faster-batched", "faster-sequenced"]:
672
+ if include_timecodes:
673
+ transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
674
+ else:
675
+ transcription_segment = f"{segment.text}\n"
676
  else:
677
+ if include_timecodes:
678
+ transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
679
+ else:
680
+ transcription_segment = f"{segment['text']}\n"
681
  transcription += transcription_segment
682
  if verbose:
683
  yield verbose_messages + metrics_output, transcription, None
 
701
  gr.Markdown("Transcribe audio using multiple pipelines and (Faster) Whisper models.")
702
 
703
  with gr.Row():
704
+ audio_upload = gr.Audio(label="Upload or Record Audio", source="upload")
705
+ audio_url = gr.Textbox(label="Or Enter URL of audio file or YouTube link")
706
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
  with gr.Row():
708
+ proxy_url = gr.Textbox(label="Proxy URL", placeholder="Enter proxy URL if needed", value="", lines=1)
709
+ proxy_username = gr.Textbox(label="Proxy Username", placeholder="Proxy username (optional)", value="", lines=1)
710
+ proxy_password = gr.Textbox(label="Proxy Password", placeholder="Proxy password (optional)", value="", lines=1, type="password")
711
 
712
  transcribe_button = gr.Button("Transcribe")
713
+
714
+ with gr.Accordion("Advanced Options", open=False):
715
+ with gr.Row():
716
+ pipeline_type = gr.Dropdown(
717
+ choices=["faster-batched", "faster-sequenced", "transformers"],
718
+ label="Pipeline Type",
719
+ value="faster-batched"
720
+ )
721
+ model_id = gr.Dropdown(
722
+ label="Model",
723
+ choices=get_model_options("faster-batched"),
724
+ value="cstr/whisper-large-v3-turbo-int8_float32"
725
+ )
726
+
727
+ with gr.Row():
728
+ dtype = gr.Dropdown(choices=["int8", "float16", "float32"], label="Data Type", value="int8")
729
+ batch_size = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
730
+ download_method = gr.Dropdown(
731
+ choices=["yt-dlp", "pytube", "youtube-dl", "yt-dlp-alt", "ffmpeg", "aria2", "wget"],
732
+ label="Download Method",
733
+ value="yt-dlp"
734
+ )
735
+
736
+ with gr.Row():
737
+ start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
738
+ end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
739
+ verbose = gr.Checkbox(label="Verbose Output", value=False)
740
+ include_timecodes = gr.Checkbox(label="Include timecodes in transcription", value=False)
741
+
742
  with gr.Row():
743
  metrics_output = gr.Textbox(label="Transcription Metrics and Verbose Messages", lines=10)
744
  transcription_output = gr.Textbox(label="Transcription", lines=10)
745
  transcription_file = gr.File(label="Download Transcription")
746
+
747
  def update_model_dropdown(pipeline_type):
748
  """
749
  Updates the model dropdown choices based on the selected pipeline type.
 
765
  logging.error(f"Error in update_model_dropdown: {str(e)}")
766
  return gr.update(choices=["Error"], value="Error", visible=True)
767
 
768
+ # Event handler for pipeline_type change
769
  pipeline_type.change(update_model_dropdown, inputs=[pipeline_type], outputs=[model_id])
770
 
771
  def transcribe_with_progress(*args):
 
774
 
775
  transcribe_button.click(
776
  transcribe_with_progress,
777
+ inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
778
  outputs=[metrics_output, transcription_output, transcription_file]
779
  )
780
 
781
  gr.Examples(
782
  examples=[
783
+ [None, "https://www.youtube.com/watch?v=daQ_hqA6HDo", "", "", "", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, False, False],
784
+ [None, "https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453.mp3", "", "", "", "faster-sequenced", "SYSTRAN/faster-whisper-large-v1", "float16", 1, "ffmpeg", 0, 300, False, False],
 
785
  ],
786
+ inputs=[audio_upload, audio_url, proxy_url, proxy_username, proxy_password, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose, include_timecodes],
787
  )
788
 
789
  iface.launch(share=False, debug=True)