cstr committed on
Commit
89cebe2
·
verified ·
1 Parent(s): 516bec5
Files changed (1) hide show
  1. app.py +132 -46
app.py CHANGED
@@ -72,21 +72,19 @@ def download_youtube_audio(url, method_choice):
72
 
73
  Args:
74
  url (str): The YouTube URL.
75
- method_choice (str): The method to use for downloading ('yt-dlp', 'pytube', 'youtube-dl').
76
 
77
  Returns:
78
  str: Path to the downloaded audio file, or None if failed.
79
  """
80
  methods = {
81
- 'yt-dlp': youtube_dl_method,
82
  'pytube': pytube_method,
83
- 'youtube-dl': youtube_dl_classic_method,
84
- 'yt-dlp-alt': youtube_dl_alternative_method,
85
  }
86
  method = methods.get(method_choice)
87
  if method is None:
88
  logging.warning(f"Invalid download method for YouTube: {method_choice}. Defaulting to 'yt-dlp'.")
89
- method = youtube_dl_method
90
  try:
91
  logging.info(f"Attempting to download YouTube audio using {method_choice}")
92
  return method(url)
@@ -115,17 +113,64 @@ def youtube_dl_method(url):
115
  logging.error(f"Error in youtube_dl_method: {str(e)}")
116
  return None
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def pytube_method(url):
 
 
 
 
 
 
 
 
 
119
  logging.info("Using pytube method")
120
- from pytube import YouTube
121
- yt = YouTube(url)
122
- audio_stream = yt.streams.filter(only_audio=True).first()
123
- out_file = audio_stream.download()
124
- base, ext = os.path.splitext(out_file)
125
- new_file = base + '.mp3'
126
- os.rename(out_file, new_file)
127
- logging.info(f"Downloaded and converted audio to: {new_file}")
128
- return new_file
 
 
 
 
129
 
130
  def youtube_dl_classic_method(url):
131
  logging.info("Using youtube-dl classic method")
@@ -179,6 +224,32 @@ def aria2_method(url):
179
  logging.info(f"Downloaded audio to: {output_file}")
180
  return output_file
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  def download_direct_audio(url, method_choice):
183
  """
184
  Downloads audio from a direct URL using the specified method.
@@ -191,35 +262,44 @@ def download_direct_audio(url, method_choice):
191
  str: Path to the downloaded audio file, or None if failed.
192
  """
193
  logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
194
- if method_choice == 'wget':
195
- return wget_method(url)
196
- else:
197
- try:
198
- response = requests.get(url, stream=True)
199
- if response.status_code == 200:
200
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
201
- for chunk in response.iter_content(chunk_size=8192):
202
- if chunk:
203
- temp_file.write(chunk)
204
- logging.info(f"Downloaded direct audio to: {temp_file.name}")
205
- return temp_file.name
206
- else:
207
- raise Exception(f"Failed to download audio from {url} with status code {response.status_code}")
208
- except Exception as e:
209
- logging.error(f"Error downloading direct audio: {str(e)}")
210
- return None
211
 
212
  def wget_method(url):
 
 
 
 
 
 
 
 
 
213
  logging.info("Using wget method")
214
- output_file = tempfile.mktemp(suffix='.mp3')
215
  command = ['wget', '-O', output_file, url]
216
- subprocess.run(command, check=True, capture_output=True)
217
- logging.info(f"Downloaded audio to: {output_file}")
218
- return output_file
 
 
 
 
219
 
220
  def trim_audio(audio_path, start_time, end_time):
221
  """
222
- Trims an audio file to the specified start and end times.
223
 
224
  Args:
225
  audio_path (str): Path to the audio file.
@@ -230,7 +310,7 @@ def trim_audio(audio_path, start_time, end_time):
230
  str: Path to the trimmed audio file.
231
 
232
  Raises:
233
- gr.Error: If invalid start or end times are provided.
234
  """
235
  try:
236
  logging.info(f"Trimming audio from {start_time} to {end_time}")
@@ -256,6 +336,9 @@ def trim_audio(audio_path, start_time, end_time):
256
  trimmed_audio.export(temp_audio_file.name, format="wav")
257
  logging.info(f"Trimmed audio saved to: {temp_audio_file.name}")
258
  return temp_audio_file.name
 
 
 
259
  except Exception as e:
260
  logging.error(f"Error trimming audio: {str(e)}")
261
  raise gr.Error(f"Error trimming audio: {str(e)}")
@@ -319,7 +402,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
319
  logging.getLogger().setLevel(logging.INFO)
320
  else:
321
  logging.getLogger().setLevel(logging.WARNING)
322
-
323
  logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
324
  verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
325
 
@@ -371,21 +454,26 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
371
  elif pipeline_type == "faster-sequenced":
372
  model_or_pipeline = WhisperModel(model_id, device=device, compute_type=dtype)
373
  elif pipeline_type == "transformers":
374
- torch_dtype = torch.float16 if dtype == "float16" else torch.float32
 
 
 
 
 
 
 
375
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
376
- model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
377
  )
378
- model.to(device)
379
  processor = AutoProcessor.from_pretrained(model_id)
380
  model_or_pipeline = pipeline(
381
- "automatic-speech-recognition",
382
  model=model,
383
  tokenizer=processor.tokenizer,
384
  feature_extractor=processor.feature_extractor,
385
  chunk_length_s=30,
386
  batch_size=batch_size,
387
  return_timestamps=True,
388
- torch_dtype=torch_dtype,
389
  device=device,
390
  )
391
  else:
@@ -432,11 +520,9 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
432
  yield f"An error occurred: {str(e)}", "", None
433
 
434
  finally:
435
- # Clean up temporary files
436
  if audio_path and is_temp_file and os.path.exists(audio_path):
437
  os.remove(audio_path)
438
- if 'transcription_file' in locals() and transcription_file and os.path.exists(transcription_file):
439
- os.remove(transcription_file)
440
 
441
  with gr.Blocks() as iface:
442
  gr.Markdown("# Multi-Pipeline Transcription")
 
72
 
73
  Args:
74
  url (str): The YouTube URL.
75
+ method_choice (str): The method to use for downloading ('yt-dlp', 'pytube').
76
 
77
  Returns:
78
  str: Path to the downloaded audio file, or None if failed.
79
  """
80
  methods = {
81
+ 'yt-dlp': yt_dlp_method,
82
  'pytube': pytube_method,
 
 
83
  }
84
  method = methods.get(method_choice)
85
  if method is None:
86
  logging.warning(f"Invalid download method for YouTube: {method_choice}. Defaulting to 'yt-dlp'.")
87
+ method = yt_dlp_method
88
  try:
89
  logging.info(f"Attempting to download YouTube audio using {method_choice}")
90
  return method(url)
 
113
  logging.error(f"Error in youtube_dl_method: {str(e)}")
114
  return None
115
 
def yt_dlp_method(url):
    """
    Downloads audio using yt-dlp and extracts it to MP3 via FFmpeg.

    Args:
        url (str): The YouTube URL.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using yt-dlp method")
    try:
        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'outtmpl': '%(id)s.%(ext)s',
            'quiet': True,
            'no_warnings': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            source_file = ydl.prepare_filename(info)
        # prepare_filename() reports the name of the stream as downloaded
        # (.webm, .m4a, .opus, ...). The FFmpegExtractAudio post-processor
        # then writes '<same stem>.mp3', so the extension is always swapped,
        # not only for .webm/.mp4 sources.
        output_file = source_file.rsplit('.', 1)[0] + '.mp3'
        logging.info(f"Downloaded YouTube audio: {output_file}")
        return output_file
    except Exception as e:
        # Any failure is reported to the caller as None.
        logging.error(f"Error in yt_dlp_method: {str(e)}")
        return None
149
+
def pytube_method(url):
    """
    Downloads the first audio-only stream of a YouTube video using pytube.

    The file written by pytube is renamed so that it ends in '.mp3'. No
    transcoding is performed; only the file extension changes.

    Args:
        url (str): The YouTube URL.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using pytube method")
    try:
        # Imported inside the try block so that a missing pytube install is
        # reported as a failed download instead of an import-time crash.
        from pytube import YouTube
        audio_stream = YouTube(url).streams.filter(only_audio=True).first()
        if audio_stream is None:
            # first() yields None when the query matches no stream.
            logging.error(f"Error in pytube_method: no audio-only stream available for {url}")
            return None
        out_file = audio_stream.download()
        base, _ = os.path.splitext(out_file)
        new_file = base + '.mp3'
        # os.replace overwrites an existing target on every platform, whereas
        # os.rename raises on Windows when new_file already exists.
        os.replace(out_file, new_file)
        logging.info(f"Downloaded and converted audio to: {new_file}")
        return new_file
    except Exception as e:
        logging.error(f"Error in pytube_method: {str(e)}")
        return None
174
 
175
  def youtube_dl_classic_method(url):
176
  logging.info("Using youtube-dl classic method")
 
224
  logging.info(f"Downloaded audio to: {output_file}")
225
  return output_file
226
 
def requests_method(url):
    """
    Downloads audio using the requests library.

    The response body is streamed in 8 KiB chunks into a temporary '.mp3'
    file that is kept on disk for the caller. If the download fails part-way,
    the partial file is removed.

    Args:
        url (str): The URL of the audio file.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    temp_path = None
    try:
        # requests applies no timeout by default, so an unresponsive server
        # would block forever. The context manager releases the connection
        # on every exit path.
        with requests.get(url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                logging.error(f"Failed to download audio from {url} with status code {response.status_code}")
                return None
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_path = temp_file.name
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        temp_file.write(chunk)
        logging.info(f"Downloaded direct audio to: {temp_path}")
        return temp_path
    except Exception as e:
        logging.error(f"Error in requests_method: {str(e)}")
        # Do not leave a truncated download behind.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
        return None
252
+
253
  def download_direct_audio(url, method_choice):
254
  """
255
  Downloads audio from a direct URL using the specified method.
 
262
  str: Path to the downloaded audio file, or None if failed.
263
  """
264
  logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
265
+ methods = {
266
+ 'wget': wget_method,
267
+ 'requests': requests_method,
268
+ }
269
+ method = methods.get(method_choice)
270
+ if method is None:
271
+ logging.warning(f"Invalid download method: {method_choice}. Defaulting to 'requests'.")
272
+ method = requests_method
273
+ try:
274
+ return method(url)
275
+ except Exception as e:
276
+ logging.error(f"Error downloading direct audio: {str(e)}")
277
+ return None
 
 
 
 
278
 
def wget_method(url):
    """
    Downloads audio using the wget command-line tool.

    The target is a temporary '.mp3' file that is kept on disk for the
    caller. If wget fails or is not installed, the file is removed.

    Args:
        url (str): The URL of the audio file.

    Returns:
        str: Path to the downloaded audio file, or None if failed.
    """
    logging.info("Using wget method")
    # mkstemp creates the file atomically. tempfile.mktemp only returns a
    # name, which is deprecated because another process can claim the path
    # before wget opens it.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)
    # '--' ends option parsing so a URL beginning with '-' cannot be
    # interpreted as a wget flag.
    command = ['wget', '-O', output_file, '--', url]
    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        logging.info(f"Downloaded audio to: {output_file}")
        return output_file
    except Exception as e:
        logging.error(f"Error in wget_method: {str(e)}")
        # Do not leave an empty or truncated download behind.
        if os.path.exists(output_file):
            os.remove(output_file)
        return None
299
 
300
  def trim_audio(audio_path, start_time, end_time):
301
  """
302
+ Trims an audio file to the specified start and end times using pydub.
303
 
304
  Args:
305
  audio_path (str): Path to the audio file.
 
310
  str: Path to the trimmed audio file.
311
 
312
  Raises:
313
+ gr.Error: If invalid start or end times are provided or if FFmpeg is not found.
314
  """
315
  try:
316
  logging.info(f"Trimming audio from {start_time} to {end_time}")
 
336
  trimmed_audio.export(temp_audio_file.name, format="wav")
337
  logging.info(f"Trimmed audio saved to: {temp_audio_file.name}")
338
  return temp_audio_file.name
339
+ except FileNotFoundError as e:
340
+ logging.error(f"FFmpeg not found: {str(e)}")
341
+ raise gr.Error("FFmpeg not found. Please ensure that FFmpeg is installed and in your system PATH.")
342
  except Exception as e:
343
  logging.error(f"Error trimming audio: {str(e)}")
344
  raise gr.Error(f"Error trimming audio: {str(e)}")
 
402
  logging.getLogger().setLevel(logging.INFO)
403
  else:
404
  logging.getLogger().setLevel(logging.WARNING)
405
+
406
  logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
407
  verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
408
 
 
454
  elif pipeline_type == "faster-sequenced":
455
  model_or_pipeline = WhisperModel(model_id, device=device, compute_type=dtype)
456
  elif pipeline_type == "transformers":
457
+ # Adjust torch_dtype based on dtype and device
458
+ if dtype == "float16" and device == "cpu":
459
+ torch_dtype = torch.float32
460
+ elif dtype == "float16":
461
+ torch_dtype = torch.float16
462
+ else:
463
+ torch_dtype = torch.float32
464
+
465
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
466
+ model_id, torch_dtype=torch_dtype
467
  )
 
468
  processor = AutoProcessor.from_pretrained(model_id)
469
  model_or_pipeline = pipeline(
470
+ "automatic-speech-recognition",
471
  model=model,
472
  tokenizer=processor.tokenizer,
473
  feature_extractor=processor.feature_extractor,
474
  chunk_length_s=30,
475
  batch_size=batch_size,
476
  return_timestamps=True,
 
477
  device=device,
478
  )
479
  else:
 
520
  yield f"An error occurred: {str(e)}", "", None
521
 
522
  finally:
523
+ # Clean up temporary audio files
524
  if audio_path and is_temp_file and os.path.exists(audio_path):
525
  os.remove(audio_path)
 
 
526
 
527
  with gr.Blocks() as iface:
528
  gr.Markdown("# Multi-Pipeline Transcription")