Add support for file size limits in audio and YouTube transcription, and use yt_dlp for video downloads

#6
by alamin655 - opened
Files changed (1) hide show
  1. app.py +16 -22
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import torch
2
-
3
  import gradio as gr
4
- import pytube as pt
5
  from transformers import pipeline
 
6
 
7
  MODEL_NAME = "openai/whisper-large-v2"
8
  BATCH_SIZE = 8
@@ -35,7 +35,7 @@ def transcribe(microphone, file_upload, task):
35
  elif (microphone is None) and (file_upload is None):
36
  raise gr.Error("You have to either use the microphone or upload an audio file")
37
 
38
- file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
39
  if file_size_mb > FILE_LIMIT_MB:
40
  raise gr.Error(
41
  f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
@@ -59,25 +59,19 @@ def _return_yt_html_embed(yt_url):
59
  return HTML_str
60
 
61
 
62
- def yt_transcribe(yt_url, task, max_filesize=75.0):
63
- yt = pt.YouTube(yt_url)
64
- html_embed_str = _return_yt_html_embed(yt_url)
65
- for attempt in range(YT_ATTEMPT_LIMIT):
66
  try:
67
- yt = pytube.YouTube(yt_url)
68
- stream = yt.streams.filter(only_audio=True)[0]
69
- break
70
- except KeyError:
71
- if attempt + 1 == YT_ATTEMPT_LIMIT:
72
- raise gr.Error("An error occurred while loading the YouTube video. Please try again.")
73
-
74
- if stream.filesize_mb > max_filesize:
75
- raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")
76
-
77
  pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
78
-
79
- text = pipe("audio.mp3", batch_size=BATCH_SIZE)["text"]
80
-
81
  return html_embed_str, text
82
 
83
 
@@ -120,8 +114,8 @@ yt_transcribe = gr.Interface(
120
  allow_flagging="never",
121
  )
122
 
 
123
  with demo:
124
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
125
 
126
- demo.launch(enable_queue=True)
127
-
 
1
  import torch
 
2
  import gradio as gr
3
+ import yt_dlp
4
  from transformers import pipeline
5
+ import os
6
 
7
  MODEL_NAME = "openai/whisper-large-v2"
8
  BATCH_SIZE = 8
 
35
  elif (microphone is None) and (file_upload is None):
36
  raise gr.Error("You have to either use the microphone or upload an audio file")
37
 
38
+ file_size_mb = os.stat(file_upload).st_size / (1024 * 1024)
39
  if file_size_mb > FILE_LIMIT_MB:
40
  raise gr.Error(
41
  f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."
 
59
  return HTML_str
60
 
61
 
62
def yt_transcribe(yt_url, task, max_filesize=FILE_LIMIT_MB):
    """Download the audio of a YouTube video with yt_dlp and run it through ``pipe``.

    Args:
        yt_url: URL of the video to fetch.
        task: ``"transcribe"`` selects ``transcribe_token_id`` as the forced
            decoder token; any other value selects ``translate_token_id``.
        max_filesize: Largest accepted download size, in MB.

    Returns:
        A tuple ``(html_embed_str, text)``: the value returned by
        ``_return_yt_html_embed(yt_url)`` and the ``"text"`` field of the
        pipeline output.

    Raises:
        gr.Error: If the download fails, or if the downloaded file is larger
            than ``max_filesize``.
    """
    with yt_dlp.YoutubeDL({'format': 'bestaudio/best'}) as ydl:
        try:
            info_dict = ydl.extract_info(yt_url, download=True)
            audio_path = ydl.prepare_filename(info_dict)
        except Exception as e:
            # Chain the cause so the underlying yt_dlp failure stays in the traceback.
            raise gr.Error(f"Error downloading YouTube video: {str(e)}") from e

    try:
        html_embed_str = _return_yt_html_embed(yt_url)

        # Stat once and reuse the value for both the check and the message.
        file_size_mb = os.stat(audio_path).st_size / (1024 * 1024)
        if file_size_mb > max_filesize:
            raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {file_size_mb:.2f}MB.")

        pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task == "transcribe" else translate_token_id]]
        text = pipe(audio_path, batch_size=BATCH_SIZE)["text"]
    finally:
        # Remove the download on every exit path (oversize file, pipeline
        # failure, success) so rejected or failed requests do not leave
        # files behind on disk.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass

    return html_embed_str, text
76
 
77
 
 
114
  allow_flagging="never",
115
  )
116
 
117
+
118
  with demo:
119
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
120
 
121
+ demo.launch(enable_queue=True)