Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -1,14 +1,16 @@
1
  import gradio as gr
2
  import whisper
 
3
  from pytube import YouTube
4
 
5
  loaded_model = whisper.load_model("base")
6
  current_size = 'base'
7
- def inference(link):
 
 
8
  yt = YouTube(link)
9
  path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
10
- options = whisper.DecodingOptions(without_timestamps=True)
11
- results = loaded_model.transcribe(path)
12
  return results['text']
13
 
14
  def change_model(size):
@@ -41,9 +43,15 @@ with block:
41
  with gr.Group():
42
  with gr.Box():
43
  sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')
44
-
45
- link = gr.Textbox(label="YouTube Link")
46
-
 
 
 
 
 
 
47
  with gr.Row().style(mobile_collapse=False, equal_height=True):
48
  title = gr.Label(label="Video Title", placeholder="Title")
49
  img = gr.Image(label="Thumbnail")
@@ -55,7 +63,7 @@ with block:
55
  btn = gr.Button("Transcribe")
56
 
57
  # Events
58
- btn.click(inference, inputs=[link], outputs=[text])
59
  link.change(populate_metadata, inputs=[link], outputs=[img, title])
60
  sz.change(change_model, inputs=[sz], outputs=[])
61
 
 
1
  import gradio as gr
2
  import whisper
3
+ from whisper import tokenizer
4
  from pytube import YouTube
5
 
6
  loaded_model = whisper.load_model("base")
7
  current_size = 'base'
8
+ AUTO_DETECT_LANG = "Auto Detect"
9
+
10
def inference(link, language):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link: URL of the YouTube video to transcribe.
        language: Language name selected in the UI dropdown. The
            ``AUTO_DETECT_LANG`` placeholder (or an empty / ``None`` value)
            requests automatic language detection.

    Returns:
        The transcribed text produced by the currently loaded Whisper model.
    """
    # The dropdown hands over its placeholder label verbatim. Whisper only
    # runs language detection when ``language`` is None and does not accept
    # the placeholder as a language name, so translate it here -- the
    # handler is the only place the user's actual selection is available.
    if not language or language == AUTO_DETECT_LANG:
        language = None

    yt = YouTube(link)
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    results = loaded_model.transcribe(
        path, without_timestamps=True, language=language
    )
    return results['text']
15
 
16
  def change_model(size):
 
43
  with gr.Group():
44
  with gr.Box():
45
  sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')
46
# Language choices: the auto-detect placeholder first, followed by every
# language name known to Whisper's tokenizer, in alphabetical order.
available_languages = [AUTO_DETECT_LANG] + sorted(tokenizer.TO_LANGUAGE_CODE)

# Layout options are applied through .style(), exactly as for the
# title/thumbnail row below; ``mobile_collaps`` passed to the Row
# constructor was a misspelled, misplaced option.
with gr.Row().style(mobile_collapse=False, equal_height=True):
    link = gr.Textbox(label="YouTube Link")
    language = gr.Dropdown(
        label="Language",
        choices=available_languages,
        value=AUTO_DETECT_LANG,
    )

# NOTE: ``language`` is the Dropdown component at this point, not the value
# the user picked, so it must not be compared with AUTO_DETECT_LANG (or
# rebound to None) while the layout is being built. Any translation of the
# placeholder to None has to happen inside the click handler.
54
+
55
  with gr.Row().style(mobile_collapse=False, equal_height=True):
56
  title = gr.Label(label="Video Title", placeholder="Title")
57
  img = gr.Image(label="Thumbnail")
 
63
  btn = gr.Button("Transcribe")
64
 
65
  # Events
66
+ btn.click(inference, inputs=[link,language], outputs=[text])
67
  link.change(populate_metadata, inputs=[link], outputs=[img, title])
68
  sz.change(change_model, inputs=[sz], outputs=[])
69