mlkorra committed on
Commit
da8d60d
·
1 Parent(s): b6db4e0

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ from whisper.utils import write_vtt
4
+
5
+ from pytube import YouTube
6
+ import os
7
+ import sys
8
+ import subprocess
9
+
10
# Name of the Whisper checkpoint loaded at startup. It is kept next to the
# model object so change_model() can compare a requested size against it.
current_size = 'base'
loaded_model = whisper.load_model(current_size)
12
+
13
def inference(link):
    """Caption a YouTube video with Whisper and burn the captions into it.

    Downloads the audio track and an MP4 video of ``link``, runs the globally
    loaded Whisper model on the audio, writes the resulting segments to a
    ``.vtt`` file and asks ffmpeg to render them onto the video.

    Args:
        link: URL of the YouTube video.

    Returns:
        Path of the subtitled MP4 file written by ffmpeg.

    Raises:
        ValueError: if the video offers no progressive MP4 stream.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    yt = YouTube(link)

    audio_path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    print(f'audio path : {audio_path}')

    # ffmpeg needs a file on disk, so pick one concrete stream and download
    # it. Progressive streams carry both video and audio, which keeps the
    # sound in the subtitled output.
    video_stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
    if video_stream is None:
        raise ValueError(f"No progressive mp4 stream available for {link!r}")
    video_path = video_stream.download(filename="video.mp4")

    options = dict(beam_size=5, best_of=5, fp16=False)
    # Whisper only defines the tasks "transcribe" and "translate".
    # NOTE(review): "translate" yields English captions; switch to
    # "transcribe" if captions should stay in the spoken language.
    translate_options = dict(task="translate", **options)
    results = loaded_model.transcribe(audio_path, **translate_options)

    # Derive sibling file names from the audio file, dropping only its
    # extension (directories in the path may themselves contain dots).
    base = os.path.splitext(audio_path)[0]
    subtitle = base + ".vtt"
    output_video = base + "_subtitled.mp4"

    with open(subtitle, "w", encoding="utf-8") as vtt:
        write_vtt(results["segments"], file=vtt)

    # Argument list instead of a shell string: paths are passed verbatim and
    # a failing ffmpeg run raises instead of being silently ignored.
    # "-y" overwrites the output of a previous run, since the name is fixed.
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-i", video_path,
            "-vf", f"subtitles={subtitle}",
            output_video,
        ],
        check=True,
    )

    return output_video
38
+
39
def change_model(size):
    """Replace the globally loaded Whisper model with the ``size`` checkpoint.

    Does nothing when ``size`` is already the loaded checkpoint, so picking
    the current dropdown value does not trigger a reload.

    Args:
        size: Whisper checkpoint name, e.g. ``'base'`` or ``'small'``.
    """
    # Both names live at module level; without this declaration the
    # assignments below would make them locals and the comparison would
    # raise UnboundLocalError.
    global loaded_model, current_size

    if size == current_size:
        return
    loaded_model = whisper.load_model(size)
    current_size = size
44
+
45
def populate_metadata(link):
    """Return ``(thumbnail_url, title)`` for the YouTube video at ``link``."""
    video = YouTube(link)
    thumbnail = video.thumbnail_url
    video_title = video.title
    return thumbnail, video_title
48
+
49
# Page title and tagline. The same text is repeated literally in the HTML
# header below, and `title` is later rebound to the gr.Label component.
title="Youtube Caption Generator"
description="Generate captions of Youtube videos using OpenAI's Whisper"
block = gr.Blocks()

# NOTE(review): indentation was not visible in this view; the nesting of the
# layout below is reconstructed -- confirm against the original file.
with block:
    # Static page header.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div>
        <h1>Youtube Caption Generator</h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Generate captions of Youtube videos using OpenAI's Whisper
        </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Box():
            # Whisper checkpoint selector; 'base' matches the model loaded at startup.
            sz = gr.Dropdown(label="Model Size", choices=['base','small', 'medium', 'large'], value='base')

            link = gr.Textbox(label="YouTube Link")

            # Metadata preview, filled in by populate_metadata().
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")

            # text = gr.Textbox(
            # label="Transcription",
            # placeholder="Transcription Output",
            # lines=5)

            # Receives the video path returned by inference().
            op_video = gr.Video()

            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn = gr.Button("Generate Captions")

            # Events
            # Button click: run inference() on the link and show the resulting video.
            btn.click(inference, inputs=[link], outputs=[op_video])
            # Link edited: populate_metadata() returns (thumbnail_url, title),
            # routed to the image and label in that order.
            link.change(populate_metadata, inputs=[link], outputs=[img, title])
            # Dropdown changed: change_model() gets the new size; no outputs are wired.
            sz.change(change_model, inputs=[sz], outputs=[])

block.launch(debug=True,enable_queue=True)