File size: 2,842 Bytes
83f94aa
f7ece39
65d0224
 
 
f7ece39
 
 
 
6ec5436
65d0224
f7ece39
 
 
83f94aa
 
 
6ec5436
 
83f94aa
6ec5436
83f94aa
f7ece39
 
 
6ec5436
 
 
f7ece39
83f94aa
6ec5436
83f94aa
f7ece39
 
83f94aa
f7ece39
 
83f94aa
f7ece39
83f94aa
 
f7ece39
 
 
 
83f94aa
 
 
 
 
f7ece39
 
 
 
 
 
83f94aa
f7ece39
 
 
 
 
 
 
 
 
65d0224
 
f7ece39
 
 
 
 
 
 
 
 
 
 
 
 
83f94aa
da8d60d
 
f7ece39
 
 
 
 
 
83f94aa
 
6ec5436
83f94aa
f7ece39
 
83f94aa
 
f7ece39
 
83f94aa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

import gradio as gr
import whisper
from whisper.utils import write_vtt

from pytube import YouTube
import os
import sys
import subprocess
import re

# Size name of the Whisper model currently held in memory.
current_size = 'base'
# Load that model once at import time; inference() reads this module-level object.
loaded_model = whisper.load_model(current_size)

def download_video(link):
    """Download the first MP4 stream of a YouTube video.

    Args:
        link: URL of the YouTube video. May be empty or ``None`` (the
            textbox ``change`` event fires with an empty value when the
            field is cleared).

    Returns:
        The path returned by pytube for the downloaded ``<video id>.mp4``
        file, or ``None`` when ``link`` is empty or blank.

    Raises:
        ValueError: If the video offers no stream with an ``mp4`` extension.
    """
    # Nothing to download for an empty/blank textbox; returning None avoids
    # handing an unusable URL to pytube.
    if not link or not link.strip():
        return None

    yt = YouTube(link.strip())
    # Use the id pytube already parsed from the URL instead of slicing it out
    # of the thumbnail URL, whose layout is not guaranteed.
    vid = yt.video_id
    print(vid)

    stream = yt.streams.filter(file_extension='mp4').first()
    if stream is None:
        raise ValueError(f"No MP4 stream available for {link!r}")

    return stream.download(filename=f"{vid}.mp4")

def inference(link):
    """Caption a YouTube video and return the path of the subtitled copy.

    Steps: download the first audio-only stream and the MP4 video, run the
    module-level Whisper model on the audio with the ``translate`` task,
    write the resulting segments to a ``.vtt`` file next to the audio, then
    have ffmpeg burn those subtitles into the video.

    Args:
        link: URL of the YouTube video.

    Returns:
        Path of the generated ``<video id>_subtitled.mp4`` file.

    Raises:
        ValueError: If ``link`` is empty or blank.
        RuntimeError: If ffmpeg cannot be started or exits with an error.
    """
    if not link or not link.strip():
        raise ValueError("A YouTube link is required to generate captions")
    link = link.strip()

    yt = YouTube(link)
    vid = yt.video_id

    # The file is named .mp3 regardless of which audio stream comes first.
    audio_path = yt.streams.filter(only_audio=True)[0].download(filename=f"{vid}.mp3")
    print(f'audio path :  {audio_path}')
    video_path = os.path.abspath(download_video(link))

    options = dict(beam_size=5, best_of=5, fp16=False)
    translate_options = dict(task="translate", **options)
    results = loaded_model.transcribe(audio_path, **translate_options)

    # splitext only strips the final extension; splitting on the first "."
    # would truncate the path if any directory name contained a dot.
    base, _ = os.path.splitext(os.path.abspath(audio_path))
    subtitle = base + ".vtt"
    output_video = base + "_subtitled.mp4"

    # Explicit UTF-8 so non-ASCII text does not depend on the platform default.
    with open(subtitle, "w", encoding="utf-8") as vtt:
        write_vtt(results["segments"], file=vtt)

    # Run ffmpeg from the subtitle's directory and refer to the subtitle by
    # its bare file name, so the filter argument never contains path
    # characters that ffmpeg's filter syntax would need escaped. The
    # input/output paths are absolute and passed as separate argv entries,
    # so no shell quoting is involved. "-y" overwrites a previous result
    # instead of waiting for confirmation.
    command = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-vf", f"subtitles={os.path.basename(subtitle)}",
        output_video,
    ]
    try:
        subprocess.run(command, check=True, cwd=os.path.dirname(subtitle))
    except (OSError, subprocess.CalledProcessError) as exc:
        print(f'system Error : {exc}')
        raise RuntimeError(f"ffmpeg failed to create {output_video}") from exc

    return output_video
  
def change_model(size):
    """Replace the module-level Whisper model with one of the given size.

    Does nothing when ``size`` is empty or already the loaded size.

    Args:
        size: Model size name passed to ``whisper.load_model``.

    Returns:
        None.
    """
    # Without this declaration the assignments below would create locals,
    # making the comparison fail with UnboundLocalError and leaving the
    # module-level model untouched.
    global loaded_model, current_size

    if not size or size == current_size:
        return

    loaded_model = whisper.load_model(size)
    current_size = size

def populate_metadata(link):
    """Return the ``(thumbnail_url, title)`` pair of the video at ``link``."""
    video = YouTube(link)
    thumbnail = video.thumbnail_url
    name = video.title
    return thumbnail, name

# Heading and tagline of the app; the same text is embedded in the HTML header below.
title = "Youtube Caption Generator"
description = "Generate captions of Youtube videos using OpenAI's Whisper"

with gr.Blocks() as block:
    # Centered page header.
    gr.HTML(
        """
            <div style="text-align: center; max-width: 500px; margin: 0 auto;">
              <div>
                <h1>Youtube Caption Generator</h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%">
                Generate captions of Youtube videos using OpenAI's Whisper
              </p>
            </div>
        """
    )

    with gr.Group(), gr.Box():
        # Inputs: model size selector and the video URL.
        sz = gr.Dropdown(
            label="Model Size",
            choices=["base", "small", "medium", "large"],
            value="base",
        )
        link = gr.Textbox(label="YouTube Link")

        # Source video on the left, captioned result on the right.
        with gr.Row().style(mobile_collapse=False, equal_height=True):
            input_video = gr.Video()
            output_video = gr.Video()

        btn = gr.Button("Generate Captions")

        # Events: the button runs captioning, editing the link fetches the
        # source video for preview, and the dropdown swaps the model.
        btn.click(inference, inputs=[link], outputs=[output_video])
        link.change(download_video, inputs=[link], outputs=[input_video])
        sz.change(change_model, inputs=[sz], outputs=[])

block.launch(debug=True, enable_queue=True)