File size: 1,618 Bytes
8219a00
768f178
 
 
8219a00
bd727b1
 
 
 
 
d6f7fe8
bd727b1
 
1569cbe
 
 
 
 
ddfe2b4
e14969e
 
d6f7fe8
e14969e
cd717c0
c82be35
bd727b1
cd717c0
 
bd727b1
 
 
d6f7fe8
 
 
8219a00
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
from dataset import TranscriptDataset
from downloader import WhisperPP, YoutubeDownloader
from interpreter import WhisperInterpreter

# Run-wide settings read by the `dataset` callback in this file.
model_size: str = "base"  # forwarded as the `model_size` generation parameter
mode: str = "transcribe"  # not read by any active code in this file
write: bool = False  # forwarded as the `write` generation parameter
download_path: str = "tmp/"  # passed to `generate_dataset` as the download location

def dataset(url, name, token):
    """Generate a transcript dataset for a YouTube link and upload it.

    This is the callback behind the web form: it receives the three text
    fields and returns the string shown in the output box.

    Args:
        url: YouTube link to process.
        name: Target dataset repository name; it is appended to
            ``https://huggingface.co/datasets/`` in the success message.
        token: Access token handed to ``TranscriptDataset.upload``.

    Returns:
        A message with the dataset URL on success, or a message naming the
        form fields that were left blank.
    """
    # Text fields may arrive empty (or as None); trim stray whitespace that
    # commonly sneaks in when links and tokens are pasted.
    url = (url or "").strip()
    name = (name or "").strip()
    token = (token or "").strip()

    # Fail fast with a readable message instead of starting the generation
    # with unusable inputs.
    required = (("Youtube Link", url), ("Dataset Name", name), ("HF Token", token))
    missing = [label for label, value in required if not value]
    if missing:
        return "Error: missing required field(s): " + ", ".join(missing)

    ds = TranscriptDataset(name)
    params = dict(model_size=model_size, write=write, number_videos=500)
    overwrite = True
    # NOTE(review): presumably downloads the videos into `download_path` and
    # transcribes them -- confirm against TranscriptDataset.generate_dataset.
    ds.generate_dataset(url, download_path, overwrite, params)
    ds.upload(token)

    return "Dataset created at : " + "https://huggingface.co/datasets/" + name
            
# Input widgets, in the positional order expected by `dataset(url, name, token)`.
yt_input = gr.Textbox(label="Youtube Link")
name_input = gr.Textbox(
    label="Dataset Name",
    placeholder="Enter in the format username/repo_name",
)
token_input = gr.Textbox(
    label="HF Token",
    placeholder="Write access token",
)

# Single text output that shows the string returned by `dataset`.
repo_output = gr.Textbox(label="Outcome")

# Wire the form to the callback and start the app.
iface = gr.Interface(
    fn=dataset,
    inputs=[yt_input, name_input, token_input],
    outputs=repo_output,
    title="Create Transcription Dataset for Youtube using OpenAI Whisper !",
    description=(
        "Create a HuggingFace repository for Youtube Transcripts! "
        "You need to specify a write token obtained in https://hf.co/settings/token. "
        "This Space is a an experimental demo."
    ),
    article=(
        "<p>Find your write token at "
        "<a href='https://huggingface.co/settings/token' target='_blank'>token settings</a></p>"
    ),
)
iface.launch()