import gradio as gr

from dataset import TranscriptDataset
from downloader import WhisperPP, YoutubeDownloader
from interpreter import WhisperInterpreter


# Module-level settings read by ``dataset()`` further down in this file.

# Forwarded to the dataset generator inside ``params``
# (presumably the Whisper checkpoint size -- confirm against the interpreter).
model_size = "base"

# NOTE(review): not referenced anywhere in the visible part of this file;
# kept because other code may read it.
mode = "transcribe"

# Forwarded to the dataset generator inside ``params``.
write = False

# Passed to ``generate_dataset`` as its second positional argument.
download_path = "tmp/"


def dataset(url, name, token):
    """Generate a transcript dataset for a YouTube link and upload it.

    Args:
        url: YouTube link handed to ``TranscriptDataset.generate_dataset``.
        name: Dataset repository name used to construct ``TranscriptDataset``
            and to build the returned URL.
        token: Access token handed to ``TranscriptDataset.upload``.

    Returns:
        A message containing the Hugging Face URL of the dataset.

    Raises:
        ValueError: If ``url``, ``name`` or ``token`` is empty or
            whitespace-only.
    """
    # Treat None like an empty field and drop accidental surrounding
    # whitespace so the repository URL built below is well-formed.
    url, name, token = ((value or "").strip() for value in (url, name, token))

    # Fail fast: reject an incomplete form before ``generate_dataset`` is
    # invoked instead of discovering the problem afterwards.
    if not url:
        raise ValueError("A Youtube link is required.")
    if not name:
        raise ValueError("A dataset name (username/repo_name) is required.")
    if not token:
        raise ValueError("A HF write access token is required.")

    ds = TranscriptDataset(name)

    # Options forwarded verbatim to the generator; ``number_videos`` is
    # presumably an upper bound on processed videos -- confirm in
    # ``TranscriptDataset``.
    params = {"model_size": model_size, "write": write, "number_videos": 500}
    overwrite = True
    ds.generate_dataset(url, download_path, overwrite, params)
    ds.upload(token)

    return f"Dataset created at : https://huggingface.co/datasets/{name}"


# --- Gradio UI: three text inputs wired to ``dataset`` and one text output ---

yt_input = gr.Textbox(label="Youtube Link")
name_input = gr.Textbox(
    label="Dataset Name",
    placeholder="Enter in the format username/repo_name",
)
# The token grants write access, so mask it instead of echoing it on screen.
token_input = gr.Textbox(
    label="HF Token",
    placeholder="Write access token",
    type="password",
)

repo_output = gr.Textbox(label="Outcome")

iface = gr.Interface(
    fn=dataset,
    inputs=[yt_input, name_input, token_input],
    outputs=repo_output,
    title="Create Transcription Dataset for Youtube using OpenAI Whisper !",
    description=(
        "Create a HuggingFace repository for Youtube Transcripts! "
        "You need to specify a write token obtained in "
        "https://hf.co/settings/token. "
        "This Space is an experimental demo."
    ),
    article=(
        "<p>Find your write token at "
        "<a href='https://huggingface.co/settings/token' target='_blank'>"
        "token settings</a></p>"
    ),
)

# Started at import time, as before, so running the script serves the app.
iface.launch()