Spaces:

RamAnanth1
/

Youtube-to-HF-Dataset

App Files Files Community

Youtube-to-HF-Dataset / app.py

RamAnanth1's picture

Update app.py

cd717c0 almost 2 years ago

history blame contribute delete

1.62 kB

	import gradio as gr
	from dataset import TranscriptDataset
	from downloader import WhisperPP, YoutubeDownloader
	from interpreter import WhisperInterpreter

	# --- Pipeline configuration shared by the Gradio callback below ---

	# Directory handed to the dataset generator as its download location.
	download_path: str = 'tmp/'

	# Forwarded to the dataset generator inside its ``params`` dict.
	# Presumably selects the Whisper checkpoint size -- confirm against dataset.py.
	model_size: str = 'base'

	# Forwarded to the dataset generator inside its ``params`` dict.
	write: bool = False

	# NOTE(review): not read by any active code in this file; kept so that
	# anything importing this module still finds the name.
	mode: str = 'transcribe'

	def dataset(url, name, token):
	    """Build a transcript dataset from a YouTube link and upload it to the Hub.

	    Args:
	        url: YouTube link entered by the user.
	        name: Target dataset repository name (the UI asks for
	            ``username/repo_name``).
	        token: Hugging Face access token passed to ``TranscriptDataset.upload``.

	    Returns:
	        A human-readable message containing the URL of the created dataset.

	    Raises:
	        ValueError: If any of the three inputs is missing or blank. Checking
	            up front avoids running the whole generation step only to fail
	            later on an empty name or token.
	    """
	    # Normalise the inputs: text boxes may deliver None or strings padded
	    # with whitespace from a copy/paste.
	    url = (url or "").strip()
	    name = (name or "").strip()
	    token = (token or "").strip()

	    if not url:
	        raise ValueError("A Youtube link is required.")
	    if not name:
	        raise ValueError("A dataset name in the format username/repo_name is required.")
	    if not token:
	        raise ValueError("A Hugging Face write access token is required.")

	    ds = TranscriptDataset(name)
	    # NOTE(review): number_videos=500 looks like an upper bound on how many
	    # videos are processed -- confirm against TranscriptDataset.
	    params = dict(model_size=model_size, write=write, number_videos=500)
	    # Third positional argument is the overwrite flag; always overwrite.
	    ds.generate_dataset(url, download_path, True, params)
	    ds.upload(token)

	    return "Dataset created at : " + "https://huggingface.co/datasets/" + name

	# Input widgets, in the same order as the parameters of ``dataset``.
	yt_input = gr.Textbox(label='Youtube Link')
	name_input = gr.Textbox(
	    label='Dataset Name',
	    placeholder="Enter in the format username/repo_name",
	)
	# The token grants write access to the user's account, so mask it on screen
	# instead of echoing it in clear text.
	token_input = gr.Textbox(
	    label="HF Token",
	    placeholder="Write access token",
	    type="password",
	)

	# Shows the message returned by ``dataset``.
	repo_output = gr.Textbox(label="Outcome")

	iface = gr.Interface(
	    fn=dataset,
	    inputs=[yt_input, name_input, token_input],
	    outputs=repo_output,
	    title="Create Transcription Dataset for Youtube using OpenAI Whisper !",
	    description="Create a HuggingFace repository for Youtube Transcripts! You need to specify a write token obtained in https://hf.co/settings/token. This Space is an experimental demo.",
	    article="<p>Find your write token at <a href='https://huggingface.co/settings/token' target='_blank'>token settings</a></p>",
	)
	# Start the web app as soon as the script is executed.
	iface.launch()