import spaces
import gradio as gr
from gryannote_audio import AudioLabeling
from gryannote_rttm import RTTM
from pyannote.audio import Pipeline
import os
import torch
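

# Note: the diarization pipeline below reads a Hugging Face access token from the
# HF_TOKEN environment variable; pyannote/speaker-diarization-3.1 is a gated model,
# so that token must belong to an account that has accepted its user conditions.
# On Hugging Face Spaces, the @spaces.GPU decorator requests a GPU worker for up to
# `duration` seconds per call.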
@spaces.GPU(duration=120)
def apply_pipeline(audio):
    """Apply the pyannote speaker diarization pipeline to the given audio file."""
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1", use_auth_token=os.environ["HF_TOKEN"]
    )
    pipeline.to(torch.device("cuda"))
    annotations = pipeline(audio)
    return ((audio, annotations), annotations)
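
# apply_pipeline returns ((audio, annotations), annotations): the first element
# refreshes the AudioLabeling component (waveform plus labels) and the second one
# fills the RTTM view, matching the outputs of run_btn.click() below.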


def update_annotations(data):
    """Forward annotation edits made on the waveform to the RTTM component."""
    return rttm.on_edit(data)
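
# update_annotations is bound to audio_labeling.edit() below with preprocess and
# postprocess disabled, so the raw event payload is handed directly to
# RTTM.on_edit(), keeping the RTTM view in sync with edits made on the waveform.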


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(
                        '<a href="https://github.com/clement-pages/gryannote"><img src="https://github.com/clement-pages/gryannote/blob/main/docs/assets/logo-gryannote.png?raw=true" alt="gryannote logo" width="140"/></a>',
                    )
                with gr.Column(scale=10):
                    gr.Markdown('<h1 style="font-size: 4em;">gryannote</h1>')
            gr.Markdown()
            gr.Markdown('<h2 style="font-size: 2em;">Make the audio labeling process easier and faster!</h2>')
with gr.Tab("application"):
gr.Markdown(
"To use the component, start by loading or recording audio."
"Then apply the diarization pipeline (here [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1))"
"or double-click directly on the waveform to add an annotations. The annotations produced can be edited."
" You can also use keyboard shortcuts to speed things up! Click on the help button to see all the available shortcuts."
" Finally, annotations can be saved by cliking on the downloading button in the RTTM component."
)
gr.Markdown()
gr.Markdown()
        audio_labeling = AudioLabeling(
            type="filepath",
            interactive=True,
        )
        gr.Markdown()
        gr.Markdown()
        run_btn = gr.Button("Run pipeline")
        rttm = RTTM()
with gr.Tab("poster"):
gr.Markdown(
'<p align="center"><img src="https://github.com/clement-pages/gryannote/blob/main/docs/assets/poster-interspeech.jpg?raw=true" alt="gryannote poster" width=700em/></p>'
)

    # Run the diarization pipeline on the loaded audio and show the result on
    # both the waveform (AudioLabeling component) and the RTTM view.
    run_btn.click(
        fn=apply_pipeline,
        inputs=audio_labeling,
        outputs=[audio_labeling, rttm],
    )

    # Keep the RTTM view in sync with annotations edited on the waveform.
    audio_labeling.edit(
        fn=update_annotations,
        inputs=audio_labeling,
        outputs=rttm,
        preprocess=False,
        postprocess=False,
    )

    # Load annotations from an uploaded RTTM file onto the waveform.
    rttm.upload(
        fn=audio_labeling.load_annotations,
        inputs=[audio_labeling, rttm],
        outputs=audio_labeling,
    )
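
    # Annotations edited in the interface can also be exported in RTTM format via
    # the download button of the RTTM component (see the instructions above).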

if __name__ == "__main__":
    demo.launch()