Spaces:

clr
/

prosalign

Running

App Files Files Community

prosalign / app.py

clr

Update app.py

42ca8af almost 2 years ago

raw

history blame

3.91 kB

	import gradio as gr
	import subprocess,os
	from datasets import load_dataset, Audio
	import corpora
	import ctcalign,graph
	from numpy import random


	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt


	def _run_step(cmd, cwd=None, capture=True):
	    """Run one setup command and report a failure instead of hiding it.

	    Args:
	        cmd: Argument list passed to ``subprocess.run`` (no shell involved).
	        cwd: Directory to run the command in; ``None`` means the current one.
	        capture: When true, stdout/stderr are captured as text and returned on
	            the result; when false they go straight to the console.

	    Returns:
	        The ``subprocess.CompletedProcess`` for the command.
	    """
	    result = subprocess.run(cmd, cwd=cwd, capture_output=capture, text=True)
	    if result.returncode != 0:
	        # Captured stderr would otherwise be dropped, making a broken
	        # download or build invisible in the logs.
	        print('FAILED::', ' '.join(cmd), '(exit code', result.returncode, ')')
	        if result.stderr:
	            print(result.stderr)
	    return result


	def setup():
	    """Download the REAPER sources from GitHub and build them in ./REAPER/build.

	    Steps: fetch the master zip with wget, unzip it, rename ``REAPER-master``
	    to ``REAPER``, delete the zip, then run ``cmake ..`` and ``make`` inside
	    ``REAPER/build``. The working directory and a final directory listing are
	    printed for debugging.

	    Commands are run with ``cwd=`` rather than ``os.chdir`` so the process-wide
	    working directory is never changed, even if a step raises part-way through.

	    Raises:
	        FileNotFoundError: If a required executable is missing, or if the
	            ``REAPER`` / ``REAPER/build`` directory does not exist when a
	            command is run inside it.
	    """
	    reaper_dir = 'REAPER'
	    build_dir = os.path.join(reaper_dir, 'build')

	    r0 = _run_step(["pwd"])
	    print('PWD::', r0.stdout)

	    # Skip the download when the sources are already present: a second
	    # "mv REAPER-master REAPER" would nest the new copy inside the old one.
	    if not os.path.isdir(reaper_dir):
	        r1 = _run_step(["wget", "https://github.com/google/REAPER/archive/refs/heads/master.zip"])
	        print(r1.stdout)
	        _run_step(["unzip", "./master.zip"], capture=False)
	        _run_step(["mv", "REAPER-master", reaper_dir], capture=False)
	        _run_step(["rm", "./master.zip"], capture=False)

	    # -p keeps a repeated run from failing on an existing build directory.
	    _run_step(["mkdir", "-p", "build"], cwd=reaper_dir, capture=False)
	    r2 = _run_step(["cmake", ".."], cwd=build_dir)
	    print(r2.stdout)
	    r3 = _run_step(["make"], cwd=build_dir)
	    print(r3.stdout)

	    r9 = _run_step(["ls", "-la"])
	    print('LS::', r9.stdout)


	# Download and build REAPER at import time, before the handlers and the UI below are defined.
	# NOTE(review): presumably graph.align_and_graph needs the built REAPER binary - confirm.
	setup()

	def load_lang(langname):
	    """Load the preview rows and the CTC aligner for the selected language.

	    Args:
	        langname: "Icelandic" or "Faroese".

	    Returns:
	        A tuple ``(preview, lang_aligner)``: ``preview`` is the first 10 rows
	        of the corpus table (via ``.data.to_pandas()``) with the 'audio',
	        'speaker_id' and 'duration' columns dropped; ``lang_aligner`` is the
	        object returned by ``ctcalign.aligner`` for that language's model.

	    Raises:
	        ValueError: If ``langname`` is not one of the supported languages
	            (for example ``None`` when nothing is selected).
	    """
	    if langname == "Icelandic":
	        corpus = corpora.ds_i
	        model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
	    elif langname == "Faroese":
	        corpus = corpora.ds_f
	        model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
	    else:
	        # Without this branch an unknown value fell through to an
	        # UnboundLocalError on model_path further down.
	        raise ValueError(f"Unsupported language: {langname!r}")

	    # Raw string: same two characters as before, without the invalid-escape warning.
	    # NOTE(review): confirm ctcalign really expects the backslash before the pipe.
	    model_word_separator = r'\|'
	    model_blank_token = '[PAD]'
	    lang_aligner = ctcalign.aligner(model_path, model_word_separator, model_blank_token)

	    table = corpus.data.to_pandas()
	    table = table.drop(columns=['audio', 'speaker_id', 'duration'])
	    return (table[:10], lang_aligner)


	def f1(langname, lang_aligner):
	    """Plot aligned prosody for one randomly chosen sentence of the corpus.

	    Args:
	        langname: "Icelandic" or "Faroese".
	        lang_aligner: Aligner object passed through unchanged to
	            ``graph.align_and_graph``.

	    Returns:
	        Whatever ``graph.align_and_graph`` returns for the chosen recording's
	        audio path and its 'normalized_text' transcript.

	    Raises:
	        ValueError: If ``langname`` is not a supported language, e.g. when the
	            button is clicked before a language has been selected.
	    """
	    if langname == "Icelandic":
	        ds = corpora.ds_i
	    elif langname == "Faroese":
	        ds = corpora.ds_f
	    else:
	        # Without this branch the function died with an UnboundLocalError on ds.
	        raise ValueError(f"Select a language before requesting a plot (got {langname!r})")

	    maxdat = len(ds)

	    # randint(high) draws from [0, high). Passing maxdat - 1 could never pick
	    # the last row and raised on a one-row corpus, so use the full length.
	    row_index = int(random.randint(maxdat))
	    ds = ds.select([row_index])

	    sound_path = ds['audio'][0]['path']  # the 'array' entry holds the audio data itself
	    transcript = ds['normalized_text'][0]
	    return graph.align_and_graph(sound_path, transcript, lang_aligner)


	# Text of the "about" panel shown at the bottom of the page.
	about_text = """
	# ABOUT
	This is a work-in-progress demo.

	Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).

	After you select a language, a few example sentences from the corpus are displayed.

	Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.

	[ABOUT REAPER PITCH TRACKING - TODO]

	[ABOUT RMSE INTENSITY - TODO]

	[ABOUT CTC ALIGNMENT - TODO]

	caitlinr@ru.is / https://github.com/catiR/
	"""

	# Page layout: language picker, example-sentence table, plot button, plot, about text.
	with gr.Blocks() as bl:

	    # Changing the selection triggers load_lang (wired up below).
	    lloadr = gr.Dropdown(choices=["Faroese", "Icelandic"], label="Select a language")

	    # Holds the aligner returned by load_lang so f1 can reuse it; starts empty.
	    align_func = gr.State()

	    with gr.Row():
	        # Read-only table of example sentences for the chosen language.
	        databrowser = gr.DataFrame(
	            wrap=True,
	            max_rows=50,
	            interactive=False,
	            overflow_row_behaviour='paginate',
	        )

	    btn1 = gr.Button(value="The random prosody button")
	    btn1.style(full_width=False, size="sm")

	    pl1 = gr.Plot()

	    # Button: plot a random sentence using the current language and aligner.
	    btn1.click(fn=f1, inputs=[lloadr, align_func], outputs=pl1)

	    # Dropdown: refresh the example table and store the new aligner.
	    lloadr.change(fn=load_lang, inputs=lloadr, outputs=[databrowser, align_func])

	    gr.Markdown(about_text)


	if __name__ == "__main__":
	    bl.launch()