|
import gradio as gr |
|
import subprocess,os |
|
from datasets import load_dataset, Audio |
|
import corpora |
|
import ctcalign,graph |
|
from numpy import random |
|
|
|
|
|
import matplotlib |
|
matplotlib.use('Agg') |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
def setup():
    """Fetch and build the REAPER pitch tracker under ``./REAPER/build``.

    Downloads the master-branch archive of google/REAPER with ``wget``,
    unpacks it, renames the tree to ``./REAPER`` and then runs ``cmake ..``
    followed by ``make`` inside ``./REAPER/build``.

    The download/unpack step is skipped when ``./REAPER`` already exists, so
    calling this again only re-runs the build instead of nesting a second
    copy of the sources inside the first one.  Build commands are run with
    ``cwd=`` rather than ``os.chdir`` so the process working directory is
    never left changed when a step fails part-way through.

    External commands stay best-effort: a non-zero exit status is reported
    on stdout together with the command's stderr, but does not raise.

    Raises:
        FileNotFoundError: if ``./REAPER`` is still missing after the
            download/unpack step (nothing could be built in that case).
    """
    source_dir = "REAPER"
    build_dir = os.path.join(source_dir, "build")
    archive = "master.zip"
    unpacked_dir = "REAPER-master"

    def run(cmd, cwd=None):
        """Run *cmd*, capture its output and report a failing exit status."""
        result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
        if result.returncode != 0:
            print(f"FAILED ({result.returncode}): {' '.join(cmd)}")
            print(result.stderr)
        return result

    print('PWD::', os.getcwd())

    if not os.path.isdir(source_dir):
        # -O pins the output name so a stale archive cannot cause wget to
        # save the download as master.zip.1 instead.
        r1 = run([
            "wget",
            "-O", archive,
            "https://github.com/google/REAPER/archive/refs/heads/master.zip",
        ])
        print(r1.stdout)
        # -o overwrites leftovers from an interrupted run without prompting.
        run(["unzip", "-o", archive])
        if os.path.isdir(unpacked_dir):
            os.rename(unpacked_dir, source_dir)
        if os.path.exists(archive):
            os.remove(archive)
        if not os.path.isdir(source_dir):
            raise FileNotFoundError(
                f"REAPER sources were not downloaded/unpacked into ./{source_dir}"
            )

    os.makedirs(build_dir, exist_ok=True)

    r2 = run(["cmake", ".."], cwd=build_dir)
    print(r2.stdout)
    r3 = run(["make"], cwd=build_dir)
    print(r3.stdout)

    # Directory listing of the working directory, kept as a startup diagnostic.
    r9 = run(["ls", "-la"])
    print('LS::', r9.stdout)
|
|
|
|
|
|
|
# Fetch and build REAPER as soon as this module is loaded, before the UI below is defined.
setup()
|
|
|
def load_lang(langname):
    """Return a preview table and a CTC aligner for the chosen language.

    Args:
        langname: ``"Icelandic"`` or ``"Faroese"`` (the dropdown choices).

    Returns:
        A ``(preview, lang_aligner)`` tuple.  ``preview`` is the first ten
        rows of the corpus as a pandas DataFrame with the ``audio``,
        ``speaker_id`` and ``duration`` columns dropped; ``lang_aligner`` is
        the object built by ``ctcalign.aligner`` for that language's model.

    Raises:
        ValueError: if ``langname`` is not one of the supported languages
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if langname == "Icelandic":
        corpus = corpora.ds_i
        model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
    elif langname == "Faroese":
        corpus = corpora.ds_f
        model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
    else:
        raise ValueError(
            f"Unsupported language {langname!r}; expected 'Icelandic' or 'Faroese'"
        )

    # Both models are constructed with the same word separator and blank token.
    model_word_separator = '|'
    model_blank_token = '[PAD]'
    lang_aligner = ctcalign.aligner(model_path, model_word_separator, model_blank_token)

    # Only the remaining (non-audio) columns are shown in the data browser.
    preview = corpus.data.to_pandas()
    preview = preview.drop(columns=['audio', 'speaker_id', 'duration'])
    return (preview[:10], lang_aligner)
|
|
|
|
|
def f1(langname, lang_aligner):
    """Align and graph one randomly chosen sentence from a corpus.

    Args:
        langname: ``"Icelandic"`` or ``"Faroese"``.
        lang_aligner: aligner object for that language, passed through
            unchanged to ``graph.align_and_graph``.

    Returns:
        Whatever ``graph.align_and_graph`` returns for the selected row's
        audio path and normalized transcript (the UI sends it to a
        ``gr.Plot``).

    Raises:
        ValueError: if ``langname`` is not a supported language (e.g. the
            button was clicked before a language was selected), or if the
            corpus is empty.
    """
    if langname == "Icelandic":
        ds = corpora.ds_i
    elif langname == "Faroese":
        ds = corpora.ds_f
    else:
        raise ValueError(
            f"Unsupported language {langname!r}; expected 'Icelandic' or 'Faroese'"
        )

    maxdat = len(ds)
    if maxdat == 0:
        raise ValueError(f"The {langname} corpus is empty")

    # numpy's randint(high) draws from [0, high), so the dataset length itself
    # is the right bound; the old `maxdat - 1` could never pick the last row
    # and raised on a single-row dataset.
    row_index = int(random.randint(maxdat))
    ds = ds.select([row_index])

    sound_path = ds['audio'][0]['path']
    transcript = ds['normalized_text'][0]

    return graph.align_and_graph(sound_path, transcript, lang_aligner)
|
|
|
|
|
# Build the Gradio page: language picker, corpus preview, plot button, notes.
with gr.Blocks() as bl:

    lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language")

    # Holds the aligner that load_lang returns as its second output.
    align_func = gr.State()

    with gr.Row():
        databrowser = gr.DataFrame(
            wrap=True,
            max_rows=50,
            interactive=False,
            overflow_row_behaviour='paginate',
        )

    btn1 = gr.Button(value="The random prosody button")
    btn1.style(full_width=False, size="sm")

    pl1 = gr.Plot()

    # Button: graph a random sentence of the selected language.
    btn1.click(f1, inputs=[lloadr, align_func], outputs=pl1)

    # Dropdown: refresh the preview table and store the matching aligner.
    lloadr.change(load_lang, inputs=lloadr, outputs=[databrowser, align_func])

    gr.Markdown(
        """
# ABOUT
This is a work-in-progress demo.

Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).

After you select a language, a few example sentences from the corpus are displayed.

Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.

[ABOUT REAPER PITCH TRACKING - TODO]

[ABOUT RMSE INTENSITY - TODO]

[ABOUT CTC ALIGNMENT - TODO]

caitlinr@ru.is / https://github.com/catiR/
"""
    )


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    bl.launch()