import gradio as gr

from fastai.vision.all import *

from fastaudio.core.all import *
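# Note: this star import must run before load_learner below so that the audio
# transforms pickled into the exported learners can be resolved at unpickle
# time (a common requirement for fastai exports).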

import matplotlib

matplotlib.rcParams['figure.dpi'] = 300  # render spectrogram figures at high resolution

# get_x/get_y were used to build the training DataBlock; they must be defined
# here under the same names so load_learner can unpickle the exported models.
def get_x(df):
    return df.path

def get_y(df):
    return df.pattern

# Two exported xresnet50 learners: 'removeSilence' (trained on clips with
# silence trimmed, per the filename) and a 'plain' variant.
learn_removeSilence = load_learner('xresnet50_pitch3_removeSilence.pkl')

learn_plain = load_learner('xresnet50_pitch3.pkl')

labels = learn_removeSilence.dls.vocab
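
# labels[i] pairs with probs[i] in predict() below, since both follow the
# DataLoaders vocab order. (Presumably both learners share the same vocab;
# a quick check: assert list(learn_plain.dls.vocab) == list(labels))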

# Earlier dispatcher, kept commented for reference (superseded by predict()):
# def process(Record, Upload, version):
#     if version == 'remove silence':
#         return predict(Record, Upload, learn_removeSilence)
#     elif version == 'plain':
#         return predict(Record, Upload, learn_plain)

def predict(Record, Upload, version):
    # Prefer the uploaded file; otherwise fall back to the microphone recording.
    path = Upload if Upload else Record
    if version == 'remove silence':
        spec, pred, pred_idx, probs = learn_removeSilence.predict(str(path), with_input=True)
    else:  # version == 'plain'
        spec, pred, pred_idx, probs = learn_plain.predict(str(path), with_input=True)
    # Show the spectrogram the model saw, with low frequencies at the bottom.
    fig, ax = plt.subplots(figsize=(16, 10))
    show_image(spec, ax=ax)
    ax.invert_yaxis()
    # Per-label confidences feed the Label output; the figure feeds the Image output.
    return [{labels[i]: float(probs[i]) for i in range(len(labels))}, fig]
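
# Quick local sanity check with one of the bundled example clips (run by hand,
# not on import; assumes the file sits next to this script):
#   scores, fig = predict(None, '代わる.mp3', 'remove silence')
#   print(sorted(scores, key=scores.get, reverse=True)[:3])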


title = "Japanese Pitch Accent Pattern Detector"

description = "This model predicts the pitch accent pattern of a Japanese word from a recording of its pronunciation."

article="<p style='text-align: center'><a href='https://mizoru.github.io/blog/2021/12/25/Japanese-pitch.html' target='_blank'>How did I make this and what is it for?</a></p>"

# Example clips bundled with the app; each row fills the Record input with a
# clip, leaves Upload empty, and selects the default version.
ex_paths = ['代わる.mp3', '大丈夫な.mp3', '熱くない.mp3', 'あめー雨.mp3', 'あめー飴.mp3']

examples = [[path, '', 'remove silence'] for path in ex_paths]

enable_queue = True  # queue requests so long-running predictions don't time out

gr.Interface(fn=predict,
             inputs=[gr.inputs.Audio(source='microphone', type='filepath', optional=True),
                     gr.inputs.Audio(source='upload', type='filepath', optional=True),
                     gr.inputs.Radio(choices=['plain', 'remove silence'], type="value",
                                     default='remove silence', label='version')],
             outputs=[gr.outputs.Label(num_top_classes=3),
                      gr.outputs.Image(type="plot", label='Spectrogram')],
             title=title, description=description, article=article,
             examples=examples).launch(debug=True, enable_queue=enable_queue)
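
# To run locally (assumes the pre-3.0 Gradio API used above, plus fastai,
# fastaudio, and the two .pkl files in the working directory):
#   python app.py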