Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
38f204d
1
Parent(s):
183d840
feat: update driver.js guided tour
Browse files
- aip_trainer/lambdas/js.py +19 -11
- app.py +43 -35
aip_trainer/lambdas/js.py
CHANGED
@@ -84,19 +84,27 @@ head_driver_tour = """
|
|
84 |
<script src="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.js.iife.js" integrity="sha512-8EdV4D5VlQLX0dJFcdx6h/oJ/NanAIMlaViz57NDkhzwbQsxabgpFua0gzM4f5vdk60CfRAydhlbfbDThMfh3w==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
|
85 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.css" integrity="sha512-jRsM62XMRl33ewZ0Si7yX6ANq+ZiWwUcvPk4H2DKr417W80rPMXzbD/towhs2YEoux/dfOuVRkLB+5Tfzmfolg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
|
86 |
<script type="module">
|
87 |
-
|
88 |
-
const driver0 = window.driver;
|
89 |
-
console.log("driver0:", driver0, "!#")
|
90 |
-
const driverJs = driver0.js;
|
91 |
-
console.log("driverJs:", driverJs, "!#")
|
92 |
-
const driver = driverJs.driver;
|
93 |
-
|
94 |
-
console.log("driver:", driver, "!#")
|
95 |
|
96 |
const driverSteps = [
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
]
|
101 |
const driverObj = driver({
|
102 |
showProgress: true,
|
|
|
84 |
<script src="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.js.iife.js" integrity="sha512-8EdV4D5VlQLX0dJFcdx6h/oJ/NanAIMlaViz57NDkhzwbQsxabgpFua0gzM4f5vdk60CfRAydhlbfbDThMfh3w==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
|
85 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.css" integrity="sha512-jRsM62XMRl33ewZ0Si7yX6ANq+ZiWwUcvPk4H2DKr417W80rPMXzbD/towhs2YEoux/dfOuVRkLB+5Tfzmfolg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
|
86 |
<script type="module">
|
87 |
+
const driver = window.driver.js.driver;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
const driverSteps = [
|
90 |
+
{ element: "id-ai-pronunciation-trainer-gradio-app-container", popover: { title: "AI Pronunciation Trainer Gradio app", description: "A quick tour of the features of the Gradio app 'AI Pronunciation Trainer'." } },
|
91 |
+
{ element: "#radio-language-id-element", popover: { title: "Languages", description: "Choose the input language." } },
|
92 |
+
{ element: "#radio-difficulty-id-element", popover: { title: "Difficulty", description: "Choose the difficulty for the random selection of the sentence to be used as input for speech recognition." } },
|
93 |
+
{ element: "#btn-random-phrase-id-element", popover: { title: "Choose a random phrase", description: "Choose a random sentence to be used as input for speech recognition." } },
|
94 |
+
{ element: "#text-student-transcription-id-element", popover: { title: "Phrase to read for speech recognition", description: "Enter the sentence to be used as input for speech recognition." } },
|
95 |
+
{ element: "#audio-tts-id-element", popover: { title: "Audio TTS", description: "Text-to-speech audio output." } },
|
96 |
+
{ element: "#btn-run-tts-id-element", popover: { title: "In-browser Text-to-Speech", description: "Execute the text-to-speech functionality in the browser by reading the student's transcription." } },
|
97 |
+
{ element: "#btn-run-tts-backend-id-element", popover: { title: "Backend Text-to-Speech", description: "Execute the text-to-speech functionality in the backend by reading the student's transcription." } },
|
98 |
+
{ element: "#btn-clear-tts-backend-id-element", popover: { title: "Clear Text-to-Speech", description: "Clear the synthetic audio output of the text-to-speech synthesis." } },
|
99 |
+
{ element: "#audio-student-recording-stt-id-element", popover: { title: "Speech-to-Text audio output", description: "Recorded audio output of the speech recognition." } },
|
100 |
+
{ element: "#text-student-recording-ipa-id-element", popover: { title: "Student phonetic transcription", description: "Phonetic transcription of the student's speech." } },
|
101 |
+
{ element: "#text-ideal-ipa-id-element", popover: { title: "'Ideal' phonetic transcription", description: "'Ideal' phonetic transcription." } },
|
102 |
+
{ element: ".speech-output-group", popover: { title: "Detailed speech accuracy output", description: "Detailed output of speech accuracy, word by word." } },
|
103 |
+
{ element: "#number-pronunciation-accuracy-id-element", popover: { title: "Current accuracy", description: "Current speech accuracy." } },
|
104 |
+
{ element: "#number-score-de-id-element", popover: { title: "Global German accuracy", description: "Total speech accuracy in German." } },
|
105 |
+
{ element: "#number-score-en-id-element", popover: { title: "Global English accuracy", description: "Total speech accuracy in English." } },
|
106 |
+
{ element: "#btn-recognize-speech-accuracy-id-element", popover: { title: "Run speech accuracy recognition", description: "Execute the speech accuracy recognition." } },
|
107 |
+
{ element: "#accordion-examples-id-element", popover: { title: "Text examples", description: "Some text examples to be used as input for speech recognition." } },
|
108 |
]
|
109 |
const driverObj = driver({
|
110 |
showProgress: true,
|
app.py
CHANGED
@@ -6,8 +6,11 @@ from aip_trainer.lambdas import js, lambdaGetSample, lambdaSpeechToScore, lambda
|
|
6 |
|
7 |
|
8 |
css = """
|
9 |
-
.speech-output-label p {color: grey;}
|
10 |
-
.
|
|
|
|
|
|
|
11 |
"""
|
12 |
|
13 |
|
@@ -31,7 +34,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
31 |
with gr.Column(scale=4, min_width=300):
|
32 |
with gr.Row():
|
33 |
with gr.Column(scale=2, min_width=80):
|
34 |
-
radio_language = gr.Radio(["de", "en"], label="Language", value="en")
|
35 |
with gr.Column(scale=5, min_width=160):
|
36 |
radio_difficulty = gr.Radio(
|
37 |
label="Difficulty",
|
@@ -42,29 +45,32 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
42 |
("medium", 2),
|
43 |
("hard", 3),
|
44 |
],
|
|
|
45 |
)
|
46 |
with gr.Column(scale=1, min_width=100):
|
47 |
-
btn_random_phrase = gr.Button(value="Choose a random phrase")
|
48 |
with gr.Row():
|
49 |
with gr.Column(scale=7, min_width=300):
|
50 |
-
|
51 |
lines=3,
|
52 |
-
label="
|
53 |
value="Hi there, how are you?",
|
|
|
54 |
)
|
55 |
with gr.Row():
|
56 |
-
audio_tts = gr.Audio(label="Audio TTS")
|
57 |
with gr.Row():
|
58 |
btn_run_tts = gr.Button(value="TTS in browser", elem_id="btn-run-tts-id-element")
|
59 |
btn_run_tts_backend = gr.Button(value="TTS backend", elem_id="btn-run-tts-backend-id-element")
|
60 |
-
btn_clear_tts = gr.Button(value="Clear TTS backend")
|
61 |
btn_clear_tts.click(clear, inputs=[], outputs=[audio_tts])
|
62 |
with gr.Row():
|
63 |
-
|
64 |
-
label="
|
65 |
sources=["microphone", "upload"],
|
66 |
type="filepath",
|
67 |
show_download_button=True,
|
|
|
68 |
)
|
69 |
with gr.Column(scale=4, min_width=320):
|
70 |
text_transcribed_hidden = gr.Textbox(
|
@@ -76,35 +82,36 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
76 |
visible=False,
|
77 |
)
|
78 |
text_recording_ipa = gr.Textbox(
|
79 |
-
placeholder=None, label="
|
80 |
)
|
81 |
text_ideal_ipa = gr.Textbox(
|
82 |
-
placeholder=None, label="Ideal phonetic transcription"
|
83 |
)
|
84 |
text_raw_json_output_hidden = gr.Textbox(placeholder=None, label="text_raw_json_output_hidden", visible=False)
|
85 |
-
gr.
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
96 |
with gr.Row():
|
97 |
-
gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1")
|
98 |
with gr.Row():
|
99 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
|
100 |
-
number_pronunciation_accuracy = gr.Number(label="Current score")
|
101 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
|
102 |
-
number_score_de = gr.Number(label="Global score DE", value=0, interactive=False)
|
103 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
|
104 |
-
number_score_en = gr.Number(label="Global score EN", value=0, interactive=False)
|
105 |
with gr.Row():
|
106 |
-
btn = gr.Button(value="Recognize speech accuracy")
|
107 |
-
with gr.Accordion("Click here to expand the table examples", open=
|
108 |
examples_text = gr.Examples(
|
109 |
examples=[
|
110 |
["Hallo, wie geht es dir?", "de", 1],
|
@@ -114,7 +121,8 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
114 |
["Die König-Ludwig-Eiche ist ein Naturdenkmal im Staatsbad Brückenau, einem Ortsteil des drei Kilometer nordöstlich gelegenen Bad Brückenau im Landkreis Bad Kissingen in Bayern.", "de", 3],
|
115 |
["Some machine learning models are designed to understand and generate human-like text based on the input they receive.", "en", 3],
|
116 |
],
|
117 |
-
inputs=[
|
|
|
118 |
)
|
119 |
|
120 |
def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
|
@@ -145,7 +153,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
145 |
|
146 |
btn.click(
|
147 |
get_updated_score_by_language,
|
148 |
-
inputs=[
|
149 |
outputs=[
|
150 |
text_transcribed_hidden,
|
151 |
text_letter_correctness,
|
@@ -156,21 +164,21 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
156 |
number_score_de, number_score_en
|
157 |
],
|
158 |
)
|
159 |
-
btn_run_tts.click(fn=None, inputs=[
|
160 |
btn_run_tts_backend.click(
|
161 |
fn=lambdaTTS.get_tts,
|
162 |
-
inputs=[
|
163 |
outputs=audio_tts,
|
164 |
)
|
165 |
btn_random_phrase.click(
|
166 |
lambdaGetSample.get_random_selection,
|
167 |
inputs=[radio_language, radio_difficulty],
|
168 |
-
outputs=[
|
169 |
)
|
170 |
btn_random_phrase.click(
|
171 |
clear2,
|
172 |
inputs=[],
|
173 |
-
outputs=[
|
174 |
)
|
175 |
html_output.change(
|
176 |
None,
|
|
|
6 |
|
7 |
|
8 |
css = """
|
9 |
+
.speech-output-label p {color: grey; margin-bottom: white;}
|
10 |
+
.background-white {background-color: white !important; }
|
11 |
+
.speech-output-group {padding: 12px;}
|
12 |
+
.speech-output-container {min-height: 60px;}
|
13 |
+
.speech-output-html {text-align: left; }
|
14 |
"""
|
15 |
|
16 |
|
|
|
34 |
with gr.Column(scale=4, min_width=300):
|
35 |
with gr.Row():
|
36 |
with gr.Column(scale=2, min_width=80):
|
37 |
+
radio_language = gr.Radio(["de", "en"], label="Language", value="en", elem_id="radio-language-id-element")
|
38 |
with gr.Column(scale=5, min_width=160):
|
39 |
radio_difficulty = gr.Radio(
|
40 |
label="Difficulty",
|
|
|
45 |
("medium", 2),
|
46 |
("hard", 3),
|
47 |
],
|
48 |
+
elem_id="radio-difficulty-id-element",
|
49 |
)
|
50 |
with gr.Column(scale=1, min_width=100):
|
51 |
+
btn_random_phrase = gr.Button(value="Choose a random phrase", elem_id="btn-random-phrase-id-element")
|
52 |
with gr.Row():
|
53 |
with gr.Column(scale=7, min_width=300):
|
54 |
+
text_student_transcription = gr.Textbox(
|
55 |
lines=3,
|
56 |
+
label="Phrase to read for speech recognition",
|
57 |
value="Hi there, how are you?",
|
58 |
+
elem_id="text-student-transcription-id-element",
|
59 |
)
|
60 |
with gr.Row():
|
61 |
+
audio_tts = gr.Audio(label="Audio TTS", elem_id="audio-tts-id-element")
|
62 |
with gr.Row():
|
63 |
btn_run_tts = gr.Button(value="TTS in browser", elem_id="btn-run-tts-id-element")
|
64 |
btn_run_tts_backend = gr.Button(value="TTS backend", elem_id="btn-run-tts-backend-id-element")
|
65 |
+
btn_clear_tts = gr.Button(value="Clear TTS backend", elem_id="btn-clear-tts-backend-id-element")
|
66 |
btn_clear_tts.click(clear, inputs=[], outputs=[audio_tts])
|
67 |
with gr.Row():
|
68 |
+
audio_student_recording_stt = gr.Audio(
|
69 |
+
label="Speech-to-Text audio output",
|
70 |
sources=["microphone", "upload"],
|
71 |
type="filepath",
|
72 |
show_download_button=True,
|
73 |
+
elem_id="audio-student-recording-stt-id-element",
|
74 |
)
|
75 |
with gr.Column(scale=4, min_width=320):
|
76 |
text_transcribed_hidden = gr.Textbox(
|
|
|
82 |
visible=False,
|
83 |
)
|
84 |
text_recording_ipa = gr.Textbox(
|
85 |
+
placeholder=None, label="Student phonetic transcription", elem_id="text-student-recording-ipa-id-element"
|
86 |
)
|
87 |
text_ideal_ipa = gr.Textbox(
|
88 |
+
placeholder=None, label="Ideal phonetic transcription", elem_id="text-ideal-ipa-id-element"
|
89 |
)
|
90 |
text_raw_json_output_hidden = gr.Textbox(placeholder=None, label="text_raw_json_output_hidden", visible=False)
|
91 |
+
with gr.Group(elem_classes="speech-output-group background-white"):
|
92 |
+
gr.Markdown("Speech accuracy output", elem_classes="speech-output-label background-white")
|
93 |
+
with gr.Group(elem_classes="speech-output-container background-white"):
|
94 |
+
html_output = gr.HTML(
|
95 |
+
label="Speech accuracy output",
|
96 |
+
elem_id="speech-output",
|
97 |
+
show_label=False,
|
98 |
+
visible=True,
|
99 |
+
render=True,
|
100 |
+
value=" - ",
|
101 |
+
elem_classes="speech-output-html background-white",
|
102 |
+
)
|
103 |
with gr.Row():
|
104 |
+
gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1", elem_id="speech-accuracy-score-container-id-element")
|
105 |
with gr.Row():
|
106 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
|
107 |
+
number_pronunciation_accuracy = gr.Number(label="Current score", elem_id="number-pronunciation-accuracy-id-element")
|
108 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
|
109 |
+
number_score_de = gr.Number(label="Global score DE", value=0, interactive=False, elem_id="number-score-de-id-element")
|
110 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
|
111 |
+
number_score_en = gr.Number(label="Global score EN", value=0, interactive=False, elem_id="number-score-en-id-element")
|
112 |
with gr.Row():
|
113 |
+
btn = gr.Button(value="Recognize speech accuracy", elem_id="btn-recognize-speech-accuracy-id-element")
|
114 |
+
with gr.Accordion("Click here to expand the table examples", open=True, elem_id="accordion-examples-id-element"):
|
115 |
examples_text = gr.Examples(
|
116 |
examples=[
|
117 |
["Hallo, wie geht es dir?", "de", 1],
|
|
|
121 |
["Die König-Ludwig-Eiche ist ein Naturdenkmal im Staatsbad Brückenau, einem Ortsteil des drei Kilometer nordöstlich gelegenen Bad Brückenau im Landkreis Bad Kissingen in Bayern.", "de", 3],
|
122 |
["Some machine learning models are designed to understand and generate human-like text based on the input they receive.", "en", 3],
|
123 |
],
|
124 |
+
inputs=[text_student_transcription, radio_language, radio_difficulty],
|
125 |
+
elem_id="examples-text-id-element",
|
126 |
)
|
127 |
|
128 |
def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
|
|
|
153 |
|
154 |
btn.click(
|
155 |
get_updated_score_by_language,
|
156 |
+
inputs=[text_student_transcription, audio_student_recording_stt, radio_language, number_score_de, number_score_en],
|
157 |
outputs=[
|
158 |
text_transcribed_hidden,
|
159 |
text_letter_correctness,
|
|
|
164 |
number_score_de, number_score_en
|
165 |
],
|
166 |
)
|
167 |
+
btn_run_tts.click(fn=None, inputs=[text_student_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
|
168 |
btn_run_tts_backend.click(
|
169 |
fn=lambdaTTS.get_tts,
|
170 |
+
inputs=[text_student_transcription, radio_language],
|
171 |
outputs=audio_tts,
|
172 |
)
|
173 |
btn_random_phrase.click(
|
174 |
lambdaGetSample.get_random_selection,
|
175 |
inputs=[radio_language, radio_difficulty],
|
176 |
+
outputs=[text_student_transcription],
|
177 |
)
|
178 |
btn_random_phrase.click(
|
179 |
clear2,
|
180 |
inputs=[],
|
181 |
+
outputs=[audio_student_recording_stt, audio_tts]
|
182 |
)
|
183 |
html_output.change(
|
184 |
None,
|