Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
918182d
1
Parent(s):
3bef9be
feat: handle example selection and input change by resetting audio and text outputs
Browse files- aip_trainer/lambdas/lambdaSpeechToScore.py +7 -3
- app.py +69 -29
aip_trainer/lambdas/lambdaSpeechToScore.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import base64
|
3 |
import json
|
4 |
import os
|
@@ -176,8 +175,13 @@ def soundfile_write(audiofile: str | Path, data: np.ndarray, samplerate: int):
|
|
176 |
|
177 |
|
178 |
def get_selected_word(idx_recorded_word: int, raw_json_output: str) -> str:
|
179 |
-
|
180 |
-
list_audio_files =
|
|
|
|
|
|
|
|
|
|
|
181 |
return list_audio_files[idx_recorded_word]
|
182 |
|
183 |
|
|
|
|
|
1 |
import base64
|
2 |
import json
|
3 |
import os
|
|
|
175 |
|
176 |
|
177 |
def get_selected_word(idx_recorded_word: int, raw_json_output: str) -> str:
    """Return the audio file associated with the selected recognized word.

    Args:
        idx_recorded_word: zero-based position of the word picked by the user.
        raw_json_output: JSON string containing at least the "audio_files"
            list and the "real_transcripts" string.

    Returns:
        The "audio_files" entry found at position ``idx_recorded_word``.

    Raises:
        IndexError: if ``idx_recorded_word`` is outside the "audio_files" list.
        KeyError: if "audio_files" or "real_transcripts" is missing.
    """
    recognition_output = json.loads(raw_json_output)
    list_audio_files = recognition_output["audio_files"]
    real_transcripts_list = recognition_output["real_transcripts"].split()
    app_logger.info(f"idx_recorded_word:{idx_recorded_word} ...")
    # The transcript word is needed only for the log line below: a transcript
    # with fewer words than audio files must not prevent returning the file.
    try:
        current_word = real_transcripts_list[idx_recorded_word]
    except IndexError:
        app_logger.warning(
            f"real_transcripts, no word at index:{idx_recorded_word} "
            f"(transcript has {len(real_transcripts_list)} words) ..."
        )
    else:
        app_logger.info(f"real_transcripts, current word:{current_word} ...")
    return list_audio_files[idx_recorded_word]
|
186 |
|
187 |
|
app.py
CHANGED
@@ -12,6 +12,11 @@ css = """
|
|
12 |
.speech-output-container {min-height: 60px;}
|
13 |
.speech-output-html {text-align: left; }
|
14 |
"""
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
def clear():
|
@@ -118,31 +123,18 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
118 |
gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1", elem_id="speech-accuracy-score-container-id-element")
|
119 |
with gr.Row():
|
120 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
|
121 |
-
|
122 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
|
123 |
-
|
124 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
|
125 |
-
|
126 |
with gr.Row():
|
127 |
btn_recognize_speech_accuracy = gr.Button(value="Recognize speech accuracy", elem_id="btn-recognize-speech-accuracy-id-element")
|
128 |
with gr.Row():
|
129 |
with gr.Column(scale=1, min_width=50):
|
130 |
num_tot_recognized_words = gr.Number(label="Total recognized words", visible=True, minimum=0, interactive=False)
|
131 |
with gr.Column(scale=1, min_width=50):
|
132 |
-
num_selected_recognized_words = gr.Number(label=
|
133 |
-
|
134 |
-
def change_max_selected_words(n):
|
135 |
-
app_logger.info(f"change_max_selected_words: {n} ...")
|
136 |
-
app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_words.maximum} ...")
|
137 |
-
new_num_selected_recognized_words = gr.Number(label=f"Recognized word index, max {n}!", visible=True, value=0, minimum=0, maximum=n)
|
138 |
-
app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_words.maximum} ...")
|
139 |
-
return new_num_selected_recognized_words
|
140 |
-
|
141 |
-
num_tot_recognized_words.change(
|
142 |
-
change_max_selected_words,
|
143 |
-
inputs=[num_tot_recognized_words],
|
144 |
-
outputs=[num_selected_recognized_words],
|
145 |
-
)
|
146 |
with gr.Column(scale=2, min_width=100):
|
147 |
# todo: use https://www.gradio.app/docs/gradio/multimodaltextbox
|
148 |
audio_splitted_student_recording_stt = gr.Audio(
|
@@ -170,11 +162,11 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
170 |
|
171 |
def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
|
172 |
_transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_words, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
|
173 |
-
new_num_selected_recognized_words = gr.Number(label=
|
174 |
output = {
|
175 |
text_transcribed_hidden: _transcribed_text,
|
176 |
text_letter_correctness: _letter_correctness,
|
177 |
-
|
178 |
text_recording_ipa: _recording_ipa,
|
179 |
text_ideal_ipa: _ideal_ipa,
|
180 |
text_raw_json_output_hidden: _res,
|
@@ -184,14 +176,14 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
184 |
match lang:
|
185 |
case "de":
|
186 |
return {
|
187 |
-
|
188 |
-
|
189 |
**output
|
190 |
}
|
191 |
case "en":
|
192 |
return {
|
193 |
-
|
194 |
-
|
195 |
**output
|
196 |
}
|
197 |
case _:
|
@@ -199,20 +191,68 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
199 |
|
200 |
btn_recognize_speech_accuracy.click(
|
201 |
get_updated_score_by_language,
|
202 |
-
inputs=[text_student_transcription, audio_student_recording_stt, radio_language,
|
203 |
outputs=[
|
204 |
text_transcribed_hidden,
|
205 |
text_letter_correctness,
|
206 |
-
|
207 |
text_recording_ipa,
|
208 |
text_ideal_ipa,
|
209 |
text_raw_json_output_hidden,
|
210 |
-
|
211 |
-
|
212 |
num_tot_recognized_words,
|
213 |
num_selected_recognized_words
|
214 |
],
|
215 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
btn_run_tts.click(fn=None, inputs=[text_student_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
|
217 |
btn_run_tts_backend.click(
|
218 |
fn=lambdaTTS.get_tts,
|
@@ -236,12 +276,12 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
|
|
236 |
js=js.js_update_ipa_output,
|
237 |
)
|
238 |
|
239 |
-
@gradio_app.load(inputs=[local_storage], outputs=[
|
240 |
def load_from_local_storage(saved_values):
|
241 |
print("loading from local storage", saved_values)
|
242 |
return saved_values[0], saved_values[1]
|
243 |
|
244 |
-
@gr.on([
|
245 |
def save_to_local_storage(score_de, score_en):
|
246 |
return [score_de, score_en]
|
247 |
|
|
|
12 |
.speech-output-container {min-height: 60px;}
|
13 |
.speech-output-html {text-align: left; }
|
14 |
"""
|
15 |
+
word_idx_text = "Recognized word index"
|
16 |
+
|
17 |
+
|
18 |
+
def get_textbox_hidden():
    """Create a new, non-visible ``gr.Textbox`` component."""
    hidden_textbox = gr.Textbox(visible=False)
    return hidden_textbox
|
20 |
|
21 |
|
22 |
def clear():
|
|
|
123 |
gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1", elem_id="speech-accuracy-score-container-id-element")
|
124 |
with gr.Row():
|
125 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
|
126 |
+
num_pronunciation_accuracy = gr.Number(label="Current score", elem_id="number-pronunciation-accuracy-id-element")
|
127 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
|
128 |
+
num_score_de = gr.Number(label="Global score DE", value=0, interactive=False, elem_id="number-score-de-id-element")
|
129 |
with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
|
130 |
+
num_score_en = gr.Number(label="Global score EN", value=0, interactive=False, elem_id="number-score-en-id-element")
|
131 |
with gr.Row():
|
132 |
btn_recognize_speech_accuracy = gr.Button(value="Recognize speech accuracy", elem_id="btn-recognize-speech-accuracy-id-element")
|
133 |
with gr.Row():
|
134 |
with gr.Column(scale=1, min_width=50):
|
135 |
num_tot_recognized_words = gr.Number(label="Total recognized words", visible=True, minimum=0, interactive=False)
|
136 |
with gr.Column(scale=1, min_width=50):
|
137 |
+
num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, minimum=0, value=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
with gr.Column(scale=2, min_width=100):
|
139 |
# todo: use https://www.gradio.app/docs/gradio/multimodaltextbox
|
140 |
audio_splitted_student_recording_stt = gr.Audio(
|
|
|
162 |
|
163 |
def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
|
164 |
_transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_words, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
|
165 |
+
new_num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, value=0)
|
166 |
output = {
|
167 |
text_transcribed_hidden: _transcribed_text,
|
168 |
text_letter_correctness: _letter_correctness,
|
169 |
+
num_pronunciation_accuracy: _pronunciation_accuracy,
|
170 |
text_recording_ipa: _recording_ipa,
|
171 |
text_ideal_ipa: _ideal_ipa,
|
172 |
text_raw_json_output_hidden: _res,
|
|
|
176 |
match lang:
|
177 |
case "de":
|
178 |
return {
|
179 |
+
num_score_de: float(score_de) + float(_pronunciation_accuracy),
|
180 |
+
num_score_en: float(score_en),
|
181 |
**output
|
182 |
}
|
183 |
case "en":
|
184 |
return {
|
185 |
+
num_score_en: float(score_en) + float(_pronunciation_accuracy),
|
186 |
+
num_score_de: float(score_de),
|
187 |
**output
|
188 |
}
|
189 |
case _:
|
|
|
191 |
|
192 |
btn_recognize_speech_accuracy.click(
|
193 |
get_updated_score_by_language,
|
194 |
+
inputs=[text_student_transcription, audio_student_recording_stt, radio_language, num_score_de, num_score_en],
|
195 |
outputs=[
|
196 |
text_transcribed_hidden,
|
197 |
text_letter_correctness,
|
198 |
+
num_pronunciation_accuracy,
|
199 |
text_recording_ipa,
|
200 |
text_ideal_ipa,
|
201 |
text_raw_json_output_hidden,
|
202 |
+
num_score_de,
|
203 |
+
num_score_en,
|
204 |
num_tot_recognized_words,
|
205 |
num_selected_recognized_words
|
206 |
],
|
207 |
)
|
208 |
+
|
209 |
+
def change_max_selected_words(n):
    """Build the replacement "recognized word index" input for a new total.

    Args:
        n: current value of the "Total recognized words" number component;
            may be ``None`` when that field is empty.

    Returns:
        A ``gr.Number`` with value 0, minimum 0 and maximum ``n``. The label
        carries the "max" hint only when ``n`` is a non-zero number.
    """
    app_logger.info(f"change_max_selected_words: {n} ...")
    app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_words.maximum} ...")
    # `not n` covers both 0 and None, so an empty field never renders "max None!".
    label = word_idx_text if not n else f"{word_idx_text}, max {n}!"
    # NOTE(review): maximum is set to n itself; if n is a word count rather than
    # the last valid index, index n would be out of range downstream - confirm.
    new_num_selected_recognized_words = gr.Number(label=label, visible=True, value=0, minimum=0, maximum=n)
    # Log the maximum of the component being returned: the module-level
    # component is not modified here, so reading it again would repeat "pre".
    app_logger.info(f"num_selected_recognized_words.maximum, post: {new_num_selected_recognized_words.maximum} ...")
    return new_num_selected_recognized_words


num_tot_recognized_words.change(
    change_max_selected_words,
    inputs=[num_tot_recognized_words],
    outputs=[num_selected_recognized_words],
)
|
222 |
+
|
223 |
+
def clear3():
    """Return the reset values applied when the student text changes.

    The tuple is positional and must line up with the ``outputs`` list of the
    ``text_student_transcription.change`` listener below: six ``None`` values
    for the audio/text components, then three zeros for the number components.
    """
    return None, None, None, None, None, None, 0, 0, 0


text_student_transcription.change(
    clear3,
    inputs=[],
    outputs=[
        audio_student_recording_stt, audio_tts, audio_splitted_student_recording_stt, text_recording_ipa, text_ideal_ipa, text_transcribed_hidden,
        # Each component appears once: the third zero resets the total word
        # count instead of targeting num_pronunciation_accuracy a second time.
        num_pronunciation_accuracy, num_selected_recognized_words, num_tot_recognized_words
    ],
)
|
234 |
+
|
235 |
+
def reset_max_total_recognized_words(content_text_recording_ipa, content_num_tot_recognized_words):
    """Reset the total recognized words when the recording IPA text is empty.

    Args:
        content_text_recording_ipa: current content of ``text_recording_ipa``.
        content_num_tot_recognized_words: current value of
            ``num_tot_recognized_words``.

    Returns:
        A fresh "Total recognized words" ``gr.Number`` with value 0 when the
        IPA content is ``None`` or an empty string, otherwise the current
        total unchanged.
    """
    ipa_is_empty = content_text_recording_ipa is None or content_text_recording_ipa == ""
    if not ipa_is_empty:
        return content_num_tot_recognized_words
    app_logger.info("reset_max_total_recognized_words...")
    return gr.Number(label="Total recognized words", visible=True, value=0, minimum=0, interactive=False)


# Keep the total word count in sync with the recording IPA text.
text_recording_ipa.change(
    reset_max_total_recognized_words,
    inputs=[text_recording_ipa, num_tot_recognized_words],
    outputs=[
        num_tot_recognized_words
    ],
)
# Frontend-only listener: no Python callback, the js snippet writes html_output.
text_recording_ipa.change(
    None,
    inputs=[get_textbox_hidden(), get_textbox_hidden()],
    outputs=[html_output],
    js=js.js_update_ipa_output,
)
|
255 |
+
|
256 |
btn_run_tts.click(fn=None, inputs=[text_student_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
|
257 |
btn_run_tts_backend.click(
|
258 |
fn=lambdaTTS.get_tts,
|
|
|
276 |
js=js.js_update_ipa_output,
|
277 |
)
|
278 |
|
279 |
+
@gradio_app.load(inputs=[local_storage], outputs=[num_score_de, num_score_en])
def load_from_local_storage(saved_values):
    """Feed the values read from ``local_storage`` into the two score fields.

    Args:
        saved_values: indexable value coming from ``local_storage``; its first
            two entries are sent to ``num_score_de`` and ``num_score_en``.

    Returns:
        A ``(saved_values[0], saved_values[1])`` tuple.
    """
    print("loading from local storage", saved_values)
    stored_score_de = saved_values[0]
    stored_score_en = saved_values[1]
    return stored_score_de, stored_score_en
|
283 |
|
284 |
+
@gr.on([num_score_de.change, num_score_en.change], inputs=[num_score_de, num_score_en], outputs=[local_storage])
def save_to_local_storage(score_de, score_en):
    """Pack both global scores into the list written to ``local_storage``.

    Triggered whenever ``num_score_de`` or ``num_score_en`` changes.
    """
    scores_to_store = [score_de, score_en]
    return scores_to_store
|
287 |
|