alessandro trinca tornidor committed on
Commit
918182d
·
1 Parent(s): 3bef9be

feat: handle example selection and input change by resetting audio and text outputs

Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import base64
3
  import json
4
  import os
@@ -176,8 +175,13 @@ def soundfile_write(audiofile: str | Path, data: np.ndarray, samplerate: int):
176
 
177
 
178
  def get_selected_word(idx_recorded_word: int, raw_json_output: str) -> str:
179
- json_output = json.loads(raw_json_output)
180
- list_audio_files = json_output["audio_files"]
 
 
 
 
 
181
  return list_audio_files[idx_recorded_word]
182
 
183
 
 
 
1
  import base64
2
  import json
3
  import os
 
175
 
176
 
177
  def get_selected_word(idx_recorded_word: int, raw_json_output: str) -> str:
178
+ recognition_output = json.loads(raw_json_output)
179
+ list_audio_files = recognition_output["audio_files"]
180
+ real_transcripts = recognition_output["real_transcripts"]
181
+ real_transcripts_list = real_transcripts.split()
182
+ app_logger.info(f"idx_recorded_word:{idx_recorded_word} ...")
183
+ current_word = real_transcripts_list[idx_recorded_word]
184
+ app_logger.info(f"real_transcripts, current word:{current_word} ...")
185
  return list_audio_files[idx_recorded_word]
186
 
187
 
app.py CHANGED
@@ -12,6 +12,11 @@ css = """
12
  .speech-output-container {min-height: 60px;}
13
  .speech-output-html {text-align: left; }
14
  """
 
 
 
 
 
15
 
16
 
17
  def clear():
@@ -118,31 +123,18 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
118
  gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1", elem_id="speech-accuracy-score-container-id-element")
119
  with gr.Row():
120
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
121
- number_pronunciation_accuracy = gr.Number(label="Current score", elem_id="number-pronunciation-accuracy-id-element")
122
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
123
- number_score_de = gr.Number(label="Global score DE", value=0, interactive=False, elem_id="number-score-de-id-element")
124
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
125
- number_score_en = gr.Number(label="Global score EN", value=0, interactive=False, elem_id="number-score-en-id-element")
126
  with gr.Row():
127
  btn_recognize_speech_accuracy = gr.Button(value="Recognize speech accuracy", elem_id="btn-recognize-speech-accuracy-id-element")
128
  with gr.Row():
129
  with gr.Column(scale=1, min_width=50):
130
  num_tot_recognized_words = gr.Number(label="Total recognized words", visible=True, minimum=0, interactive=False)
131
  with gr.Column(scale=1, min_width=50):
132
- num_selected_recognized_words = gr.Number(label="Recognized word index", visible=True, minimum=0, value=0)
133
-
134
- def change_max_selected_words(n):
135
- app_logger.info(f"change_max_selected_words: {n} ...")
136
- app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_words.maximum} ...")
137
- new_num_selected_recognized_words = gr.Number(label=f"Recognized word index, max {n}!", visible=True, value=0, minimum=0, maximum=n)
138
- app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_words.maximum} ...")
139
- return new_num_selected_recognized_words
140
-
141
- num_tot_recognized_words.change(
142
- change_max_selected_words,
143
- inputs=[num_tot_recognized_words],
144
- outputs=[num_selected_recognized_words],
145
- )
146
  with gr.Column(scale=2, min_width=100):
147
  # todo: use https://www.gradio.app/docs/gradio/multimodaltextbox
148
  audio_splitted_student_recording_stt = gr.Audio(
@@ -170,11 +162,11 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
170
 
171
  def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
172
  _transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_words, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
173
- new_num_selected_recognized_words = gr.Number(label="Recognized word index", visible=True, value=0)
174
  output = {
175
  text_transcribed_hidden: _transcribed_text,
176
  text_letter_correctness: _letter_correctness,
177
- number_pronunciation_accuracy: _pronunciation_accuracy,
178
  text_recording_ipa: _recording_ipa,
179
  text_ideal_ipa: _ideal_ipa,
180
  text_raw_json_output_hidden: _res,
@@ -184,14 +176,14 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
184
  match lang:
185
  case "de":
186
  return {
187
- number_score_de: float(score_de) + float(_pronunciation_accuracy),
188
- number_score_en: float(score_en),
189
  **output
190
  }
191
  case "en":
192
  return {
193
- number_score_en: float(score_en) + float(_pronunciation_accuracy),
194
- number_score_de: float(score_de),
195
  **output
196
  }
197
  case _:
@@ -199,20 +191,68 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
199
 
200
  btn_recognize_speech_accuracy.click(
201
  get_updated_score_by_language,
202
- inputs=[text_student_transcription, audio_student_recording_stt, radio_language, number_score_de, number_score_en],
203
  outputs=[
204
  text_transcribed_hidden,
205
  text_letter_correctness,
206
- number_pronunciation_accuracy,
207
  text_recording_ipa,
208
  text_ideal_ipa,
209
  text_raw_json_output_hidden,
210
- number_score_de,
211
- number_score_en,
212
  num_tot_recognized_words,
213
  num_selected_recognized_words
214
  ],
215
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  btn_run_tts.click(fn=None, inputs=[text_student_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
217
  btn_run_tts_backend.click(
218
  fn=lambdaTTS.get_tts,
@@ -236,12 +276,12 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
236
  js=js.js_update_ipa_output,
237
  )
238
 
239
- @gradio_app.load(inputs=[local_storage], outputs=[number_score_de, number_score_en])
240
  def load_from_local_storage(saved_values):
241
  print("loading from local storage", saved_values)
242
  return saved_values[0], saved_values[1]
243
 
244
- @gr.on([number_score_de.change, number_score_en.change], inputs=[number_score_de, number_score_en], outputs=[local_storage])
245
  def save_to_local_storage(score_de, score_en):
246
  return [score_de, score_en]
247
 
 
12
  .speech-output-container {min-height: 60px;}
13
  .speech-output-html {text-align: left; }
14
  """
15
+ word_idx_text = "Recognized word index"
16
+
17
+
18
+ def get_textbox_hidden():
19
+ return gr.Textbox(visible=False)
20
 
21
 
22
  def clear():
 
123
  gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1", elem_id="speech-accuracy-score-container-id-element")
124
  with gr.Row():
125
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
126
+ num_pronunciation_accuracy = gr.Number(label="Current score", elem_id="number-pronunciation-accuracy-id-element")
127
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
128
+ num_score_de = gr.Number(label="Global score DE", value=0, interactive=False, elem_id="number-score-de-id-element")
129
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
130
+ num_score_en = gr.Number(label="Global score EN", value=0, interactive=False, elem_id="number-score-en-id-element")
131
  with gr.Row():
132
  btn_recognize_speech_accuracy = gr.Button(value="Recognize speech accuracy", elem_id="btn-recognize-speech-accuracy-id-element")
133
  with gr.Row():
134
  with gr.Column(scale=1, min_width=50):
135
  num_tot_recognized_words = gr.Number(label="Total recognized words", visible=True, minimum=0, interactive=False)
136
  with gr.Column(scale=1, min_width=50):
137
+ num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, minimum=0, value=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  with gr.Column(scale=2, min_width=100):
139
  # todo: use https://www.gradio.app/docs/gradio/multimodaltextbox
140
  audio_splitted_student_recording_stt = gr.Audio(
 
162
 
163
  def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
164
  _transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_words, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
165
+ new_num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, value=0)
166
  output = {
167
  text_transcribed_hidden: _transcribed_text,
168
  text_letter_correctness: _letter_correctness,
169
+ num_pronunciation_accuracy: _pronunciation_accuracy,
170
  text_recording_ipa: _recording_ipa,
171
  text_ideal_ipa: _ideal_ipa,
172
  text_raw_json_output_hidden: _res,
 
176
  match lang:
177
  case "de":
178
  return {
179
+ num_score_de: float(score_de) + float(_pronunciation_accuracy),
180
+ num_score_en: float(score_en),
181
  **output
182
  }
183
  case "en":
184
  return {
185
+ num_score_en: float(score_en) + float(_pronunciation_accuracy),
186
+ num_score_de: float(score_de),
187
  **output
188
  }
189
  case _:
 
191
 
192
  btn_recognize_speech_accuracy.click(
193
  get_updated_score_by_language,
194
+ inputs=[text_student_transcription, audio_student_recording_stt, radio_language, num_score_de, num_score_en],
195
  outputs=[
196
  text_transcribed_hidden,
197
  text_letter_correctness,
198
+ num_pronunciation_accuracy,
199
  text_recording_ipa,
200
  text_ideal_ipa,
201
  text_raw_json_output_hidden,
202
+ num_score_de,
203
+ num_score_en,
204
  num_tot_recognized_words,
205
  num_selected_recognized_words
206
  ],
207
  )
208
+
209
+ def change_max_selected_words(n):
210
+ app_logger.info(f"change_max_selected_words: {n} ...")
211
+ app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_words.maximum} ...")
212
+ label = word_idx_text if n == 0 else f"{word_idx_text}, max {n}!"
213
+ new_num_selected_recognized_words = gr.Number(label=label, visible=True, value=0, minimum=0, maximum=n)
214
+ app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_words.maximum} ...")
215
+ return new_num_selected_recognized_words
216
+
217
+ num_tot_recognized_words.change(
218
+ change_max_selected_words,
219
+ inputs=[num_tot_recognized_words],
220
+ outputs=[num_selected_recognized_words],
221
+ )
222
+
223
+ def clear3():
224
+ return None, None, None, None, None, None, 0, 0, 0
225
+
226
+ text_student_transcription.change(
227
+ clear3,
228
+ inputs=[],
229
+ outputs=[
230
+ audio_student_recording_stt, audio_tts, audio_splitted_student_recording_stt, text_recording_ipa, text_ideal_ipa, text_transcribed_hidden,
231
+ num_pronunciation_accuracy, num_selected_recognized_words, num_pronunciation_accuracy
232
+ ],
233
+ )
234
+
235
+ def reset_max_total_recognized_words(content_text_recording_ipa, content_num_tot_recognized_words):
236
+ if content_text_recording_ipa is None or content_text_recording_ipa == "":
237
+ app_logger.info("reset_max_total_recognized_words...")
238
+ new_num_tot_recognized_words = gr.Number(label="Total recognized words", visible=True, value=0, minimum=0, interactive=False)
239
+ return new_num_tot_recognized_words
240
+ return content_num_tot_recognized_words
241
+
242
+ text_recording_ipa.change(
243
+ reset_max_total_recognized_words,
244
+ inputs=[text_recording_ipa, num_tot_recognized_words],
245
+ outputs=[
246
+ num_tot_recognized_words
247
+ ],
248
+ )
249
+ text_recording_ipa.change(
250
+ None,
251
+ inputs=[get_textbox_hidden(), get_textbox_hidden()],
252
+ outputs=[html_output],
253
+ js=js.js_update_ipa_output,
254
+ )
255
+
256
  btn_run_tts.click(fn=None, inputs=[text_student_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
257
  btn_run_tts_backend.click(
258
  fn=lambdaTTS.get_tts,
 
276
  js=js.js_update_ipa_output,
277
  )
278
 
279
+ @gradio_app.load(inputs=[local_storage], outputs=[num_score_de, num_score_en])
280
  def load_from_local_storage(saved_values):
281
  print("loading from local storage", saved_values)
282
  return saved_values[0], saved_values[1]
283
 
284
+ @gr.on([num_score_de.change, num_score_en.change], inputs=[num_score_de, num_score_en], outputs=[local_storage])
285
  def save_to_local_storage(score_de, score_en):
286
  return [score_de, score_en]
287