alessandro trinca tornidor committed on
Commit
dc92d10
·
1 Parent(s): 290bfe0

feat: underline the single selected word while reproducing it

Browse files
aip_trainer/lambdas/js.py CHANGED
@@ -1,5 +1,5 @@
1
  js_update_ipa_output = """
2
- function updateCssText(text, letters) {
3
  let wordsArr = text.split(" ")
4
  let lettersWordsArr = letters.split(" ")
5
  let speechOutputContainer = document.querySelector('#speech-output');
@@ -9,11 +9,12 @@ function updateCssText(text, letters) {
9
  let word = wordsArr[idx]
10
  let letterIsCorrect = lettersWordsArr[idx]
11
  for (let idx1 in word) {
12
- let letterCorrect = letterIsCorrect[idx1] == "1"
13
- let containerLetter = document.createElement("span")
14
- containerLetter.style.color = letterCorrect ? 'green' : "red"
15
- containerLetter.innerText = word[idx1];
16
- speechOutputContainer.appendChild(containerLetter)
 
17
  }
18
  let containerSpace = document.createElement("span")
19
  containerSpace.textContent = " "
 
1
  js_update_ipa_output = """
2
+ function updateCssText(text, letters, idxSelectedWord) {
3
  let wordsArr = text.split(" ")
4
  let lettersWordsArr = letters.split(" ")
5
  let speechOutputContainer = document.querySelector('#speech-output');
 
9
  let word = wordsArr[idx]
10
  let letterIsCorrect = lettersWordsArr[idx]
11
  for (let idx1 in word) {
12
+ let letterCorrect = letterIsCorrect[idx1] == "1"
13
+ let containerLetter = document.createElement("span")
14
+ let color = letterCorrect ? 'green' : "red"
15
+ containerLetter.style.cssText = idx == idxSelectedWord ? `color: ${color}; text-decoration-line: underline;` : `color: ${color};`
16
+ containerLetter.innerText = word[idx1];
17
+ speechOutputContainer.appendChild(containerLetter)
18
  }
19
  let containerSpace = document.createElement("span")
20
  containerSpace.textContent = " "
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -166,7 +166,8 @@ def get_speech_to_score_tuple(real_text: str, file_bytes_or_audiotmpfile: str |
166
 
167
  audio_files = get_splitted_audio_file(audiotmpfile=file_bytes_or_audiotmpfile, start_time=start_time, end_time=end_time)
168
  output = {'audio_files': audio_files, **output}
169
- return real_transcripts, is_letter_correct_all_words, pronunciation_accuracy, ipa_transcript, real_transcripts_ipa, num_words, json.dumps(output)
 
170
 
171
 
172
  def soundfile_write(audiofile: str | Path, data: np.ndarray, samplerate: int):
 
166
 
167
  audio_files = get_splitted_audio_file(audiotmpfile=file_bytes_or_audiotmpfile, start_time=start_time, end_time=end_time)
168
  output = {'audio_files': audio_files, **output}
169
+ first_audio_file = audio_files[0]
170
+ return real_transcripts, is_letter_correct_all_words, pronunciation_accuracy, ipa_transcript, real_transcripts_ipa, num_words, first_audio_file, json.dumps(output)
171
 
172
 
173
  def soundfile_write(audiofile: str | Path, data: np.ndarray, samplerate: int):
app.py CHANGED
@@ -15,9 +15,11 @@ css = """
15
  word_idx_text = "Selected word index"
16
 
17
 
18
- def get_textbox_hidden():
19
  return gr.Textbox(visible=False)
20
 
 
 
21
 
22
  def clear():
23
  return None
@@ -130,25 +132,18 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
130
  with gr.Row():
131
  num_tot_recognized_words = gr.Number(label="Total recognized words", visible=False, minimum=0, interactive=False)
132
  with gr.Column(scale=1, min_width=50):
133
- num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, minimum=0, value=0, interactive=False)
134
  with gr.Column(scale=4, min_width=100):
135
- # todo: use https://www.gradio.app/docs/gradio/multimodaltextbox
136
  audio_splitted_student_recording_stt = gr.Audio(
137
  label="Splitted student speech output",
138
  type="filepath",
139
  show_download_button=True,
140
  elem_id="audio-splitted-student-recording-stt-id-element",
141
  )
142
- with gr.Row():
143
- num_selected_recognized_words.input(
144
- lambdaSpeechToScore.get_selected_word,
145
- inputs=[num_selected_recognized_words, text_raw_json_output_hidden],
146
- outputs=[audio_splitted_student_recording_stt],
147
- )
148
 
149
  def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
150
- _transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_words, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
151
- new_num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, value=0)
152
  output = {
153
  text_transcribed_hidden: _transcribed_text,
154
  text_letter_correctness: _letter_correctness,
@@ -156,8 +151,9 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
156
  text_recording_ipa: _recording_ipa,
157
  text_ideal_ipa: _ideal_ipa,
158
  text_raw_json_output_hidden: _res,
159
- num_tot_recognized_words: _num_tot_recognized_words,
160
- num_selected_recognized_words: new_num_selected_recognized_words
 
161
  }
162
  match lang:
163
  case "de":
@@ -188,25 +184,26 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
188
  num_score_de,
189
  num_score_en,
190
  num_tot_recognized_words,
191
- num_selected_recognized_words
 
192
  ],
193
  )
194
 
195
  def change_max_selected_words(n):
196
  app_logger.info(f"change_max_selected_words: {n} ...")
197
  num_max_selected_words = n -1
198
- app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_words.maximum} ...")
199
  label = word_idx_text if n == 0 else f"{word_idx_text} (from 0 to {num_max_selected_words})"
200
  interactive = n > 0
201
  app_logger.info(f"change_max_selected_words: {n}, is interactive? {interactive} ...")
202
  new_num_selected_recognized_words = gr.Number(label=label, visible=True, value=0, minimum=0, maximum=num_max_selected_words, interactive=interactive)
203
- app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_words.maximum} ...")
204
  return new_num_selected_recognized_words
205
 
206
  num_tot_recognized_words.change(
207
- change_max_selected_words,
208
  inputs=[num_tot_recognized_words],
209
- outputs=[num_selected_recognized_words],
210
  )
211
 
212
  def clear3():
@@ -217,7 +214,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
217
  inputs=[],
218
  outputs=[
219
  audio_student_recording_stt, audio_tts, audio_splitted_student_recording_stt, text_recording_ipa, text_ideal_ipa, text_transcribed_hidden,
220
- num_pronunciation_accuracy, num_selected_recognized_words, num_pronunciation_accuracy
221
  ],
222
  )
223
 
@@ -237,7 +234,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
237
  )
238
  text_recording_ipa.change(
239
  None,
240
- inputs=[get_textbox_hidden(), get_textbox_hidden()],
241
  outputs=[html_output],
242
  js=js.js_update_ipa_output,
243
  )
@@ -249,7 +246,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
249
  outputs=audio_tts,
250
  )
251
  btn_random_phrase.click(
252
- lambdaGetSample.get_random_selection,
253
  inputs=[radio_language, radio_difficulty],
254
  outputs=[text_student_transcription],
255
  )
@@ -260,10 +257,15 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
260
  )
261
  html_output.change(
262
  None,
263
- inputs=[text_transcribed_hidden, text_letter_correctness],
264
  outputs=[html_output],
265
  js=js.js_update_ipa_output,
266
  )
 
 
 
 
 
267
 
268
  @gradio_app.load(inputs=[local_storage], outputs=[num_score_de, num_score_en])
269
  def load_from_local_storage(saved_values):
 
15
  word_idx_text = "Selected word index"
16
 
17
 
18
+ def get_textbox_empty_hidden():
19
  return gr.Textbox(visible=False)
20
 
21
+ def get_number_empty_hidden():
22
+ return gr.Number(visible=False)
23
 
24
  def clear():
25
  return None
 
132
  with gr.Row():
133
  num_tot_recognized_words = gr.Number(label="Total recognized words", visible=False, minimum=0, interactive=False)
134
  with gr.Column(scale=1, min_width=50):
135
+ num_selected_recognized_word = gr.Number(label=word_idx_text, visible=True, minimum=0, value=0, interactive=False)
136
  with gr.Column(scale=4, min_width=100):
 
137
  audio_splitted_student_recording_stt = gr.Audio(
138
  label="Splitted student speech output",
139
  type="filepath",
140
  show_download_button=True,
141
  elem_id="audio-splitted-student-recording-stt-id-element",
142
  )
 
 
 
 
 
 
143
 
144
  def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
145
+ _transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_word, first_audio_file, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
146
+ new_num_selected_recognized_word = gr.Number(label=word_idx_text, visible=True, value=0)
147
  output = {
148
  text_transcribed_hidden: _transcribed_text,
149
  text_letter_correctness: _letter_correctness,
 
151
  text_recording_ipa: _recording_ipa,
152
  text_ideal_ipa: _ideal_ipa,
153
  text_raw_json_output_hidden: _res,
154
+ num_tot_recognized_words: _num_tot_recognized_word,
155
+ num_selected_recognized_word: new_num_selected_recognized_word,
156
+ audio_splitted_student_recording_stt: first_audio_file
157
  }
158
  match lang:
159
  case "de":
 
184
  num_score_de,
185
  num_score_en,
186
  num_tot_recognized_words,
187
+ num_selected_recognized_word,
188
+ audio_splitted_student_recording_stt
189
  ],
190
  )
191
 
192
  def change_max_selected_words(n):
193
  app_logger.info(f"change_max_selected_words: {n} ...")
194
  num_max_selected_words = n -1
195
+ app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_word.maximum} ...")
196
  label = word_idx_text if n == 0 else f"{word_idx_text} (from 0 to {num_max_selected_words})"
197
  interactive = n > 0
198
  app_logger.info(f"change_max_selected_words: {n}, is interactive? {interactive} ...")
199
  new_num_selected_recognized_words = gr.Number(label=label, visible=True, value=0, minimum=0, maximum=num_max_selected_words, interactive=interactive)
200
+ app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_word.maximum} ...")
201
  return new_num_selected_recognized_words
202
 
203
  num_tot_recognized_words.change(
204
+ fn=change_max_selected_words,
205
  inputs=[num_tot_recognized_words],
206
+ outputs=[num_selected_recognized_word],
207
  )
208
 
209
  def clear3():
 
214
  inputs=[],
215
  outputs=[
216
  audio_student_recording_stt, audio_tts, audio_splitted_student_recording_stt, text_recording_ipa, text_ideal_ipa, text_transcribed_hidden,
217
+ num_pronunciation_accuracy, num_selected_recognized_word, num_pronunciation_accuracy
218
  ],
219
  )
220
 
 
234
  )
235
  text_recording_ipa.change(
236
  None,
237
+ inputs=[get_textbox_empty_hidden(), get_textbox_empty_hidden(), get_number_empty_hidden()],
238
  outputs=[html_output],
239
  js=js.js_update_ipa_output,
240
  )
 
246
  outputs=audio_tts,
247
  )
248
  btn_random_phrase.click(
249
+ fn=lambdaGetSample.get_random_selection,
250
  inputs=[radio_language, radio_difficulty],
251
  outputs=[text_student_transcription],
252
  )
 
257
  )
258
  html_output.change(
259
  None,
260
+ inputs=[text_transcribed_hidden, text_letter_correctness, num_selected_recognized_word],
261
  outputs=[html_output],
262
  js=js.js_update_ipa_output,
263
  )
264
+ num_selected_recognized_word.input(
265
+ fn=lambdaSpeechToScore.get_selected_word,
266
+ inputs=[num_selected_recognized_word, text_raw_json_output_hidden],
267
+ outputs=[audio_splitted_student_recording_stt],
268
+ )
269
 
270
  @gradio_app.load(inputs=[local_storage], outputs=[num_score_de, num_score_en])
271
  def load_from_local_storage(saved_values):