Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Files Community

alessandro trinca tornidor committed on Nov 30, 2024

Commit

dc92d10

1 Parent(s): 290bfe0

feat: underline the single selected word while playing it back

Browse files

Files changed (3) hide show

aip_trainer/lambdas/js.py +7 -6
aip_trainer/lambdas/lambdaSpeechToScore.py +2 -1
app.py +24 -22

aip_trainer/lambdas/js.py CHANGED Viewed

@@ -1,5 +1,5 @@
 js_update_ipa_output = """
-function updateCssText(text, letters) {
     let wordsArr = text.split(" ")
     let lettersWordsArr = letters.split(" ")
     let speechOutputContainer = document.querySelector('#speech-output');
@@ -9,11 +9,12 @@ function updateCssText(text, letters) {
         let word = wordsArr[idx]
         let letterIsCorrect = lettersWordsArr[idx]
         for (let idx1 in word) {
-        let letterCorrect = letterIsCorrect[idx1] == "1"
-        let containerLetter = document.createElement("span")
-        containerLetter.style.color = letterCorrect ? 'green' : "red"
-        containerLetter.innerText = word[idx1];
-        speechOutputContainer.appendChild(containerLetter)
         }
         let containerSpace = document.createElement("span")
         containerSpace.textContent = " "

 js_update_ipa_output = """
+function updateCssText(text, letters, idxSelectedWord) {
     let wordsArr = text.split(" ")
     let lettersWordsArr = letters.split(" ")
     let speechOutputContainer = document.querySelector('#speech-output');
         let word = wordsArr[idx]
         let letterIsCorrect = lettersWordsArr[idx]
         for (let idx1 in word) {
+            let letterCorrect = letterIsCorrect[idx1] == "1"
+            let containerLetter = document.createElement("span")
+            let color = letterCorrect ? 'green' : "red"
+            containerLetter.style.cssText = idx == idxSelectedWord ? `color: ${color}; text-decoration-line: underline;` : `color: ${color};`
+            containerLetter.innerText = word[idx1];
+            speechOutputContainer.appendChild(containerLetter)
         }
         let containerSpace = document.createElement("span")
         containerSpace.textContent = " "

aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED Viewed

@@ -166,7 +166,8 @@ def get_speech_to_score_tuple(real_text: str, file_bytes_or_audiotmpfile: str |
     audio_files = get_splitted_audio_file(audiotmpfile=file_bytes_or_audiotmpfile, start_time=start_time, end_time=end_time)
     output = {'audio_files': audio_files, **output}
-    return real_transcripts, is_letter_correct_all_words, pronunciation_accuracy, ipa_transcript, real_transcripts_ipa, num_words, json.dumps(output)
 def soundfile_write(audiofile: str | Path, data: np.ndarray, samplerate: int):

     audio_files = get_splitted_audio_file(audiotmpfile=file_bytes_or_audiotmpfile, start_time=start_time, end_time=end_time)
     output = {'audio_files': audio_files, **output}
+    first_audio_file = audio_files[0]
+    return real_transcripts, is_letter_correct_all_words, pronunciation_accuracy, ipa_transcript, real_transcripts_ipa, num_words, first_audio_file, json.dumps(output)
 def soundfile_write(audiofile: str | Path, data: np.ndarray, samplerate: int):

app.py CHANGED Viewed

@@ -15,9 +15,11 @@ css = """
 word_idx_text = "Selected word index"
-def get_textbox_hidden():
     return gr.Textbox(visible=False)
 def clear():
     return None
@@ -130,25 +132,18 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
             with gr.Row():
                 num_tot_recognized_words = gr.Number(label="Total recognized words", visible=False, minimum=0, interactive=False)
                 with gr.Column(scale=1, min_width=50):
-                    num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, minimum=0, value=0, interactive=False)
                 with gr.Column(scale=4, min_width=100):
-                    # todo: use https://www.gradio.app/docs/gradio/multimodaltextbox
                     audio_splitted_student_recording_stt = gr.Audio(
                         label="Splitted student speech output",
                         type="filepath",
                         show_download_button=True,
                         elem_id="audio-splitted-student-recording-stt-id-element",
                     )
-            with gr.Row():
-                num_selected_recognized_words.input(
-                    lambdaSpeechToScore.get_selected_word,
-                    inputs=[num_selected_recognized_words, text_raw_json_output_hidden],
-                    outputs=[audio_splitted_student_recording_stt],
-                )
     def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
-        _transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_words, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
-        new_num_selected_recognized_words = gr.Number(label=word_idx_text, visible=True, value=0)
         output = {
             text_transcribed_hidden: _transcribed_text,
             text_letter_correctness: _letter_correctness,
@@ -156,8 +151,9 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
             text_recording_ipa: _recording_ipa,
             text_ideal_ipa: _ideal_ipa,
             text_raw_json_output_hidden: _res,
-            num_tot_recognized_words: _num_tot_recognized_words,
-            num_selected_recognized_words: new_num_selected_recognized_words
         }
         match lang:
             case "de":
@@ -188,25 +184,26 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
             num_score_de,
             num_score_en,
             num_tot_recognized_words,
-            num_selected_recognized_words
         ],
     )
     def change_max_selected_words(n):
         app_logger.info(f"change_max_selected_words: {n} ...")
         num_max_selected_words = n -1
-        app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_words.maximum} ...")
         label = word_idx_text if n == 0 else f"{word_idx_text} (from 0 to {num_max_selected_words})"
         interactive = n > 0
         app_logger.info(f"change_max_selected_words: {n}, is interactive? {interactive} ...")
         new_num_selected_recognized_words = gr.Number(label=label, visible=True, value=0, minimum=0, maximum=num_max_selected_words, interactive=interactive)
-        app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_words.maximum} ...")
         return new_num_selected_recognized_words
     num_tot_recognized_words.change(
-        change_max_selected_words,
         inputs=[num_tot_recognized_words],
-        outputs=[num_selected_recognized_words],
     )
     def clear3():
@@ -217,7 +214,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
         inputs=[],
         outputs=[
             audio_student_recording_stt, audio_tts, audio_splitted_student_recording_stt, text_recording_ipa, text_ideal_ipa, text_transcribed_hidden,
-            num_pronunciation_accuracy, num_selected_recognized_words, num_pronunciation_accuracy
         ],
     )
@@ -237,7 +234,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
     )
     text_recording_ipa.change(
         None,
-        inputs=[get_textbox_hidden(), get_textbox_hidden()],
         outputs=[html_output],
         js=js.js_update_ipa_output,
     )
@@ -249,7 +246,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
         outputs=audio_tts,
     )
     btn_random_phrase.click(
-        lambdaGetSample.get_random_selection,
         inputs=[radio_language, radio_difficulty],
         outputs=[text_student_transcription],
     )
@@ -260,10 +257,15 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
     )
     html_output.change(
         None,
-        inputs=[text_transcribed_hidden, text_letter_correctness],
         outputs=[html_output],
         js=js.js_update_ipa_output,
     )
     @gradio_app.load(inputs=[local_storage], outputs=[num_score_de, num_score_en])
     def load_from_local_storage(saved_values):

 word_idx_text = "Selected word index"
+def get_textbox_empty_hidden():
     return gr.Textbox(visible=False)
+def get_number_empty_hidden():
+    return gr.Number(visible=False)
 def clear():
     return None
             with gr.Row():
                 num_tot_recognized_words = gr.Number(label="Total recognized words", visible=False, minimum=0, interactive=False)
                 with gr.Column(scale=1, min_width=50):
+                    num_selected_recognized_word = gr.Number(label=word_idx_text, visible=True, minimum=0, value=0, interactive=False)
                 with gr.Column(scale=4, min_width=100):
                     audio_splitted_student_recording_stt = gr.Audio(
                         label="Splitted student speech output",
                         type="filepath",
                         show_download_button=True,
                         elem_id="audio-splitted-student-recording-stt-id-element",
                     )
     def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
+        _transcribed_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _num_tot_recognized_word, first_audio_file, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang, remove_random_file=False)
+        new_num_selected_recognized_word = gr.Number(label=word_idx_text, visible=True, value=0)
         output = {
             text_transcribed_hidden: _transcribed_text,
             text_letter_correctness: _letter_correctness,
             text_recording_ipa: _recording_ipa,
             text_ideal_ipa: _ideal_ipa,
             text_raw_json_output_hidden: _res,
+            num_tot_recognized_words: _num_tot_recognized_word,
+            num_selected_recognized_word: new_num_selected_recognized_word,
+            audio_splitted_student_recording_stt: first_audio_file
         }
         match lang:
             case "de":
             num_score_de,
             num_score_en,
             num_tot_recognized_words,
+            num_selected_recognized_word,
+            audio_splitted_student_recording_stt
         ],
     )
     def change_max_selected_words(n):
         app_logger.info(f"change_max_selected_words: {n} ...")
         num_max_selected_words = n -1
+        app_logger.info(f"num_selected_recognized_words.maximum, pre: {num_selected_recognized_word.maximum} ...")
         label = word_idx_text if n == 0 else f"{word_idx_text} (from 0 to {num_max_selected_words})"
         interactive = n > 0
         app_logger.info(f"change_max_selected_words: {n}, is interactive? {interactive} ...")
         new_num_selected_recognized_words = gr.Number(label=label, visible=True, value=0, minimum=0, maximum=num_max_selected_words, interactive=interactive)
+        app_logger.info(f"num_selected_recognized_words.maximum, post: {num_selected_recognized_word.maximum} ...")
         return new_num_selected_recognized_words
     num_tot_recognized_words.change(
+        fn=change_max_selected_words,
         inputs=[num_tot_recognized_words],
+        outputs=[num_selected_recognized_word],
     )
     def clear3():
         inputs=[],
         outputs=[
             audio_student_recording_stt, audio_tts, audio_splitted_student_recording_stt, text_recording_ipa, text_ideal_ipa, text_transcribed_hidden,
+            num_pronunciation_accuracy, num_selected_recognized_word, num_pronunciation_accuracy
         ],
     )
     )
     text_recording_ipa.change(
         None,
+        inputs=[get_textbox_empty_hidden(), get_textbox_empty_hidden(), get_number_empty_hidden()],
         outputs=[html_output],
         js=js.js_update_ipa_output,
     )
         outputs=audio_tts,
     )
     btn_random_phrase.click(
+        fn=lambdaGetSample.get_random_selection,
         inputs=[radio_language, radio_difficulty],
         outputs=[text_student_transcription],
     )
     )
     html_output.change(
         None,
+        inputs=[text_transcribed_hidden, text_letter_correctness, num_selected_recognized_word],
         outputs=[html_output],
         js=js.js_update_ipa_output,
     )
+    num_selected_recognized_word.input(
+        fn=lambdaSpeechToScore.get_selected_word,
+        inputs=[num_selected_recognized_word, text_raw_json_output_hidden],
+        outputs=[audio_splitted_student_recording_stt],
+    )
     @gradio_app.load(inputs=[local_storage], outputs=[num_score_de, num_score_en])
     def load_from_local_storage(saved_values):