Spaces:

ixxan
/

uyghur-pronunciation-checker

Running

Irpan committed on Dec 9, 2024

Commit

d6d3fa1

1 Parent(s): 8e4b54d

app

Files changed (2) hide show

app.py CHANGED Viewed

@@ -44,25 +44,25 @@ with gr.Blocks() as app:
                 with gr.Row():
                     transcript_ugArab_box = gr.Textbox(
                         label="Transcript (Uyghur Arabic)",
-                        placeholder="ASR transcription of your audio..."
                     )
                 with gr.Row():
                     transcript_ugLatn_box = gr.Textbox(
                         label="Transcript (Uyghur Latin)",
-                        placeholder="ASR transcription of your audio..."
                     )
-            # Group correct and user pronunciation
             with gr.Group():
                 with gr.Row():
-                    correct_pronunciation_box = gr.Textbox(
-                        label="Correct Pronunciation",
-                        placeholder="IPA representation of the correct pronunciation..."
                     )
                 with gr.Row():
                     user_pronunciation_box = gr.Textbox(
                         label="User Pronunciation",
-                        placeholder="IPA representation of your pronunciation..."
                     )
             with gr.Group():
@@ -99,7 +99,7 @@ with gr.Blocks() as app:
     check_btn.click(
         asr.check_pronunciation,
         inputs=[input_text, script_choice, user_audio],
-        outputs=[transcript_ugArab_box, transcript_ugLatn_box, correct_pronunciation_box, user_pronunciation_box, match_box, score_box]
     )
 # Main

                 with gr.Row():
                     transcript_ugArab_box = gr.Textbox(
                         label="Transcript (Uyghur Arabic)",
+                        placeholder="ASR transcription of user audio..."
                     )
                 with gr.Row():
                     transcript_ugLatn_box = gr.Textbox(
                         label="Transcript (Uyghur Latin)",
+                        placeholder="ASR transcription of user audio..."
                     )
+            # Group machine and user pronunciation
             with gr.Group():
                 with gr.Row():
+                    machine_pronunciation_box = gr.Textbox(
+                        label="Machine Pronunciation",
+                        placeholder="IPA representation of the machine pronunciation..."
                     )
                 with gr.Row():
                     user_pronunciation_box = gr.Textbox(
                         label="User Pronunciation",
+                        placeholder="IPA representation of the user pronunciation..."
                     )
             with gr.Group():
     check_btn.click(
         asr.check_pronunciation,
         inputs=[input_text, script_choice, user_audio],
+        outputs=[transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation_box, user_pronunciation_box, match_box, score_box]
     )
 # Main

asr.py CHANGED Viewed

@@ -15,7 +15,7 @@ asr_model = asr_model.to(device)
 def asr(user_audio):
     # Load and resample user audio
-    audio_input, sampling_rate = util.load_and_resample_audio(user_audio, target_rate=16000)
     # Process audio through ASR model
     inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
@@ -36,9 +36,9 @@ def check_pronunciation(input_text, script, user_audio):
     # Get IPA and Pronunciation Feedback
     if script == 'Uyghur Latin':
         input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script to IPA conversion
-    correct_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
         reference_text = input_text,
         output_text = transcript_ugArab_box,
         language_code='uig-Arab')
-    return transcript_ugArab_box, transcript_ugLatn_box, correct_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score

 def asr(user_audio):
     # Load and resample user audio
+    audio_input, sampling_rate = util.load_and_resample_audio(file_path = user_audio, target_rate=16000)
     # Process audio through ASR model
     inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
     # Get IPA and Pronunciation Feedback
     if script == 'Uyghur Latin':
         input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script to IPA conversion
+        machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
         reference_text = input_text,
         output_text = transcript_ugArab_box,
         language_code='uig-Arab')
+    return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score