Irpan
committed on
Commit
·
d6d3fa1
1
Parent(s):
8e4b54d
app
Browse files
app.py
CHANGED
@@ -44,25 +44,25 @@ with gr.Blocks() as app:
|
|
44 |
with gr.Row():
|
45 |
transcript_ugArab_box = gr.Textbox(
|
46 |
label="Transcript (Uyghur Arabic)",
|
47 |
-
placeholder="ASR transcription of
|
48 |
)
|
49 |
with gr.Row():
|
50 |
transcript_ugLatn_box = gr.Textbox(
|
51 |
label="Transcript (Uyghur Latin)",
|
52 |
-
placeholder="ASR transcription of
|
53 |
)
|
54 |
|
55 |
-
# Group
|
56 |
with gr.Group():
|
57 |
with gr.Row():
|
58 |
-
|
59 |
-
label="
|
60 |
-
placeholder="IPA representation of the
|
61 |
)
|
62 |
with gr.Row():
|
63 |
user_pronunciation_box = gr.Textbox(
|
64 |
label="User Pronunciation",
|
65 |
-
placeholder="IPA representation of
|
66 |
)
|
67 |
|
68 |
with gr.Group():
|
@@ -99,7 +99,7 @@ with gr.Blocks() as app:
|
|
99 |
check_btn.click(
|
100 |
asr.check_pronunciation,
|
101 |
inputs=[input_text, script_choice, user_audio],
|
102 |
-
outputs=[transcript_ugArab_box, transcript_ugLatn_box,
|
103 |
)
|
104 |
|
105 |
# Main
|
|
|
44 |
with gr.Row():
|
45 |
transcript_ugArab_box = gr.Textbox(
|
46 |
label="Transcript (Uyghur Arabic)",
|
47 |
+
placeholder="ASR transcription of user audio..."
|
48 |
)
|
49 |
with gr.Row():
|
50 |
transcript_ugLatn_box = gr.Textbox(
|
51 |
label="Transcript (Uyghur Latin)",
|
52 |
+
placeholder="ASR transcription of user audio..."
|
53 |
)
|
54 |
|
55 |
+
# Group machine and user pronunciation
|
56 |
with gr.Group():
|
57 |
with gr.Row():
|
58 |
+
machine_pronunciation_box = gr.Textbox(
|
59 |
+
label="Machine Pronunciation",
|
60 |
+
placeholder="IPA representation of the machine pronunciation..."
|
61 |
)
|
62 |
with gr.Row():
|
63 |
user_pronunciation_box = gr.Textbox(
|
64 |
label="User Pronunciation",
|
65 |
+
placeholder="IPA representation of the user pronunciation..."
|
66 |
)
|
67 |
|
68 |
with gr.Group():
|
|
|
99 |
check_btn.click(
|
100 |
asr.check_pronunciation,
|
101 |
inputs=[input_text, script_choice, user_audio],
|
102 |
+
outputs=[transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation_box, user_pronunciation_box, match_box, score_box]
|
103 |
)
|
104 |
|
105 |
# Main
|
asr.py
CHANGED
@@ -15,7 +15,7 @@ asr_model = asr_model.to(device)
|
|
15 |
|
16 |
def asr(user_audio):
|
17 |
# Load and resample user audio
|
18 |
-
audio_input, sampling_rate = util.load_and_resample_audio(user_audio, target_rate=16000)
|
19 |
|
20 |
# Process audio through ASR model
|
21 |
inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
|
@@ -36,9 +36,9 @@ def check_pronunciation(input_text, script, user_audio):
|
|
36 |
# Get IPA and Pronunciation Feedback
|
37 |
if script == 'Uyghur Latin':
|
38 |
input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script to IPA conversion
|
39 |
-
|
40 |
reference_text = input_text,
|
41 |
output_text = transcript_ugArab_box,
|
42 |
language_code='uig-Arab')
|
43 |
|
44 |
-
return transcript_ugArab_box, transcript_ugLatn_box,
|
|
|
15 |
|
16 |
def asr(user_audio):
|
17 |
# Load and resample user audio
|
18 |
+
audio_input, sampling_rate = util.load_and_resample_audio(file_path = user_audio, target_rate=16000)
|
19 |
|
20 |
# Process audio through ASR model
|
21 |
inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
|
|
|
36 |
# Get IPA and Pronunciation Feedback
|
37 |
if script == 'Uyghur Latin':
|
38 |
input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script to IPA conversion
|
39 |
+
machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
|
40 |
reference_text = input_text,
|
41 |
output_text = transcript_ugArab_box,
|
42 |
language_code='uig-Arab')
|
43 |
|
44 |
+
return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score
|