Irpan committed on
Commit
d6d3fa1
·
1 Parent(s): 8e4b54d
Files changed (2) hide show
  1. app.py +8 -8
  2. asr.py +3 -3
app.py CHANGED
@@ -44,25 +44,25 @@ with gr.Blocks() as app:
44
  with gr.Row():
45
  transcript_ugArab_box = gr.Textbox(
46
  label="Transcript (Uyghur Arabic)",
47
- placeholder="ASR transcription of your audio..."
48
  )
49
  with gr.Row():
50
  transcript_ugLatn_box = gr.Textbox(
51
  label="Transcript (Uyghur Latin)",
52
- placeholder="ASR transcription of your audio..."
53
  )
54
 
55
- # Group correct and user pronunciation
56
  with gr.Group():
57
  with gr.Row():
58
- correct_pronunciation_box = gr.Textbox(
59
- label="Correct Pronunciation",
60
- placeholder="IPA representation of the correct pronunciation..."
61
  )
62
  with gr.Row():
63
  user_pronunciation_box = gr.Textbox(
64
  label="User Pronunciation",
65
- placeholder="IPA representation of your pronunciation..."
66
  )
67
 
68
  with gr.Group():
@@ -99,7 +99,7 @@ with gr.Blocks() as app:
99
  check_btn.click(
100
  asr.check_pronunciation,
101
  inputs=[input_text, script_choice, user_audio],
102
- outputs=[transcript_ugArab_box, transcript_ugLatn_box, correct_pronunciation_box, user_pronunciation_box, match_box, score_box]
103
  )
104
 
105
  # Main
 
44
  with gr.Row():
45
  transcript_ugArab_box = gr.Textbox(
46
  label="Transcript (Uyghur Arabic)",
47
+ placeholder="ASR transcription of user audio..."
48
  )
49
  with gr.Row():
50
  transcript_ugLatn_box = gr.Textbox(
51
  label="Transcript (Uyghur Latin)",
52
+ placeholder="ASR transcription of user audio..."
53
  )
54
 
55
+ # Group machine and user pronunciation
56
  with gr.Group():
57
  with gr.Row():
58
+ machine_pronunciation_box = gr.Textbox(
59
+ label="Machine Pronunciation",
60
+ placeholder="IPA representation of the machine pronunciation..."
61
  )
62
  with gr.Row():
63
  user_pronunciation_box = gr.Textbox(
64
  label="User Pronunciation",
65
+ placeholder="IPA representation of the user pronunciation..."
66
  )
67
 
68
  with gr.Group():
 
99
  check_btn.click(
100
  asr.check_pronunciation,
101
  inputs=[input_text, script_choice, user_audio],
102
+ outputs=[transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation_box, user_pronunciation_box, match_box, score_box]
103
  )
104
 
105
  # Main
asr.py CHANGED
@@ -15,7 +15,7 @@ asr_model = asr_model.to(device)
15
 
16
  def asr(user_audio):
17
  # Load and resample user audio
18
- audio_input, sampling_rate = util.load_and_resample_audio(user_audio, target_rate=16000)
19
 
20
  # Process audio through ASR model
21
  inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
@@ -36,9 +36,9 @@ def check_pronunciation(input_text, script, user_audio):
36
  # Get IPA and Pronunciation Feedback
37
  if script == 'Uyghur Latin':
38
  input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script to IPA conversion
39
- correct_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
40
  reference_text = input_text,
41
  output_text = transcript_ugArab_box,
42
  language_code='uig-Arab')
43
 
44
- return transcript_ugArab_box, transcript_ugLatn_box, correct_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score
 
15
 
16
  def asr(user_audio):
17
  # Load and resample user audio
18
+ audio_input, sampling_rate = util.load_and_resample_audio(file_path = user_audio, target_rate=16000)
19
 
20
  # Process audio through ASR model
21
  inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
 
36
  # Get IPA and Pronunciation Feedback
37
  if script == 'Uyghur Latin':
38
  input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script to IPA conversion
39
+ machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
40
  reference_text = input_text,
41
  output_text = transcript_ugArab_box,
42
  language_code='uig-Arab')
43
 
44
+ return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score