Irpan committed on
Commit
448bf1b
·
1 Parent(s): 81e83c9
Files changed (1) hide show
  1. app.py +52 -88
app.py CHANGED
@@ -3,100 +3,64 @@ import asr
3
  import tts
4
  import util
5
 
6
- # Define the Speech-to-Text tab
7
- def create_stt_tab():
8
- with gr.Blocks() as mms_transcribe:
9
- gr.Markdown("### Speech-To-Text")
10
- with gr.Row():
11
- audio_input = gr.Audio(
12
- label="Record or Upload Uyghur Audio",
13
- sources=["microphone", "upload"],
14
- type="filepath",
15
- )
16
- model_selection_stt = gr.Dropdown(
17
- choices=[model for model in asr.models_info],
18
- label="Select a Model",
19
- value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
20
- interactive=True
21
- )
22
- with gr.Row():
23
- arabic_output = gr.Textbox(label="Uyghur Arabic Transcription", interactive=False)
24
- latin_output = gr.Textbox(label="Uyghur Latin Transcription", interactive=False)
25
- with gr.Row():
26
- stt_submit_btn = gr.Button("Submit")
27
- stt_clear_btn = gr.Button("Clear")
 
 
 
 
28
 
29
- # Example button to load examples
30
- with gr.Row():
31
- stt_examples = gr.Examples(
32
- examples=util.asr_examples,
33
- inputs=[audio_input, model_selection_stt],
34
- outputs=[arabic_output, latin_output],
35
- label="Examples"
36
- )
37
-
38
- # Define button functionality
39
- stt_submit_btn.click(
40
- asr.transcribe,
41
- inputs=[audio_input, model_selection_stt],
42
- outputs=[arabic_output, latin_output]
43
- )
44
- stt_clear_btn.click(
45
- lambda: (None, None, None), # Clear inputs and outputs
46
- inputs=[],
47
- outputs=[audio_input, arabic_output, latin_output]
48
- )
49
-
50
- return mms_transcribe
51
-
52
- # Define the Text-to-Speech tab
53
- def create_tts_tab():
54
- with gr.Blocks() as mms_synthesize:
55
- gr.Markdown("### Text-To-Speech")
56
- with gr.Row():
57
- input_text = gr.Text(label="Input text")
58
- model_selection_tts = gr.Dropdown(
59
- choices=[model for model in tts.models_info],
60
- label="Select a Model",
61
- value="Meta-MMS",
62
- interactive=True
63
- )
64
- with gr.Row():
65
- generated_audio = gr.Audio(label="Generated Audio", interactive=False)
66
- with gr.Row():
67
- tts_submit_btn = gr.Button("Submit")
68
- tts_clear_btn = gr.Button("Clear")
69
-
70
- # Example button to load examples
71
- with gr.Row():
72
- tts_examples = gr.Examples(
73
- examples=util.tts_examples,
74
- inputs=[input_text, model_selection_tts],
75
- outputs=[generated_audio],
76
- label="Examples"
77
- )
78
-
79
- # Define button functionality
80
- tts_submit_btn.click(
81
- tts.synthesize,
82
- inputs=[input_text, model_selection_tts],
83
- outputs=[generated_audio]
84
- )
85
- tts_clear_btn.click(
86
- lambda: (None, None), # Clear inputs and outputs
87
- inputs=[],
88
- outputs=[input_text, generated_audio]
89
  )
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- return mms_synthesize
 
 
 
92
 
93
- # Combine tabs into a Tabbed Interface
94
  with gr.Blocks() as demo:
95
- gr.Markdown("### Uyghur Language Tools: STT and TTS")
96
- with gr.TabbedInterface([create_stt_tab(), create_tts_tab()], ["Speech-To-Text", "Text-To-Speech"]):
97
- pass
98
 
99
- # Run the app
100
  if __name__ == "__main__":
101
  demo.queue()
102
  demo.launch()
 
3
  import tts
4
  import util
5
 
6
+ mms_transcribe = gr.Interface(
7
+ fn=asr.transcribe,
8
+ inputs=[
9
+ gr.Audio(
10
+ label="Record or Upload Uyghur Audio",
11
+ sources=["microphone", "upload"],
12
+ type="filepath",
13
+ ),
14
+ gr.Dropdown(
15
+ choices=[model for model in asr.models_info],
16
+ label="Select a Model",
17
+ value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
18
+ interactive=True
19
+ ),
20
+ ],
21
+ outputs=[
22
+ gr.Textbox(label="Uyghur Arabic Transcription"),
23
+ gr.Textbox(label="Uyghur Latin Transcription"),
24
+ ],
25
+ examples=util.asr_examples,
26
+ title="Speech-To-Text",
27
+ description=(
28
+ "Transcribe Uyghur speech audio from a microphone or input file."
29
+ ),
30
+ allow_flagging="never",
31
+ )
32
 
33
+ mms_synthesize = gr.Interface(
34
+ fn=tts.synthesize,
35
+ inputs=[
36
+ gr.Text(label="Input text"),
37
+ gr.Dropdown(
38
+ choices=[model for model in tts.models_info],
39
+ label="Select a Model",
40
+ value="Meta-MMS",
41
+ interactive=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  )
43
+ ],
44
+ outputs=[
45
+ gr.Audio(label="Generated Audio"),
46
+ ],
47
+ examples=util.tts_examples,
48
+ title="Text-To-Speech",
49
+ description=(
50
+ "Generate audio from input Uyghur text."
51
+ ),
52
+ allow_flagging="never",
53
+ )
54
 
55
+ tabbed_interface = gr.TabbedInterface(
56
+ [mms_transcribe, mms_synthesize],
57
+ ["Speech-To-Text", "Text-To-Speech"],
58
+ )
59
 
 
60
  with gr.Blocks() as demo:
61
+ gr.Markdown("Comparision of STT and TTS models for Uyghur language.")
62
+ tabbed_interface.render()
 
63
 
 
64
  if __name__ == "__main__":
65
  demo.queue()
66
  demo.launch()