anderbogia committed on
Commit
431e989
·
verified ·
1 Parent(s): 8a04c2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -6,16 +6,15 @@ os.system("pip install transformers==4.30.2") #Some interoperability issue with
6
  os.system("pip install tokenizers fairseq")
7
  os.system("pip install numpy==1.24") #NumPy 1.24 or less needed by Numba. Use 1.23, librosa still uses np.complex which was dropped in NumPy 1.24
8
  #os.system("pip install git+https://github.com/huggingface/transformers datasets[torch]")
9
- os.system("pip install torch accelerate torchaudio datasets easymms")
10
  os.system("pip install librosa==0.9.0")
11
 
12
 
13
  import gradio as gr
14
- from transformers import pipeline, Wav2Vec2ForCTC, AutoProcessor
15
  from datasets import load_dataset, Audio, Dataset
16
  import torch
17
  import librosa #For converting audio sample rate to 16k
18
- from easymms.models.tts import TTSModel #For TTS inference using EasyMMS
19
 
20
  LANG = "dtp" #Change to tih for Timugon Murut or iba for Iban
21
  model_id = "facebook/mms-1b-all"
@@ -27,6 +26,9 @@ model.load_adapter(LANG)
27
 
28
  asr_pipeline = pipeline(task = "automatic-speech-recognition", model = model_id) #Function that returns a dict, transcription stored in item with key "text"
29
 
 
 
 
30
  def preprocess(input): #Sets recording sampling rate to 16k and returns numpy ndarray from audio
31
  speech, sample_rate = librosa.load(input)
32
  speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
@@ -61,13 +63,15 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
61
  </div>
62
  """)
63
 
64
- tts = TTSModel(LANG)
65
 
66
- def fn2(input):
67
- res = tts.synthesize(input)
68
- flip_tuple = (res[1], res[0]) #EasyMMS synthesize() returns Tuple(data, sample_rate) where data is a numpy.array and sample_rate is int,
69
- #but Gradio Audio() expects the same tuple but with the elements flipped
70
- return flip_tuple
 
 
 
71
 
72
  with gr.Row():
73
  with gr.Column(scale = 1):
@@ -89,6 +93,6 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
89
  input_text = gr.components.Textbox(label = "Ginarit", placeholder = "Potutakai suat nu hiti")
90
  button2 = gr.Button("Poulayo'")
91
  output_audio = gr.components.Audio(label = "Rolou pinoulai")
92
- button2.click(fn2, inputs = input_text, outputs = output_audio)
93
 
94
  demo.launch(debug = True)
 
6
  os.system("pip install tokenizers fairseq")
7
  os.system("pip install numpy==1.24") #NumPy 1.24 or less needed by Numba. Use 1.23, librosa still uses np.complex which was dropped in NumPy 1.24
8
  #os.system("pip install git+https://github.com/huggingface/transformers datasets[torch]")
9
+ os.system("pip install torch accelerate torchaudio datasets")
10
  os.system("pip install librosa==0.9.0")
11
 
12
 
13
  import gradio as gr
14
+ from transformers import pipeline, Wav2Vec2ForCTC, AutoProcessor, VitsModel, AutoTokenizer
15
  from datasets import load_dataset, Audio, Dataset
16
  import torch
17
  import librosa #For converting audio sample rate to 16k
 
18
 
19
  LANG = "dtp" #Change to tih for Timugon Murut or iba for Iban
20
  model_id = "facebook/mms-1b-all"
 
26
 
27
  asr_pipeline = pipeline(task = "automatic-speech-recognition", model = model_id) #Function that returns a dict, transcription stored in item with key "text"
28
 
29
+ model_tts = VitsModel.from_pretrained("facebook/mms-tts-dtp")
30
+ tokenizer_tts = AutoTokenizer.from_pretrained("facebook/mms-tts-dtp")
31
+
32
  def preprocess(input): #Sets recording sampling rate to 16k and returns numpy ndarray from audio
33
  speech, sample_rate = librosa.load(input)
34
  speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
 
63
  </div>
64
  """)
65
 
 
66
 
67
def tts_run(input):
    """Synthesize speech for the given text with the MMS-TTS (VITS) model.

    Args:
        input: Text from the Gradio textbox to be spoken.

    Returns:
        A ``(sample_rate, waveform)`` tuple for the Gradio ``Audio`` output,
        where ``waveform`` is a 1-D numpy array, or ``None`` when the text
        is empty so that no audio is produced.
    """
    # Nothing to synthesize; avoid feeding an empty token sequence to the model.
    if not input or not input.strip():
        return None

    # Use the TTS-specific tokenizer/model pair and keep the encoded tensors,
    # which are then unpacked into the model call.
    inputs = tokenizer_tts(input, return_tensors="pt")
    with torch.no_grad():
        output = model_tts(**inputs).waveform

    # The model returns a batched tensor (batch, samples); Gradio needs a
    # 1-D numpy array, so take the single batch item and convert it.
    waveform = output[0].cpu().numpy()

    # Read the sample rate from the model config rather than hard-coding it.
    return (model_tts.config.sampling_rate, waveform)
75
 
76
  with gr.Row():
77
  with gr.Column(scale = 1):
 
93
  input_text = gr.components.Textbox(label = "Ginarit", placeholder = "Potutakai suat nu hiti")
94
  button2 = gr.Button("Poulayo'")
95
  output_audio = gr.components.Audio(label = "Rolou pinoulai")
96
+ button2.click(tts_run, inputs = input_text, outputs = output_audio)
97
 
98
  demo.launch(debug = True)