# mms-tts-sherab / app.py
# (repository page header: 10zinten · "Update app.py" · 5de81a9 verified)
import re

import gradio as gr
import noisereduce
import numpy as np
import scipy.io.wavfile
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Model id of the MMS-TTS checkpoint used for Tibetan (bod) speech synthesis
model_id = "openpecha/mms-tts-sherab"
# Build the text-to-speech pipeline once at import time; generate_audio reuses it.
# Pass device=0 as well if you want to run on a GPU.
synthesiser = pipeline(task="text-to-speech", model=model_id)
def replace_numbers_with_convert(sentence, wylie=True):
    """Replace every number in ``sentence`` with ``convert(number, wylie)``.

    A number is a run of digits with an optional decimal part (``12``,
    ``3.5``). The matched text is handed to ``convert`` as a string and the
    value it returns is substituted in place.

    NOTE(review): ``convert`` is not defined or imported in the visible part
    of this file; it has to be available at module level - confirm where it
    comes from.

    Args:
        sentence: Text that may contain numbers.
        wylie: Forwarded unchanged as the second argument of ``convert``.

    Returns:
        ``sentence`` with each number replaced; text without numbers is
        returned unchanged.
    """
    return re.sub(
        r'\d+(\.\d+)?',
        lambda number: convert(number.group(), wylie),
        sentence,
    )
def num2letter(sentence):
    """Normalise Tibetan digits to ASCII, then replace all numbers in ``sentence``.

    Each Tibetan digit is mapped to the ASCII digit of the same value so that
    numbers written in either script are matched alike. The normalised text
    is then passed to ``replace_numbers_with_convert`` with ``wylie=False``.

    Args:
        sentence: Text that may contain Tibetan and/or ASCII digits.

    Returns:
        The text returned by ``replace_numbers_with_convert``.
    """
    # Position in the first string is the digit's value, so the two line up 1:1.
    to_ascii_digits = str.maketrans("༠༡༢༣༤༥༦༧༨༩", "0123456789")
    normalised = sentence.translate(to_ascii_digits)
    return replace_numbers_with_convert(normalised, wylie=False)
# Function to perform TTS inference and return the audio to Gradio
def generate_audio(input_text):
    """Synthesise speech for ``input_text``.

    The text is preprocessed with ``num2letter``, run through the
    module-level ``synthesiser`` pipeline, and the resulting waveform is
    passed through ``noisereduce.reduce_noise``.

    Args:
        input_text: Text entered in the Gradio text box.

    Returns:
        A ``(sampling_rate, audio)`` tuple. Gradio's audio output component
        expects in-memory audio in this order (sample rate first).
    """
    # Preprocess. This must read `input_text`: the local `text` is not bound
    # until this assignment completes.
    text = num2letter(input_text)
    # Perform TTS inference on the preprocessed text so the number handling
    # above actually reaches the model.
    speech = synthesiser(text)
    sampling_rate = speech["sampling_rate"]
    # Postprocess: noise reduction on the generated waveform
    audio = noisereduce.reduce_noise(y=speech["audio"], sr=sampling_rate)
    # NOTE(review): if the pipeline returns audio shaped (1, n_samples), Gradio
    # may need it squeezed to (n_samples,) - confirm against a real run.
    return sampling_rate, audio
# Gradio UI: a single text box in, a single audio player out, served by generate_audio
iface = gr.Interface(
    title="Tibetan Text-to-Speech (MMS-TTS) Sherab",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
    fn=generate_audio,
    inputs="text",  # text to synthesise
    outputs="audio",  # synthesised speech
)
# Start serving the interface
iface.launch()