Spaces:
Sleeping
Sleeping
import re

import gradio as gr
import numpy as np
import scipy.io.wavfile
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Checkpoint id of the MMS-TTS model for Tibetan (bod).
model_id = "openpecha/mms-tts-sherab"

# Build the text-to-speech pipeline from that checkpoint.
# Pass device=0 as well if you want inference to run on a GPU.
synthesiser = pipeline(task="text-to-speech", model=model_id)
def replace_numbers_with_convert(sentence, wylie=True):
    """Replace every number in *sentence* with the output of ``convert``.

    A number is a run of digits with an optional fractional part
    (for example ``12`` or ``3.14``). Each matched number is passed, as a
    string, to ``convert`` together with the ``wylie`` flag, and whatever
    ``convert`` returns is substituted in its place.

    Args:
        sentence: Text that may contain numbers.
        wylie: Forwarded unchanged as the second argument of ``convert``.

    Returns:
        The sentence with each number replaced by ``convert``'s result.
    """
    # NOTE(review): ``convert`` is neither defined nor imported in the
    # visible part of this file; it has to be in scope when this function
    # is called -- confirm where it is supposed to come from.
    return re.sub(
        r'\d+(\.\d+)?',
        lambda match: convert(match.group(), wylie),
        sentence,
    )
def num2letter(sentence):
    """Rewrite the numbers in *sentence* via ``replace_numbers_with_convert``.

    Tibetan digits are first mapped to the ASCII digits 0-9 (each digit maps
    to its position in the digit string), then the result is handed to
    ``replace_numbers_with_convert`` with ``wylie=False``.

    Args:
        sentence: Text that may contain Tibetan and/or ASCII digits.

    Returns:
        The text returned by ``replace_numbers_with_convert``.
    """
    # One translation table covers all ten digits in a single pass.
    digit_table = str.maketrans("༠༡༢༣༤༥༦༧༨༩", "0123456789")
    ascii_digits = sentence.translate(digit_table)
    return replace_numbers_with_convert(ascii_digits, wylie=False)
# Function to perform TTS inference and hand the audio back to Gradio
def generate_audio(input_text):
    """Synthesise speech for *input_text* for a Gradio audio output.

    The text is normalised with ``num2letter``, passed to the module-level
    ``synthesiser`` pipeline, and the resulting waveform is noise-reduced.

    Args:
        input_text: The text to speak.

    Returns:
        A ``(sampling_rate, audio)`` tuple. Gradio's audio output expects
        the sample rate first and the samples second.
    """
    # Preprocess: normalise digits/numbers. The original read an unassigned
    # local here (``num2letter(text)``), which raised UnboundLocalError.
    text = num2letter(input_text)

    # Perform TTS inference on the normalised text, not the raw input,
    # so the preprocessing step actually takes effect.
    speech = synthesiser(text)
    sampling_rate = speech["sampling_rate"]

    # Postprocess: noise reduction.
    # NOTE(review): ``noisereduce`` is not imported in the visible part of
    # this file; it must be in scope for this call to work -- confirm.
    audio = noisereduce.reduce_noise(y=speech["audio"], sr=sampling_rate)

    # Drop any length-1 axes (e.g. a leading batch axis) so mono audio is
    # 1-D; this is a no-op for audio that is already 1-D.
    # NOTE(review): assumes the pipeline returns mono audio -- confirm.
    audio = np.squeeze(audio)

    return sampling_rate, audio
# Build the Gradio UI: a text box feeding generate_audio, with audio output.
iface = gr.Interface(
    title="Tibetan Text-to-Speech (MMS-TTS) Sherab",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
    fn=generate_audio,
    inputs="text",  # text input for the TTS
    outputs="audio",  # the generated speech
)

# Start serving the interface.
iface.launch()