Spaces:

openpecha
/

TTS

Runtime error

App Files Files Community

TTS / app.py

TenzinGayche

Update app.py

0dc87ee 9 months ago

raw history blame

No virus

5.6 kB

	import gradio as gr
	import librosa
	import numpy as np
	import torch
	import pyewts
	import noisereduce as nr
	from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
	from num2tib.core import convert
	from num2tib.core import convert2text
	import re

	def replace_numbers_with_convert(sentence: str, wylie: bool = True) -> str:
	    """Replace every number in *sentence* with the output of ``convert``.

	    A number is a run of digits with an optional fractional part
	    (``12`` or ``3.5``). Each matched number is passed, as the matched
	    string, to ``convert`` together with the *wylie* flag, and the
	    returned text is substituted in place.

	    Args:
	        sentence: Text that may contain numbers.
	        wylie: Forwarded unchanged as the second argument of ``convert``.

	    Returns:
	        The sentence with every matched number substituted. Text without
	        digits is returned unchanged.
	    """
	    # Non-capturing group: only the whole match (group 0) is ever used.
	    pattern = r'\d+(?:\.\d+)?'

	    def replace(match):
	        return convert(match.group(), wylie)

	    return re.sub(pattern, replace, sentence)

	# Transliterator whose toWylie() method is applied to the input text in predict().
	converter = pyewts.pyewts()
	checkpoint = "TenzinGayche/TTS_run3_ep20_174k_b"
	# Use the GPU when one is present; otherwise fall back to the CPU rather than
	# failing at import time on a machine without CUDA.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	processor = SpeechT5Processor.from_pretrained(checkpoint)
	model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
	model.to(device)
	vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
	# Keep the vocoder on the same device as the acoustic model.
	vocoder.to(device)


	# Speaker name (as offered in the UI) -> path of the .npy file loaded with np.load.
	speaker_embeddings = {
	    "Lhasa(female)": "female_2.npy",
	}

	# (source, target) character substitutions, applied in order by cleanup_text().
	replacements = [
	    ('_', '_'),  # identity mapping; kept so the table contents stay unchanged
	    ('*', 'v'),
	    ('`', ';'),
	    ('~', ','),
	    ('+', ','),
	    ('\\', ';'),
	    # Plain pipe. The previous '\|' spelling was an invalid escape sequence and
	    # could never match, because the entry above already rewrote every backslash.
	    ('|', ';'),
	    ('╚', ''),
	    ('╗', ''),
	]


	def cleanup_text(inputs: str) -> str:
	    """Return *inputs* with every substitution from ``replacements`` applied.

	    The pairs are applied one after another, in list order, using plain
	    (non-regex) string replacement.

	    Args:
	        inputs: Text to normalise.

	    Returns:
	        The text after all substitutions.
	    """
	    for src, dst in replacements:
	        inputs = inputs.replace(src, dst)
	    return inputs
	def predict(text, speaker):
	    """Synthesize speech for *text* using the embedding of *speaker*.

	    The text is passed through ``converter.toWylie``, ``cleanup_text`` and
	    ``replace_numbers_with_convert`` before being tokenised, truncated to
	    ``model.config.max_text_positions`` tokens and fed to the model. The
	    generated waveform is passed through ``nr.reduce_noise`` before being
	    returned.

	    Args:
	        text: Input text from the UI. ``None``, empty or whitespace-only
	            text yields an empty clip.
	        speaker: Key into ``speaker_embeddings`` selecting the embedding file.

	    Returns:
	        A ``(sample_rate, samples)`` tuple with a sample rate of 16000.

	    Raises:
	        KeyError: If *speaker* is not a key of ``speaker_embeddings``.
	    """
	    # Nothing to say: return an empty int16 clip instead of running the model.
	    if text is None or not text.strip():
	        return (16000, np.zeros(0).astype(np.int16))

	    text = converter.toWylie(text)
	    text = cleanup_text(text)
	    text = replace_numbers_with_convert(text)

	    inputs = processor(text=text, return_tensors="pt")
	    # limit input length
	    input_ids = inputs["input_ids"][..., :model.config.max_text_positions]

	    # Follow whatever device the model lives on instead of assuming CUDA.
	    device = model.device
	    speaker_embedding = torch.tensor(np.load(speaker_embeddings[speaker]))
	    speech = model.generate_speech(
	        input_ids.to(device),
	        speaker_embedding.to(device),
	        vocoder=vocoder.to(device),
	    )

	    # Hand the denoiser a NumPy array on the CPU rather than a torch tensor.
	    speech = nr.reduce_noise(y=speech.detach().cpu().numpy(), sr=16000)
	    return (16000, speech)


	# Static texts shown on the Gradio page: heading, short description and HTML footer.
	title = "Tibetan TTS"

	description = """
	Feedbacks: https://forms.gle/psbZnXGeBWXptkvs9
	"""
	# The link separators are plain "|" characters; the earlier "\|" spelling was an
	# invalid escape sequence and put a literal backslash into the rendered page.
	article = """
	<div style='margin:20px auto;'>
	<p>References: <a href="https://arxiv.org/abs/2110.07205">SpeechT5 paper</a> |
	<a href="https://github.com/microsoft/SpeechT5/">original GitHub</a> |
	<a href="https://huggingface.co/mechanicalsea/speecht5-tts">original weights</a></p>
	<pre>
	@article{Ao2021SpeechT5,
	title = {SpeechT5: Unified-Modal Encoder-Decoder Pre-training for Spoken Language Processing},
	author = {Junyi Ao and Rui Wang and Long Zhou and Chengyi Wang and Shuo Ren and Yu Wu and Shujie Liu and Tom Ko and Qing Li and Yu Zhang and Zhihua Wei and Yao Qian and Jinyu Li and Furu Wei},
	eprint={2110.07205},
	archivePrefix={arXiv},
	primaryClass={eess.AS},
	year={2021}
	}
	</pre>
	<p>Speaker embeddings were generated from <a href="http://www.festvox.org/cmu_arctic/">CMU ARCTIC</a> using <a href="https://huggingface.co/mechanicalsea/speecht5-vc/blob/main/manifest/utils/prep_cmu_arctic_spkemb.py">this script</a>.</p>
	</div>
	"""

	# Example rows passed to gr.Interface(examples=...): each row is
	# [input text, speaker name], in the same order as the interface inputs.
	examples = [
	["ད་དེ་ཚོ་འདི་བྱེད་དགོས་རེད་ ན་ཚ་ མ་ཡོང་སྔོན་ལ་ཁོ་རང་ལ་ཡང་ཁྱི་ཁོ་རང་ཁོ་ལ་ཡང་ཁབ་རྒྱག་ཡ་ཡོད་རེད། ཨུན་སྔོན་འགོག་དང་རཱབྷིསས་ཁབ་རྒྱག་ཡ་ཡོད་རེད་ད།", "Lhasa(female)"],
	["སྟོབས་ཆེན་རྒྱལ་ཁབ་ཉི་ཤུའི་ལྷན་ཚོགས་ཐོག་ལ་རྒྱ་ནག་གཞུང་གིས་བོད་ནང་རིག་གཞུང་རྩ་གཏོར་ཀྱི་སྲིད་བྱུས་ཁག་དཔར་རིས་ཐོག་ནས་ལས་འགུལ་སྤེལ་བའི་སྐོར འཇམ་དབྱངས་རྒྱ་མཚོ་ལགས་ཀྱིས་སྙན་སྒྲོན་གནང་གི་རེད།", "Lhasa(female)"],
	["དངོས་གནས་ལབ་དགོས་རཱ་ད། མི་དབུལ་པོ་དེ་ཚོ་ལ་ག་རེ་ལབ་དགོས་རེད། སྦྱིན་པ་གཏང་ཡ་ཡོད་རཱ། ཨུན། དེ་འདྲ་གི་ལས་འགུལ་དེ་འདྲའི་མང་པོ་བརྩམས་ཀི་འདུག་བ། དེ་ཚོ་ཡང་ངས་ཚད་ལས་བརྒལ་བའི་ཡག་པོ་རེད་དྲན་གི་འདུག། ", "Lhasa(female)"],
	["ཁོང་རྣམ་པ་ནི་སྤྱིར་བཏང་གི་གང་ཟག་ཅིག་མ་ཡིན་པར་མི་རབས་ནས་མི་རབས་རྒྱུད་པ་འཛིན་པའི་ནོར་བུ་ཡིན་ཞིང་། ", "Lhasa(female)"],
	["ཨ་ལེ། ཨེ་ནས་སྤྱིར་བཏང་ད་ང་ཚོ་ད་ལྟ་ཁྱེད་རང་གིས་དམིགས་ཡུལ་ད་གལ་ཆེན་པོ་བརྩིས་ནས།", "Lhasa(female)"],
	["ཀུན་གླེང་གསར་འགྱུར། ༢༠༢༣ ལོའི་ཟླ་༩ ཚེས་༢༧ །", "Lhasa(female)"],
	]
	# Build the web UI around predict() and start serving it.
	demo = gr.Interface(
	    fn=predict,
	    inputs=[
	        gr.Text(label="Input Text"),
	        gr.Radio(
	            label="Speaker",
	            # Offer exactly the speakers that have an embedding file, so the
	            # choices cannot drift from the speaker_embeddings table.
	            choices=list(speaker_embeddings),
	            value="Lhasa(female)",
	        ),
	    ],
	    outputs=[
	        gr.Audio(label="Generated Speech", type="numpy"),
	    ],
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	)
	demo.launch()