Spaces:

yesyesjaewook
/

tts

Runtime error

App Files Files Community

tts / app.py

yesyesjaewook

Update TTS

00c7109 over 1 year ago

raw

history blame contribute delete

3.91 kB

	import re
	from datetime import datetime
	from typing import Tuple

	import gradio as gr
	import numpy as np
	import torch
	from espnet2.bin.tts_inference import Text2Speech

	# Load the pretrained text-to-speech model once at import time; synthesize()
	# calls this shared instance for every request.
	# NOTE(review): the tag is presumably a Hugging Face Hub model id — confirm.
	model = Text2Speech.from_pretrained("yesyesjaewook/jets-jaewook-ko")

	# Emotion labels in id order. A label's 1-based position in this tuple is the
	# integer id that `emotions` assigns to it, so entries must not be reordered
	# and new labels must only be appended at the end.
	_EMOTION_LABELS = (
	    "가엾다",
	    "갑갑하다",
	    "갑작스럽다",
	    "경멸스럽다",
	    "고맙다",
	    "고통스럽다",
	    "곤란하다",
	    "괴롭다",
	    "굉장하다",
	    "그립다",
	    "급하다",
	    "기막히다",
	    "긴밀하다",
	    "꺼림칙하다",
	    "끔찍하다",
	    "난데없다",
	    "남사스럽다",
	    "냉정하다",
	    "녹녹하다",
	    "다급하다",
	    "담백하다",
	    "대동소이하다",
	    "두렵다",
	    "둥그스름하다",
	    "뚜렷하다",
	    "만족스럽다",
	    "망하다",
	    "매정하다",
	    "못되다",
	    "무관심하다",
	    "무례하다",
	    "뭉클하다",
	    "밉다",
	    "반갑다",
	    "배은망덕하다",
	    "버겁다",
	    "보잘것없다",
	    "부끄럽다",
	    "부당하다",
	    "부유하다",
	    "불쌍하다",
	    "불행하다",
	    "불확실하다",
	    "뻔뻔스럽다",
	    "뼈아프다",
	    "사랑스럽다",
	    "서럽다",
	    "섭섭하다",
	    "수상하다",
	    "슬프다",
	    "시무룩하다",
	    "심란하다",
	    "쓰리다",
	    "쓸쓸하다",
	    "아니꼽다",
	    "아득하다",
	    "아쉽다",
	    "아찔하다",
	    "악랄하다",
	    "안타깝다",
	    "약하다",
	    "얕다",
	    "어렴풋하다",
	    "어리둥절하다",
	    "억울하다",
	    "언짢다",
	    "엄밀하다",
	    "엄중하다",
	    "엉뚱하다",
	    "영광스럽다",
	    "예사롭다",
	    "외롭다",
	    "위험하다",
	    "의심스럽다",
	    "자랑스럽다",
	    "자세하다",
	    "자유롭다",
	    "조마조마하다",
	    "즐겁다",
	    "짜증스럽다",
	    "창피하다",
	    "치욕스럽다",
	    "한심하다",
	    "행복하다",
	    "혼란하다",
	    "홀가분하다",
	    "화목하다",
	    "흐뭇하다",
	)

	# Maps each emotion label to its integer id (1 through 88, in listing order).
	emotions = {label: index for index, label in enumerate(_EMOTION_LABELS, start=1)}


	def float32_to_pcm16(waveform: torch.Tensor) -> np.ndarray:
	    """Convert a floating-point waveform tensor to 16-bit PCM samples.

	    Every sample is multiplied by the largest int16 value, clipped to the
	    int16 range and cast to ``np.int16`` (the cast truncates toward zero).

	    Args:
	        waveform: Tensor of audio samples. Values in [-1.0, 1.0] map onto the
	            int16 range; values outside it saturate at the range limits.

	    Returns:
	        An ``np.int16`` array with the same shape as ``waveform``.
	    """
	    pcm_range = np.iinfo(np.int16)
	    # Tensor.numpy() raises for tensors that still require grad or that live
	    # on a non-CPU device; detach()/cpu() are no-ops otherwise.
	    samples = waveform.detach().cpu().numpy()
	    scaled = samples * pcm_range.max
	    return scaled.clip(pcm_range.min, pcm_range.max).astype(np.int16)


	def endswith_punctuation(text: str) -> bool:
	    """Return True when the last character of ``text`` is ``.``, ``?`` or ``!``.

	    Only the final character is inspected, so punctuation in the middle of the
	    text does not count. Trailing whitespace is not skipped; callers should
	    strip the text first. An empty string yields False.

	    Args:
	        text: The text to inspect.

	    Returns:
	        Whether ``text`` ends with a sentence-final punctuation mark.
	    """
	    return text.endswith((".", "?", "!"))


	def synthesize(text: str, emotion: str) -> Tuple[int, np.array]:
	    """Synthesize speech for ``text`` using the selected ``emotion``.

	    The text is stripped of surrounding whitespace and gets a trailing "."
	    when ``endswith_punctuation`` reports that it has none. Each request is
	    logged to stdout with a timestamp, the emotion and the final text.

	    Args:
	        text: The text to speak.
	        emotion: A key of ``emotions``; its integer id is handed to the model
	            through the ``lids`` argument.

	    Returns:
	        A ``(model.fs, pcm16_samples)`` tuple, where the samples are the
	        model's ``"wav"`` output converted by ``float32_to_pcm16``.
	        ``model.fs`` is presumably the sampling rate in Hz — confirm.

	    Raises:
	        KeyError: If ``emotion`` is not a key of ``emotions``.
	    """
	    sentence = text.strip()
	    sentence = sentence if endswith_punctuation(sentence) else sentence + "."

	    timestamp = datetime.now().isoformat()
	    print(f"[{timestamp}] <{emotion}> {sentence}")

	    emotion_id = np.array(emotions[emotion])
	    result = model(sentence, lids=emotion_id)

	    sample_rate = model.fs
	    pcm_samples = float32_to_pcm16(result["wav"])
	    return (sample_rate, pcm_samples)


	# Build the Gradio page: the first column holds the text box, the synthesize
	# button and the audio player; the second column holds the emotion selector.
	# NOTE(review): indentation was lost in the copy this was recovered from; the
	# nesting below is the most plausible reading — confirm the gr.Box contents.
	with gr.Blocks() as demo:
	    gr.Markdown("# 송재욱 TTS 프로젝트")

	    with gr.Row():
	        with gr.Column():
	            text = gr.Textbox(
	                label="텍스트",
	                value="그동안 사랑해주신분들에게는 감사하다는 말만 전하고싶습니다. 제 방송 햇수로 3년이 흐르고 지금 여기까지 오는데 굉장히 노력많이했습니다.",
	                lines=3,
	            )
	            synthesize_button = gr.Button("합성")
	            # NOTE(review): gr.Box appears to have been removed in Gradio 4.x —
	            # confirm against the Gradio version this app is pinned to.
	            with gr.Box():
	                audio_output = gr.Audio()
	                gr.Markdown("음성 다운로드는 플레이어 오른쪽의 ··· 메뉴 클릭 부탁드립니다 🙏")

	        with gr.Column():
	            emotion = gr.Radio(
	                label="감정",
	                choices=list(emotions),
	                value="담백하다",
	            )

	    # Clicking the button runs synthesize(text, emotion) and sends the returned
	    # tuple to the audio player.
	    synthesize_button.click(
	        fn=synthesize,
	        inputs=[text, emotion],
	        outputs=audio_output,
	    )

	demo.launch()