# Non-code residue from the Hugging Face file page, commented out so the module parses:
# fewe32's picture
# Update app.py
# 4a3db98 verified
import gradio as gr
from infer import inference
import unicodedata
import regex
import threading
# Markdown shown at the top of the Gradio page (Ukrainian UI text — do not translate).
description = '''
Програма для перетворення текста в мову. Озвучування тексту українською мовою за допомогою штучного інтелекту
'''
# Text Pre-processing Functions
def normalize_text(text):
    """Return *text* in Unicode NFC (canonically composed) form."""
    return unicodedata.normalize('NFC', text)
def remove_combining_chars(text):
    """Strip all combining marks (Unicode category Mn) from *text*.

    Decomposes to NFD, drops the mark characters, and re-composes to NFC.
    """
    kept = [ch for ch in unicodedata.normalize('NFD', text)
            if unicodedata.category(ch) != 'Mn']
    return unicodedata.normalize('NFC', ''.join(kept))
def adjust_case(original, replacement):
    """Transfer the capitalization pattern of *original* onto *replacement*.

    - all-uppercase original -> replacement uppercased
    - Title-case original    -> replacement capitalized
    - all-lowercase original -> replacement lowercased
    - mixed case             -> copy case character-by-character; any tail of
      *replacement* beyond ``len(original)`` is appended unchanged.
    """
    if not original:
        # Fix: the branches below index original[0]/iterate original, which
        # raised IndexError (or returned '') for an empty input.
        return replacement
    if original.isupper():
        return replacement.upper()
    if original[0].isupper() and original[1:].islower():
        return replacement.capitalize()
    if original.islower():
        return replacement.lower()
    # Mixed case: mirror each original character's case onto replacement.
    adjusted = ''.join(
        r_char.upper() if o_char.isupper() else r_char.lower()
        for o_char, r_char in zip(original, replacement)
    )
    return adjusted + replacement[len(original):]
def replace_with_custom_dict(text, custom_dict, unknown_words):
    """Substitute whole-word tokens using *custom_dict*.

    Tokenizes into runs of letters/marks/'+', whitespace, and punctuation;
    only the letter runs are looked up.  Dictionary keys are the lowercase,
    accent-free base forms.  Words absent from the dictionary are recorded
    in *unknown_words* (mutated in place).  Returns the rebuilt text.
    """
    text = normalize_text(text)
    pieces = []
    for tok in regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text):
        tok_nfc = normalize_text(tok)
        if not regex.match(r'^[\p{L}\p{M}\+]+$', tok_nfc):
            # Whitespace / punctuation: pass through untouched.
            pieces.append(tok)
            continue
        base = remove_combining_chars(tok_nfc).replace('+', '').lower()
        base = normalize_text(base)
        if base in custom_dict:
            # Re-apply the token's original capitalization to the entry.
            pieces.append(adjust_case(tok, custom_dict[base]))
        else:
            pieces.append(tok)
            unknown_words.add(base)
    return ''.join(pieces)
def convert_accented_text(text):
    """Turn combining acute stress accents into trailing '+' markers.

    Each character carrying a COMBINING ACUTE ACCENT (after NFD
    decomposition) is replaced by its base character followed by '+';
    every other character is passed through re-composed to NFC.
    """
    pieces = []
    for ch in text:
        parts = unicodedata.normalize('NFD', ch)
        stressed = any(
            'COMBINING ACUTE ACCENT' in unicodedata.name(p, '') for p in parts
        )
        if stressed:
            base = ''.join(p for p in parts
                           if 'COMBINING ACUTE ACCENT' not in unicodedata.name(p, ''))
            pieces.append(unicodedata.normalize('NFC', base) + "+")
        else:
            pieces.append(unicodedata.normalize('NFC', ch))
    return ''.join(pieces)
def add_pauses_to_text(text):
    """Insert pause markers after punctuation for slower, clearer speech."""
    # (needle, replacement) pairs, applied in this exact order — the
    # replacement strings are model-specific pause tokens.
    pause_rules = (
        (':', ':::'),
        (',', ',:::'),
        (';', ';:::'),
        ('—', '—:::'),
        ('–', '–:::'),
        ('.', '. ... ... '),
        ('!', '! ... ...'),
        ('?', '? ... ...'),
    )
    for needle, padded in pause_rules:
        text = text.replace(needle, padded)
    return text
# Load the custom dictionary from dict.txt: one accented entry per line.
# Keys are the lowercase, accent-free base forms; values keep the '+'
# stress markers / accents from the file.
custom_dict = {}
with open('dict.txt', 'r', encoding='utf-8') as dict_file:
    for raw_line in dict_file:
        entry = raw_line.strip()
        if not entry:
            continue
        entry = normalize_text(entry)
        key = remove_combining_chars(entry.replace('+', '').lower())
        custom_dict[key] = entry
# Seed the in-memory set of words already recorded in new_dict.txt so we
# only append genuinely new unknown words later.
existing_new_dict_words = set()
try:
    with open('new_dict.txt', 'r', encoding='utf-8') as new_dict_file:
        existing_new_dict_words.update(
            word for word in (line.strip() for line in new_dict_file) if word
        )
except FileNotFoundError:
    # No file yet — it is created on the first append.
    pass
# Serializes appends to new_dict.txt across concurrent Gradio requests.
file_lock = threading.Lock()
def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Run the full text pre-processing pipeline.

    Normalizes to NFC, optionally substitutes words from the custom
    dictionary (collecting words missing from it), converts combining
    acute accents into '+' stress markers, and optionally inserts pause
    markers.  Words missing from the dictionary are appended (sorted,
    one per line) to new_dict.txt.

    Returns the transformed text.
    """
    unknown_words = set()
    text = normalize_text(text)
    if apply_custom_dict:
        text = replace_with_custom_dict(text, custom_dict, unknown_words)
    text = convert_accented_text(text)
    if add_pauses_flag:
        text = add_pauses_to_text(text)
    if unknown_words:
        with file_lock:
            # Fix: compute the difference *inside* the lock.  Previously it
            # was computed before acquiring file_lock, so two concurrent
            # requests could both see a word as new and write it twice.
            new_words_to_add = unknown_words - existing_new_dict_words
            if new_words_to_add:
                with open('new_dict.txt', 'a', encoding='utf-8') as f:
                    f.writelines(word + '\n' for word in sorted(new_words_to_add))
                existing_new_dict_words.update(new_words_to_add)
    return text
def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
    """Validate the text and run TTS inference.

    Raises gr.Error for empty or over-long (>50 000 chars) input.
    Returns (sample_rate, audio) — sample rate is fixed at 24000.
    """
    if not transformed_text.strip():
        raise gr.Error("Ви повинні ввести текст")
    if len(transformed_text) > 50000:
        raise gr.Error("Текст повинен бути менше 50 000 символів")
    # Log exactly what is sent to the model.
    print("*** saying ***")
    print(transformed_text)
    print("*** end ***")
    audio = inference(transformed_text, progress, speed=speed, alpha=1.0,
                      diffusion_steps=steps, embedding_scale=1.0)[0]
    return 24000, audio
if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown(description)
        # NOTE: component creation order defines the page layout.
        with gr.Row():
            text_input = gr.Textbox(label='Text:', lines=5, max_lines=10)
            transformed_text_output = gr.Textbox(label='Transformed Text:', lines=5, max_lines=10, interactive=True)
        with gr.Row():
            apply_custom_dict_checkbox = gr.Checkbox(label='Замінити слова за словником', value=True)
            add_pauses_checkbox = gr.Checkbox(label='Додати паузи', value=False)
        with gr.Row():
            speed_slider = gr.Slider(label='Швидкість:', maximum=1.3, minimum=0.7, value=1.0)
            steps_slider = gr.Slider(label='Кількість кроків дифузії:', minimum=3, maximum=20, step=1, value=3)
        with gr.Row():
            transform_button = gr.Button('Transform Text')
            generate_button = gr.Button('Згенерувати аудіо')
        audio_output = gr.Audio(label="Audio:", autoplay=False, streaming=False, type="numpy")

        # Thin wrapper kept deliberately: wiring synthesise/transform_text
        # directly would let Gradio inspect their signatures differently
        # (e.g. inject a tracked Progress), changing behavior.
        def on_transform(text, apply_custom_dict, add_pauses_flag):
            return transform_text(text, apply_custom_dict, add_pauses_flag)

        transform_button.click(
            fn=on_transform,
            inputs=[text_input, apply_custom_dict_checkbox, add_pauses_checkbox],
            outputs=transformed_text_output,
        )

        def on_generate(transformed_text, speed, steps):
            return synthesise(transformed_text, speed, steps)

        generate_button.click(
            fn=on_generate,
            inputs=[transformed_text_output, speed_slider, steps_slider],
            outputs=audio_output,
        )
    demo.launch(share=False, server_name="0.0.0.0")