"""Gradio front-end that pre-processes Ukrainian text and sends it to the TTS model.

The pre-processing pipeline normalises the text, optionally replaces words
with entries from ``dict.txt``, turns combining acute accents into ``+``
marks and optionally appends pause markers after punctuation.
"""

import threading
import unicodedata

import gradio as gr
import regex

from infer import inference

description = ''' Програма для перетворення текста в мову. Озвучування тексту українською мовою за допомогою штучного інтелекту '''

# Splits text into word tokens (letters, marks, '+'), whitespace runs and
# runs of everything else, so that joining the tokens restores the text.
_TOKEN_RE = regex.compile(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+')
# Matches a token made only of letters, marks and '+'.
_WORD_RE = regex.compile(r'^[\p{L}\p{M}\+]+$')

_ACUTE_NAME = 'COMBINING ACUTE ACCENT'

# Applied in this order by add_pauses_to_text.
_PAUSE_REPLACEMENTS = (
    (':', ':::'),
    (',', ',:::'),
    (';', ';:::'),
    ('—', '—:::'),
    ('–', '–:::'),
    ('.', '. ... ... '),
    ('!', '! ... ...'),
    ('?', '? ... ...'),
)


# Text Pre-processing Functions
def normalize_text(text):
    """Return *text* in Unicode NFC form."""
    return unicodedata.normalize('NFC', text)


def remove_combining_chars(text):
    """Return *text* with all nonspacing marks (category ``Mn``) removed.

    The text is decomposed (NFD) first so that marks carried by precomposed
    characters are removed as well, then recomposed (NFC).
    """
    decomposed = unicodedata.normalize('NFD', text)
    filtered = ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
    return unicodedata.normalize('NFC', filtered)


def adjust_case(original, replacement):
    """Return *replacement* re-cased to follow the casing of *original*.

    All-upper, capitalised and all-lower originals map to ``upper()``,
    ``capitalize()`` and ``lower()``. Any other casing is copied character
    by character; the part of *replacement* beyond ``len(original)`` is
    appended unchanged.
    """
    if not original:
        # Nothing to copy the case from (indexing original[0] would raise).
        return replacement
    if original.isupper():
        return replacement.upper()
    if original[0].isupper() and original[1:].islower():
        return replacement.capitalize()
    if original.islower():
        return replacement.lower()

    adjusted = [
        r_char.upper() if o_char.isupper() else r_char.lower()
        for o_char, r_char in zip(original, replacement)
    ]
    adjusted.append(replacement[len(original):])
    return ''.join(adjusted)


def replace_with_custom_dict(text, custom_dict, unknown_words):
    """Replace words of *text* that have an entry in *custom_dict*.

    Args:
        text: Input text.
        custom_dict: Mapping from a lookup key (lower-cased word without
            '+' and without combining marks) to its replacement.
        unknown_words: Set that receives the lookup key of every word
            token that has no entry in *custom_dict* (mutated in place).

    Returns:
        The text with known words replaced; other tokens are kept as is.
    """
    text = normalize_text(text)
    new_tokens = []
    for token in _TOKEN_RE.findall(text):
        token_normalized = normalize_text(token)
        if not _WORD_RE.match(token_normalized):
            new_tokens.append(token)
            continue

        token_no_combining = remove_combining_chars(token_normalized)
        base_token = normalize_text(token_no_combining.replace('+', '').lower())
        if base_token in custom_dict:
            new_tokens.append(adjust_case(token, custom_dict[base_token]))
        else:
            new_tokens.append(token)
            # A token made only of '+' or marks has an empty key; recording
            # it would write a blank line to new_dict.txt.
            if base_token:
                unknown_words.add(base_token)
    return ''.join(new_tokens)


def convert_accented_text(text):
    """Replace every combining acute accent in *text* with a following ``+``.

    Each character is decomposed (NFD); if the decomposition contains a
    combining acute accent, the accent is dropped and ``+`` is appended
    after the recomposed remainder.
    """
    parts = []
    for char in text:
        decomposed = unicodedata.normalize('NFD', char)
        if any(_ACUTE_NAME in unicodedata.name(c, '') for c in decomposed):
            base_char = ''.join(
                c for c in decomposed
                if _ACUTE_NAME not in unicodedata.name(c, '')
            )
            parts.append(unicodedata.normalize('NFC', base_char) + "+")
        else:
            parts.append(unicodedata.normalize('NFC', char))
    return ''.join(parts)


def add_pauses_to_text(text):
    """Return *text* with marker strings inserted after punctuation marks.

    ``:::`` is added after ``: , ; — –`` and ``... ...`` after ``. ! ?``.
    """
    for mark, replacement in _PAUSE_REPLACEMENTS:
        text = text.replace(mark, replacement)
    return text


# Load the custom dictionary from dict.txt
custom_dict = {}
with open('dict.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if line:
            line_normalized = normalize_text(line)
            # Key: the entry lower-cased, without '+' and combining marks.
            base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
            custom_dict[base_word] = line_normalized

# Load existing words from new_dict.txt
existing_new_dict_words = set()
try:
    with open('new_dict.txt', 'r', encoding='utf-8') as f:
        for line in f:
            existing_word = line.strip()
            if existing_word:
                existing_new_dict_words.add(existing_word)
except FileNotFoundError:
    pass  # If the file doesn't exist, we'll create it later

# Lock for thread-safe file writing
file_lock = threading.Lock()


def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Run the pre-processing pipeline over *text*.

    Args:
        text: Raw input text.
        apply_custom_dict: When true, replace words using ``custom_dict``.
        add_pauses_flag: When true, apply ``add_pauses_to_text``.

    Returns:
        The transformed text.

    Side effects:
        Words that were not found in ``custom_dict`` and are not yet in
        ``existing_new_dict_words`` are appended to ``new_dict.txt``.
    """
    unknown_words = set()
    text = normalize_text(text)
    if apply_custom_dict:
        text = replace_with_custom_dict(text, custom_dict, unknown_words)
    text = convert_accented_text(text)
    if add_pauses_flag:
        text = add_pauses_to_text(text)

    # Write unknown words to new_dict.txt
    if unknown_words:
        with file_lock:
            # The difference is computed under the lock so that two
            # concurrent requests cannot both append the same word.
            new_words_to_add = unknown_words - existing_new_dict_words
            if new_words_to_add:
                with open('new_dict.txt', 'a', encoding='utf-8') as f:
                    f.writelines(word + '\n' for word in sorted(new_words_to_add))
                existing_new_dict_words.update(new_words_to_add)
    return text


def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
    """Validate *transformed_text* and run ``inference`` on it.

    Args:
        transformed_text: Text to synthesise.
        speed: Passed to ``inference`` as ``speed``.
        steps: Passed to ``inference`` as ``diffusion_steps``.
        progress: Progress object passed on to ``inference``.

    Returns:
        A tuple ``(24000, inference(...)[0])``; presumably the sample rate
        and the waveform expected by the numpy-typed audio output.

    Raises:
        gr.Error: If the text is blank or longer than 50 000 characters.
    """
    if transformed_text.strip() == "":
        raise gr.Error("Ви повинні ввести текст")
    if len(transformed_text) > 50000:
        raise gr.Error("Текст повинен бути менше 50 000 символів")

    print("*** saying ***")
    print(transformed_text)
    print("*** end ***")

    result = inference(
        transformed_text,
        progress,
        speed=speed,
        alpha=1.0,
        diffusion_steps=steps,
        embedding_scale=1.0,
    )
    return 24000, result[0]


if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown(description)

        with gr.Row():
            text_input = gr.Textbox(label='Text:', lines=5, max_lines=10)
            transformed_text_output = gr.Textbox(label='Transformed Text:', lines=5, max_lines=10, interactive=True)

        with gr.Row():
            apply_custom_dict_checkbox = gr.Checkbox(label='Замінити слова за словником', value=True)
            add_pauses_checkbox = gr.Checkbox(label='Додати паузи', value=False)

        with gr.Row():
            speed_slider = gr.Slider(label='Швидкість:', maximum=1.3, minimum=0.7, value=1.0)
            steps_slider = gr.Slider(label='Кількість кроків дифузії:', minimum=3, maximum=20, step=1, value=3)

        with gr.Row():
            transform_button = gr.Button('Transform Text')
            generate_button = gr.Button('Згенерувати аудіо')

        audio_output = gr.Audio(label="Audio:", autoplay=False, streaming=False, type="numpy")

        def update_transformed_text(text, apply_custom_dict, add_pauses_flag):
            """Click handler: return the transformed version of *text*."""
            transformed_text = transform_text(text, apply_custom_dict, add_pauses_flag)
            return transformed_text

        # Set up transformation on button click
        transform_button.click(
            fn=update_transformed_text,
            inputs=[text_input, apply_custom_dict_checkbox, add_pauses_checkbox],
            outputs=transformed_text_output,
        )

        def generate_audio(transformed_text, speed, steps, progress=gr.Progress()):
            """Click handler: synthesise audio for the transformed text.

            The ``progress`` default lets Gradio supply its progress
            tracker for this event, which is then forwarded to
            ``synthesise``.
            """
            return synthesise(transformed_text, speed, steps, progress)

        generate_button.click(
            fn=generate_audio,
            inputs=[transformed_text_output, speed_slider, steps_slider],
            outputs=audio_output,
        )

    demo.launch(share=False, server_name="0.0.0.0")