alialek committed on
Commit
3fb186a
0 Parent(s):

text-to-speech init

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +58 -0
  3. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .gradio
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import numpy as np
3
+ from scipy.io.wavfile import write
4
+ import gradio as gr
5
+ from transformers import VitsTokenizer, VitsModel, set_seed, pipeline
6
+
7
+
8
class CustomFlagging(gr.FlaggingCallback):
    """Flagging callback that prints each flagged sample to stdout.

    Nothing is persisted. The callback only keeps a running count so that
    ``flag`` can return the number of samples flagged so far, which is the
    return value the ``gr.FlaggingCallback`` interface documents.
    """

    # Running total of flagged samples; becomes an instance attribute on the
    # first call to ``flag``.
    _flag_count = 0

    def setup(self, *args, **kwargs):
        """Accept and ignore whatever setup arguments are passed in."""

    def flag(self, flag_data, flag_option=None, username=None):
        """Print the flagged data together with the selected option.

        Args:
            flag_data: The flagged values handed over by the caller.
            flag_option: The option chosen when flagging, if any.
            username: Accepted for interface compatibility; not used.

        Returns:
            The number of samples flagged through this instance so far.
        """
        print(f"Аудио: {flag_data}, Сообщение: {flag_option}")
        self._flag_count += 1
        return self._flag_count
14
+
15
+
16
# Flagging callback instance and the list of feedback labels (Russian strings).
flagging_callback = CustomFlagging()
flagging_options = [
    "Хорошая озвучка",
    "Слышен механический треск",
    "Не совпадает произношение букв",
    "Проглочены буквы",
]
19
+
20
+
21
# Load the fine-tuned VITS checkpoint (tokenizer + model) by its Hub name.
model_name = "leks-forever/vits_lez_tts"  # Replace with your Hugging Face model name
tokenizer = VitsTokenizer.from_pretrained(model_name)
model = VitsModel.from_pretrained(model_name)

# Build the pipeline around the objects loaded above instead of passing the
# checkpoint name again, so the weights are not loaded into memory twice.
# NOTE: the pipeline now shares `model`, so attribute changes made on `model`
# (e.g. speaking_rate) also apply to pipeline calls.
tts_pipeline = pipeline("text-to-speech", model=model, tokenizer=tokenizer)
27
+
28
+
29
def tts_function(input_text):
    """Synthesise speech for ``input_text`` and return the path of a WAV file.

    Args:
        input_text: The text to synthesise.

    Returns:
        Path of a temporary ``.wav`` file containing the generated waveform,
        or ``None`` when ``input_text`` is empty or only whitespace.
    """
    # Imported locally because the file's import block does not bring in torch.
    import torch

    # Nothing to synthesise: skip the model call instead of feeding it an
    # empty token sequence.
    if not input_text or not input_text.strip():
        return None

    inputs = tokenizer(text=input_text, return_tensors="pt")

    # Fixed seed so the stochastic parts of generation are repeatable.
    set_seed(900)

    # Per the VITS config docs, larger speaking_rate means faster speech and
    # larger noise_scale means more variation: 0.9 is slightly slower than the
    # 1.0 default, and 0 switches the prediction noise off.
    model.speaking_rate = 0.9
    model.noise_scale = 0

    # Read the rate from the model config; running the whole text-to-speech
    # pipeline just to obtain it would synthesise the audio a second time.
    sampling_rate = model.config.sampling_rate

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(**inputs)
    waveform = outputs.waveform[0].cpu().float().numpy()

    # Reserve a unique path and close the handle before writing: reopening a
    # still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name
    write(wav_path, rate=sampling_rate, data=waveform)
    return wav_path  # Return the filepath
47
+
48
+
49
# Build the UI: one text box in, one audio player out, with manual flagging
# wired to the custom callback and feedback options defined earlier in this file.
interface = gr.Interface(
    fn=tts_function,
    inputs=gr.Textbox(label="Введите текст на лезгинском"),
    outputs=gr.Audio(label="Аудио"),
    title="Text-to-speech Лезги ЧIалал",
    # "manual" is the mode that shows flag buttons; "auto" shows none and
    # flags every submission automatically.
    flagging_mode="manual",
    flagging_options=flagging_options,
    flagging_callback=flagging_callback,
)

# Launch the app
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ scipy
4
+ torch