adowu committed on
Commit
3062c72
·
verified ·
1 Parent(s): b416379

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -16
app.py CHANGED
@@ -2,6 +2,8 @@ import spaces
2
  import gradio as gr
3
  import torch
4
  from TTS.api import TTS
 
 
5
  import os
6
  import json
7
  import scipy.io.wavfile as wavfile
@@ -13,31 +15,30 @@ device = "cuda"
13
 
14
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
15
 
 
 
 
 
16
  @spaces.GPU(enable_queue=True)
17
  def clone(text, audio):
18
  # Generowanie mowy
19
- result = tts.tts(text=text, speaker_wav=audio, language="pl", return_dict=True)
20
 
21
  # Konwersja do numpy array i zapisanie jako plik WAV
22
- wav_np = np.array(result['wav'])
23
  wavfile.write("./output.wav", 24000, (wav_np * 32767).astype(np.int16))
24
 
 
 
 
 
25
  # Przygotowanie informacji o fonemach
26
  phonemes_data = []
27
- cumulative_duration = 0
28
- if 'phonemes' in result and 'durations' in result:
29
- for phoneme, duration in zip(result['phonemes'], result['durations']):
30
- start_time = cumulative_duration
31
- end_time = start_time + duration
32
- phonemes_data.append({
33
- "phoneme": phoneme,
34
- "start": float(start_time),
35
- "end": float(end_time),
36
- "duration": float(duration)
37
- })
38
- cumulative_duration = end_time
39
- else:
40
- phonemes_data.append({"error": "Brak informacji o fonemach"})
41
 
42
  # Zapisywanie informacji o fonemach do pliku JSON
43
  with open("./phonemes_info.json", "w", encoding="utf-8") as f:
 
2
  import gradio as gr
3
  import torch
4
  from TTS.api import TTS
5
+ from TTS.tts.utils.text.tokenizer import TTSTokenizer
6
+ from TTS.tts.utils.text.phonemizer import Phonemizer
7
  import os
8
  import json
9
  import scipy.io.wavfile as wavfile
 
15
 
16
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
17
 
18
+ # Inicjalizacja tokenizera i fonemizera
19
+ tokenizer = TTSTokenizer(use_phonemes=False)
20
+ phonemizer = Phonemizer()
21
+
22
  @spaces.GPU(enable_queue=True)
23
  def clone(text, audio):
24
  # Generowanie mowy
25
+ wav = tts.tts(text=text, speaker_wav=audio, language="pl")
26
 
27
  # Konwersja do numpy array i zapisanie jako plik WAV
28
+ wav_np = np.array(wav)
29
  wavfile.write("./output.wav", 24000, (wav_np * 32767).astype(np.int16))
30
 
31
+ # Przetwarzanie tekstu na fonemy
32
+ tokens = tokenizer.text_to_ids(text)
33
+ phonemes = phonemizer.phonemize(tokens, language="pl")
34
+
35
  # Przygotowanie informacji o fonemach
36
  phonemes_data = []
37
+ for i, phoneme in enumerate(phonemes):
38
+ phonemes_data.append({
39
+ "phoneme": phoneme,
40
+ "index": i
41
+ })
 
 
 
 
 
 
 
 
 
42
 
43
  # Zapisywanie informacji o fonemach do pliku JSON
44
  with open("./phonemes_info.json", "w", encoding="utf-8") as f: