Update tts.py
Browse files
tts.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
-
import
|
2 |
-
from transformers import AutoTokenizer, AutoModelForTextToWaveform
|
3 |
import logging
|
4 |
-
import numpy as np
|
5 |
import soundfile as sf
|
6 |
|
7 |
# Set up logging
|
@@ -9,13 +7,12 @@ logging.basicConfig(level=logging.DEBUG)
|
|
9 |
|
10 |
MODEL_ID = "facebook/mms-tts-fao"
|
11 |
|
12 |
-
# Try to load the model and tokenizer
|
13 |
try:
|
14 |
-
|
15 |
-
|
16 |
-
logging.info("Model and tokenizer loaded successfully.")
|
17 |
except Exception as e:
|
18 |
-
logging.error(f"Error
|
19 |
raise
|
20 |
|
21 |
def synthesize_speech(text):
|
@@ -25,24 +22,12 @@ def synthesize_speech(text):
|
|
25 |
logging.error("Text input is empty.")
|
26 |
return None
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
model.to(device)
|
31 |
-
inputs = inputs.to(device)
|
32 |
|
33 |
-
|
34 |
-
outputs = model.generate(**inputs)
|
35 |
-
|
36 |
-
logging.info("Speech generated successfully.")
|
37 |
-
|
38 |
-
# Convert outputs to waveform
|
39 |
-
waveform = outputs.cpu().numpy().flatten()
|
40 |
-
# Normalize waveform to the range [-1, 1]
|
41 |
-
waveform = np.clip(waveform, -1.0, 1.0)
|
42 |
-
|
43 |
-
# Convert waveform to audio format that Gradio can handle
|
44 |
audio_path = "output.wav"
|
45 |
-
sf.write(audio_path, waveform, 16000)
|
46 |
return audio_path
|
47 |
except Exception as e:
|
48 |
logging.error(f"Error during speech synthesis: {e}")
|
|
|
1 |
+
from transformers import pipeline
|
|
|
2 |
import logging
|
|
|
3 |
import soundfile as sf
|
4 |
|
5 |
# Set up logging
|
|
|
7 |
|
8 |
MODEL_ID = "facebook/mms-tts-fao"
|
9 |
|
10 |
+
# Try to create the pipeline
|
11 |
try:
|
12 |
+
pipe = pipeline("text-to-speech", model=MODEL_ID)
|
13 |
+
logging.info("Pipeline created successfully.")
|
|
|
14 |
except Exception as e:
|
15 |
+
logging.error(f"Error creating pipeline: {e}")
|
16 |
raise
|
17 |
|
18 |
def synthesize_speech(text):
|
|
|
22 |
logging.error("Text input is empty.")
|
23 |
return None
|
24 |
|
25 |
+
result = pipe(text)
|
26 |
+
waveform = result["waveform"]
|
|
|
|
|
27 |
|
28 |
+
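# Save waveform to an audio file (NOTE(review): the transformers text-to-speech pipeline documents its output keys as "audio" and "sampling_rate" - confirm result["waveform"] exists, and prefer the returned sampling rate over a hard-coded one)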
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
audio_path = "output.wav"
|
30 |
+
sf.write(audio_path, waveform, 16000) # Write the waveform using soundfile
|
31 |
return audio_path
|
32 |
except Exception as e:
|
33 |
logging.error(f"Error during speech synthesis: {e}")
|