File size: 1,285 Bytes
3e1ff6b 41d6250 ba0da04 1180f3c 19401eb ed95412 ba0da04 3e1ff6b ba0da04 3e1ff6b ba0da04 3e1ff6b 0083046 ba0da04 58f6d57 0083046 3e1ff6b f7a79dd ba0da04 1d4c12b f7a79dd 1d4c12b f7a79dd 58f6d57 ba0da04 58f6d57 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from transformers import pipeline
import logging
import soundfile as sf
# Configure the root logger at DEBUG level so the debug/info/error messages
# emitted by this module are all shown.
logging.basicConfig(level=logging.DEBUG)

# Model identifier handed to the text-to-speech pipeline below.
MODEL_ID = "facebook/mms-tts-fao"

# Build the pipeline once, at import time. A failure here is logged and then
# re-raised, since nothing else in this module can work without `pipe`.
try:
    pipe = pipeline(task="text-to-speech", model=MODEL_ID)
except Exception as exc:
    logging.error(f"Error creating pipeline: {exc}")
    raise
else:
    logging.info("Pipeline created successfully.")
def synthesize_speech(text, output_path="output.wav"):
    """Synthesize ``text`` to speech and write the result to an audio file.

    The module-level ``pipe`` text-to-speech pipeline produces the waveform,
    which is then written with ``soundfile``.

    Args:
        text: The text to speak. Must be a string containing at least one
            non-whitespace character.
        output_path: Destination file for the audio. Defaults to
            ``"output.wav"``, the fixed path used before this parameter
            existed, so existing one-argument callers are unaffected.

    Returns:
        ``output_path`` on success. ``None`` when the input is not a string,
        is blank, the pipeline output lacks the ``'audio'`` or
        ``'sampling_rate'`` keys, or any exception is raised during synthesis
        or writing (the failure is logged, never propagated).
    """
    # Validate up front so bad input is reported as such, instead of
    # surfacing as an AttributeError swallowed by the handler below.
    if not isinstance(text, str):
        logging.error(
            "Text input must be a string, got %s.", type(text).__name__
        )
        return None
    if not text.strip():
        logging.error("Text input is empty.")
        return None

    try:
        result = pipe(text)
        logging.debug(f"Pipeline result: {result}")

        if 'audio' not in result or 'sampling_rate' not in result:
            logging.error(f"Unexpected pipeline output: {result}")
            return None

        audio = result['audio']
        # NOTE(review): the original code always wrote audio[0], i.e. it
        # assumed a leading batch axis. If the pipeline hands back an
        # already 1-D waveform, audio[0] would be a single sample, so the
        # first row is taken only when the array is not 1-D. Objects without
        # an ``ndim`` attribute keep the original audio[0] behavior.
        waveform = audio if getattr(audio, "ndim", 2) == 1 else audio[0]

        sf.write(output_path, waveform, result['sampling_rate'])
        return output_path
    except Exception as exc:
        # Deliberate best-effort boundary: callers get None rather than an
        # exception. logging.exception keeps the traceback for debugging.
        logging.exception("Error during speech synthesis: %s", exc)
        return None
|