# metaambod / tts.py
# Commit 1d4c12b (verified) by unijoh: "Update tts.py"
from transformers import pipeline
import logging
import soundfile as sf
# Configure root logging at DEBUG level so pipeline diagnostics are emitted.
logging.basicConfig(level=logging.DEBUG)

# Model identifier handed to the text-to-speech pipeline below.
MODEL_ID = "facebook/mms-tts-fao"

# Build the pipeline once at import time. A failure is logged and re-raised,
# so the module never finishes loading without a usable `pipe`.
try:
    pipe = pipeline("text-to-speech", model=MODEL_ID)
except Exception as exc:
    logging.error(f"Error creating pipeline: {exc}")
    raise
else:
    logging.info("Pipeline created successfully.")
def synthesize_speech(text, output_path="output.wav"):
    """Synthesize speech for *text* and write it to an audio file.

    Args:
        text: The string to convert to speech.
        output_path: Destination of the audio file. Defaults to
            ``"output.wav"``; every call that uses the default overwrites
            the previous file, so pass a distinct path to keep several
            results side by side.

    Returns:
        The path of the written file, or ``None`` when *text* is not a
        non-blank string, when the pipeline output lacks the ``'audio'`` or
        ``'sampling_rate'`` key, or when any error occurs during synthesis
        or writing (the error is logged together with its traceback).
    """
    # Validate up front so bad input is reported as such instead of
    # surfacing as an AttributeError raised by ``.strip()``.
    if not isinstance(text, str):
        logging.error(f"Text input must be a string, got {type(text).__name__}.")
        return None
    if not text.strip():
        logging.error("Text input is empty.")
        return None

    try:
        result = pipe(text)
        logging.debug(f"Pipeline result: {result}")

        # Both keys are required to write the file; bail out otherwise.
        if 'audio' not in result or 'sampling_rate' not in result:
            logging.error(f"Unexpected pipeline output: {result}")
            return None

        audio = result['audio']
        # NOTE(review): the waveform presumably carries a leading batch axis
        # (hence the ``audio[0]``) -- confirm against the installed
        # transformers version. An array that is already one-dimensional is
        # written as-is instead of being reduced to its first sample.
        samples = audio[0] if getattr(audio, "ndim", 2) > 1 else audio

        sf.write(output_path, samples, result['sampling_rate'])
        return output_path
    except Exception as exc:
        # logging.exception keeps the traceback, which logging.error dropped.
        logging.exception(f"Error during speech synthesis: {exc}")
        return None