|
import logging

import numpy as np
import soundfile as sf
from transformers import pipeline
|
|
|
|
|
# Emit DEBUG-level records so the diagnostic messages in this module show up.
logging.basicConfig(level=logging.DEBUG)

# Model identifier handed to the text-to-speech pipeline.
MODEL_ID = "facebook/mms-tts-fao"

# Build the pipeline once at import time so every synthesis call reuses it.
# A failure here is fatal for the module: log it with its traceback and
# re-raise so the importer sees the original exception.
try:
    pipe = pipeline("text-to-speech", model=MODEL_ID)
except Exception as e:
    logging.exception("Error creating pipeline: %s", e)
    raise
else:
    logging.info("Pipeline created successfully.")
|
|
|
def synthesize_speech(text, output_path="output.wav"):
    """Synthesize speech for *text* and write it to an audio file.

    Runs the module-level text-to-speech pipeline ``pipe`` on *text* and
    saves the resulting waveform with ``soundfile``.

    Args:
        text: The text to speak. ``None``, non-string values and empty or
            whitespace-only strings are rejected.
        output_path: Destination of the audio file. Defaults to
            ``"output.wav"``; an existing file at that path is overwritten.

    Returns:
        ``output_path`` on success, or ``None`` when the input is empty, the
        pipeline output lacks the ``audio`` / ``sampling_rate`` keys, or any
        error occurs. Errors are logged and never propagated to the caller.
    """
    # Guard first so invalid input never reaches the model.
    if not isinstance(text, str) or not text.strip():
        logging.error("Text input is empty.")
        return None

    try:
        result = pipe(text)
        logging.debug("Pipeline result: %s", result)

        if "audio" not in result or "sampling_rate" not in result:
            logging.error("Unexpected pipeline output: %s", result)
            return None

        waveform = np.asarray(result["audio"])
        # The waveform is expected to carry a leading batch axis, in which
        # case the first item is written. A 1-D waveform is written as-is:
        # indexing it with [0] would select a single sample, not the signal.
        if waveform.ndim > 1:
            waveform = waveform[0]

        sf.write(output_path, waveform, result["sampling_rate"])
        return output_path
    except Exception as e:
        # Boundary handler: callers rely on a None return instead of an
        # exception, but keep the traceback in the log for diagnosis.
        logging.exception("Error during speech synthesis: %s", e)
        return None
|
|