File size: 1,285 Bytes
3e1ff6b
41d6250
ba0da04
1180f3c
19401eb
 
 
ed95412
ba0da04
3e1ff6b
ba0da04
3e1ff6b
 
ba0da04
3e1ff6b
0083046
ba0da04
58f6d57
 
0083046
 
 
 
 
3e1ff6b
f7a79dd
ba0da04
1d4c12b
 
 
 
 
f7a79dd
1d4c12b
f7a79dd
 
 
 
58f6d57
ba0da04
58f6d57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from transformers import pipeline
import logging
import soundfile as sf

# Configure the root logger at DEBUG level so that the debug output emitted
# during synthesis is visible.
logging.basicConfig(level=logging.DEBUG)

# Model identifier handed to the text-to-speech pipeline below.
MODEL_ID = "facebook/mms-tts-fao"

# Build the pipeline once at import time. A failure is logged and then
# re-raised, so the module does not load without a usable `pipe`.
try:
    pipe = pipeline(task="text-to-speech", model=MODEL_ID)
except Exception as exc:
    logging.error(f"Error creating pipeline: {exc}")
    raise
else:
    logging.info("Pipeline created successfully.")

def synthesize_speech(text, output_path="output.wav"):
    """Synthesize ``text`` to speech and save the result as an audio file.

    Runs the module-level ``pipe`` on ``text`` and writes the returned
    waveform to ``output_path`` using ``soundfile``.

    Args:
        text: The text to synthesize. Must be a string containing at least
            one non-whitespace character.
        output_path: Destination of the audio file. Defaults to
            ``"output.wav"``; pass a distinct path per call to avoid
            overwriting the result of an earlier call.

    Returns:
        ``output_path`` on success. ``None`` if ``text`` is not a string or
        is blank, if the pipeline output lacks the ``'audio'`` or
        ``'sampling_rate'`` key, or if an exception occurs during synthesis
        or writing (the exception is logged with its traceback, not raised).
    """
    # Validate up front so bad input is reported as bad input, rather than
    # surfacing as an AttributeError from ``.strip()`` further down.
    if not isinstance(text, str):
        logging.error("Text input must be a string, got %s.", type(text).__name__)
        return None
    if not text.strip():
        logging.error("Text input is empty.")
        return None

    try:
        result = pipe(text)
        logging.debug(f"Pipeline result: {result}")

        # Bail out early if the pipeline did not return the expected keys.
        if 'audio' not in result or 'sampling_rate' not in result:
            logging.error(f"Unexpected pipeline output: {result}")
            return None

        audio = result['audio']
        sampling_rate = result['sampling_rate']

        # NOTE(review): the audio is presumably a (channels, samples) array,
        # in which case the first channel is written, as before. If it is
        # already 1-D, indexing [0] would reduce it to a single sample, so
        # it is written as-is. Objects without ``ndim`` keep the original
        # ``audio[0]`` behavior.
        waveform = audio[0] if getattr(audio, "ndim", 2) > 1 else audio

        sf.write(output_path, waveform, sampling_rate)
        return output_path
    except Exception as e:
        # Best-effort contract: callers get None instead of an exception.
        # logging.exception keeps the traceback for diagnosis.
        logging.exception(f"Error during speech synthesis: {e}")
        return None