Spaces:
Running
Running
# If you don't use pipenv, keep the following two lines enabled so variables are loaded from a .env file:
from dotenv import load_dotenv | |
load_dotenv() | |
#Step1a: Setup Text to Speech–TTS–model with gTTS | |
import os | |
from gtts import gTTS | |
def text_to_speech_with_gtts_old(input_text, output_filepath):
    """Synthesize English speech for ``input_text`` with gTTS and save it.

    This variant only writes the audio file; it does not play it back.

    Args:
        input_text: Text to convert to speech.
        output_filepath: Path the generated audio is saved to.
    """
    # Build the gTTS request (English, normal speed) and write it in one go.
    gTTS(text=input_text, lang="en", slow=False).save(output_filepath)
# input_text="Hi" | |
# text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3") | |
#Step1b: Setup Text to Speech–TTS–model with ElevenLabs | |
import elevenlabs | |
from elevenlabs.client import ElevenLabs | |
# ElevenLabs API key, read once at import time from the environment
# (None when the ELEVENLABS_API_KEY variable is not set).
ELEVENLABS_API_KEY=os.environ.get("ELEVENLABS_API_KEY")
def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
    """Generate speech for ``input_text`` with ElevenLabs and save it.

    Uses the "Emily" voice with the ``eleven_turbo_v2`` model and the
    ``mp3_22050_32`` output format. This variant only writes the file; it
    does not play it back.

    Args:
        input_text: Text to convert to speech.
        output_filepath: Path the generated audio is saved to.
    """
    tts_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    speech = tts_client.generate(
        model="eleven_turbo_v2",
        output_format="mp3_22050_32",
        voice="Emily",
        text=input_text,
    )
    elevenlabs.save(speech, output_filepath)
# text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3") | |
# #Step2: Use Model for Text output to Voice | |
# When the doctor's audio files are saved, they do not play automatically, so Step 2 plays each saved file right after it is written.
import subprocess | |
import platform | |
from pydub import AudioSegment | |
from pydub.playback import play | |
import tempfile | |
def text_to_speech_with_gtts(input_text, output_filepath):
    """Synthesize English speech with gTTS, save it, and play it back.

    Args:
        input_text: Text to convert to speech.
        output_filepath: Path the generated MP3 audio is saved to.

    Returns:
        None. Playback is best-effort: ``play_audio`` reports playback
        problems on stdout instead of raising.
    """
    speech = gTTS(text=input_text, lang="en", slow=False)
    speech.save(output_filepath)

    # The saved file is an MP3. The previous inline playback handed it to
    # Media.SoundPlayer on Windows and to aplay on Linux, neither of which
    # decodes MP3. play_audio converts to WAV on Windows and uses mpg123 on
    # Linux, so delegate to it rather than duplicating the platform dispatch.
    play_audio(output_filepath)
# input_text="Hi" | |
# #text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3") | |
def play_audio(file_path):
    """Play an MP3 file with the platform's command-line player (best effort).

    Platform handling:
        * macOS: ``afplay``.
        * Windows: the MP3 is converted to a temporary WAV file with pydub and
          played through PowerShell's ``Media.SoundPlayer``; the temporary file
          is removed afterwards, even when conversion or playback fails.
        * Linux: ``mpg123``.

    Args:
        file_path: Path to the MP3 file to play.

    Returns:
        None. Any failure (missing player, non-zero player exit status,
        conversion error, unsupported operating system) is caught and
        reported on stdout rather than raised.
    """
    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS
            # check=True turns a failing player into an exception so it is
            # reported below instead of being silently ignored.
            subprocess.run(['afplay', file_path], check=True)
        elif os_name == "Windows":  # Windows
            # Load the MP3 first so that no temporary file is created when
            # the input cannot be decoded.
            audio = AudioSegment.from_mp3(file_path)
            # Reserve a temporary WAV path. The handle is closed before
            # exporting so the file can be reopened by name.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                wav_path = temp_wav.name
            try:
                audio.export(wav_path, format='wav')
                subprocess.run(
                    ['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync();'],
                    check=True,
                )
            finally:
                # Always clean up the temporary file, even on failure.
                os.unlink(wav_path)
        elif os_name == "Linux":  # Linux
            subprocess.run(['mpg123', file_path], check=True)  # mpg123 decodes MP3
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        # Playback is a convenience; report the problem and carry on.
        print(f"An error occurred while trying to play the audio: {e}")
def text_to_speech_with_elevenlabs(input_text, output_filepath):
    """Generate speech with ElevenLabs, save it, play it, and return its path.

    Uses the "Aria" voice with the ``eleven_turbo_v2`` model and the
    ``mp3_22050_32`` output format.

    Args:
        input_text: Text to convert to speech.
        output_filepath: Path the generated audio is saved to.

    Returns:
        ``output_filepath``, unchanged, after the audio has been saved and
        handed to ``play_audio``.
    """
    tts_request = {
        "text": input_text,
        "voice": "Aria",
        "output_format": "mp3_22050_32",
        "model": "eleven_turbo_v2",
    }
    speech = ElevenLabs(api_key=ELEVENLABS_API_KEY).generate(**tts_request)
    elevenlabs.save(speech, output_filepath)

    # Saved files do not play on their own, so trigger playback explicitly.
    play_audio(output_filepath)
    return output_filepath
# text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3") |