bluesix / autogpt /speak.py
soulwax
Push stable to huggingface
923696f
import os
import requests
from playsound import playsound
from autogpt.config import Config
import threading
from threading import Lock, Semaphore
import gtts
cfg = Config()
# Default voice IDs
default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
# Retrieve custom voice IDs from the Config class
custom_voice_1 = cfg.elevenlabs_voice_1_id
custom_voice_2 = cfg.elevenlabs_voice_2_id
# Placeholder values that should be treated as empty
placeholders = {"your-voice-id"}
# Use custom voice IDs if provided and not placeholders, otherwise use default voice IDs
voices = [
custom_voice_1
if custom_voice_1 and custom_voice_1 not in placeholders
else default_voices[0],
custom_voice_2
if custom_voice_2 and custom_voice_2 not in placeholders
else default_voices[1],
]
tts_headers = {"Content-Type": "application/json", "xi-api-key": cfg.elevenlabs_api_key}
mutex_lock = Lock() # Ensure only one sound is played at a time
queue_semaphore = Semaphore(
1
) # The amount of sounds to queue before blocking the main thread
def eleven_labs_speech(text, voice_index=0):
"""Speak text using elevenlabs.io's API"""
tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format(
voice_id=voices[voice_index]
)
formatted_message = {"text": text}
response = requests.post(tts_url, headers=tts_headers, json=formatted_message)
if response.status_code == 200:
with mutex_lock:
with open("speech.mpeg", "wb") as f:
f.write(response.content)
playsound("speech.mpeg", True)
os.remove("speech.mpeg")
return True
else:
print("Request failed with status code:", response.status_code)
print("Response content:", response.content)
return False
def brian_speech(text):
"""Speak text using Brian with the streamelements API"""
tts_url = f"https://api.streamelements.com/kappa/v2/speech?voice=Brian&text={text}"
response = requests.get(tts_url)
if response.status_code == 200:
with mutex_lock:
with open("speech.mp3", "wb") as f:
f.write(response.content)
playsound("speech.mp3")
os.remove("speech.mp3")
return True
else:
print("Request failed with status code:", response.status_code)
print("Response content:", response.content)
return False
def gtts_speech(text):
tts = gtts.gTTS(text)
with mutex_lock:
tts.save("speech.mp3")
playsound("speech.mp3", True)
os.remove("speech.mp3")
def macos_tts_speech(text, voice_index=0):
if voice_index == 0:
os.system(f'say "{text}"')
else:
if voice_index == 1:
os.system(f'say -v "Ava (Premium)" "{text}"')
else:
os.system(f'say -v Samantha "{text}"')
def say_text(text, voice_index=0):
def speak():
if not cfg.elevenlabs_api_key:
if cfg.use_mac_os_tts == "True":
macos_tts_speech(text)
elif cfg.use_brian_tts == "True":
success = brian_speech(text)
if not success:
gtts_speech(text)
else:
gtts_speech(text)
else:
success = eleven_labs_speech(text, voice_index)
if not success:
gtts_speech(text)
queue_semaphore.release()
queue_semaphore.acquire(True)
thread = threading.Thread(target=speak)
thread.start()