MediVox / doctorvoice.py
gauravgulati619's picture
Initial commit: Complete MediVox application
95841bc
raw
history blame
3.87 kB
# Load environment variables from a .env file (needed when not running under pipenv, which loads .env automatically):
from dotenv import load_dotenv
load_dotenv()
# Step 1a: Set up the text-to-speech (TTS) model with gTTS
import os
from gtts import gTTS
def text_to_speech_with_gtts_old(input_text, output_filepath):
    """Synthesize ``input_text`` with gTTS and write it to ``output_filepath``.

    The speech is generated in English at normal (non-slow) speed. The file
    is only saved; nothing is played back.
    """
    speech = gTTS(text=input_text, lang="en", slow=False)
    speech.save(output_filepath)
# input_text="Hi"
# text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3")
# Step 1b: Set up the text-to-speech (TTS) model with ElevenLabs
import elevenlabs
from elevenlabs.client import ElevenLabs
ELEVENLABS_API_KEY=os.environ.get("ELEVENLABS_API_KEY")
def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
    """Generate speech for ``input_text`` via ElevenLabs and save it.

    Uses the "Emily" voice with the ``eleven_turbo_v2`` model and the
    ``mp3_22050_32`` output format, authenticating with the module-level
    ``ELEVENLABS_API_KEY``. The result is written to ``output_filepath``;
    nothing is played back.
    """
    # Collect the fixed generation settings in one place.
    request = {
        "text": input_text,
        "voice": "Emily",
        "output_format": "mp3_22050_32",
        "model": "eleven_turbo_v2",
    }
    generated_audio = ElevenLabs(api_key=ELEVENLABS_API_KEY).generate(**request)
    elevenlabs.save(generated_audio, output_filepath)
# text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3")
# Step 2: Use the model to turn text output into voice
# The doctor's audio files do not play automatically once they are saved, so Step 2 adds playback to run each audio file right after it is written.
import subprocess
import platform
from pydub import AudioSegment
from pydub.playback import play
import tempfile
def text_to_speech_with_gtts(input_text, output_filepath):
    """Convert text to speech with gTTS, save it, and play it back.

    Args:
        input_text: Text to synthesize (spoken in English, normal speed).
        output_filepath: Destination path for the audio file gTTS writes.

    Returns:
        ``output_filepath``, matching ``text_to_speech_with_elevenlabs`` so
        both TTS entry points can be used interchangeably.

    Playback problems are reported by ``play_audio`` (printed, not raised);
    errors from gTTS itself propagate to the caller.
    """
    audioobj = gTTS(text=input_text, lang="en", slow=False)
    audioobj.save(output_filepath)

    # gTTS writes MP3 data. The previous inline playback passed that file
    # straight to Media.SoundPlayer (Windows) and aplay (Linux), neither of
    # which decodes MP3. play_audio already handles this per platform
    # (WAV conversion on Windows, mpg123 on Linux), so delegate to it.
    play_audio(output_filepath)
    return output_filepath
# input_text="Hi"
# #text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3")
def play_audio(file_path):
    """Play an MP3 file using a command-line player for the current OS.

    * macOS: ``afplay``.
    * Windows: the MP3 is converted to a temporary WAV with pydub and played
      through PowerShell's ``Media.SoundPlayer``; the temporary file is
      removed afterwards, even if conversion or playback fails.
    * Linux: ``mpg123``.

    Args:
        file_path: Path of the MP3 file to play.

    Returns:
        None. Any failure (unsupported OS, missing player, player exiting
        with a non-zero status, conversion error) is caught and printed
        rather than raised, so playback stays best-effort.
    """
    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS
            # check=True turns a failing player into an exception that the
            # handler below reports instead of passing silently.
            subprocess.run(['afplay', file_path], check=True)
        elif os_name == "Windows":  # Windows
            # Load MP3 and convert to WAV for playback
            audio = AudioSegment.from_mp3(file_path)
            # Reserve a temporary WAV path and close the handle right away so
            # pydub can write to that path without an extra open handle.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                wav_path = temp_wav.name
            try:
                audio.export(wav_path, format='wav')
                # Play the WAV file
                subprocess.run(
                    ['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync();'],
                    check=True,
                )
            finally:
                # Always clean up the temporary file, even when export or
                # playback raised.
                os.unlink(wav_path)
        elif os_name == "Linux":  # Linux
            subprocess.run(['mpg123', file_path], check=True)  # Using mpg123 for MP3 playback
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        print(f"An error occurred while trying to play the audio: {e}")
def text_to_speech_with_elevenlabs(input_text, output_filepath):
    """Generate speech with ElevenLabs, save it, play it, and return its path.

    Uses the "Aria" voice with the ``eleven_turbo_v2`` model and the
    ``mp3_22050_32`` output format, authenticating with the module-level
    ``ELEVENLABS_API_KEY``.

    Args:
        input_text: Text to synthesize.
        output_filepath: Where the generated audio is written.

    Returns:
        ``output_filepath``, unchanged.
    """
    tts_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    generated_audio = tts_client.generate(
        text=input_text,
        voice="Aria",
        output_format="mp3_22050_32",
        model="eleven_turbo_v2",
    )
    elevenlabs.save(generated_audio, output_filepath)

    # Saved files do not play on their own, so trigger playback explicitly.
    play_audio(output_filepath)
    return output_filepath
# text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3")