Spaces:
Runtime error
Runtime error
File size: 2,373 Bytes
3ff256e 524b895 66dc160 524b895 3ff256e 66dc160 3ff256e 66dc160 524b895 66dc160 3ff256e 66dc160 3ff256e 524b895 3ff256e 524b895 3ff256e 524b895 3ff256e 524b895 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import gradio as grd
import os
import tempfile
from openai import OpenAI
# Initialize the OpenAI client with the API key taken from the environment.
# The key is passed to the client explicitly instead of being written back
# into os.environ: assigning None to os.environ raises an opaque TypeError
# when the variable is unset, which hides the real configuration problem.
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable is not set; "
        "it is required to create the OpenAI client."
    )
openai_client = OpenAI(api_key=api_key)
def synthesize_speech(input_text, selected_model, selected_voice, audio_format):
    """Synthesize speech for *input_text* and save it to a temporary file.

    Args:
        input_text: The text to convert to speech.
        selected_model: Model name forwarded to the speech endpoint
            (the UI offers 'tts-1' and 'tts-1-hd').
        selected_voice: Voice name forwarded to the speech endpoint.
        audio_format: Requested output format. 'mp3', 'aac' and 'flac' are
            used as given; any other value falls back to 'opus'.

    Returns:
        The path of the temporary audio file that was written. The file is
        created with ``delete=False``, so it still exists after this
        function returns.

    Raises:
        grd.Error: If *input_text* is empty or contains only whitespace.
    """
    # Reject empty text up front so the user gets a readable message in the
    # UI instead of a failed API request.
    if not input_text or not input_text.strip():
        raise grd.Error("Please enter some text to convert to speech.")

    # Resolve the format once so the API request and the file suffix always
    # agree; unknown values fall back to opus, as the suffix logic always did.
    resolved_format = audio_format if audio_format in (
        'mp3', 'aac', 'flac') else 'opus'

    # See: https://platform.openai.com/docs/guides/text-to-speech
    audio_response = openai_client.audio.speech.create(
        model=selected_model,
        voice=selected_voice,
        input=input_text,
        # Without this the API returns its default format regardless of the
        # selection, and the saved file's extension would not match its bytes.
        response_format=resolved_format,
    )

    # Save the synthesized speech to a temporary audio file whose suffix
    # matches the encoded content.
    with tempfile.NamedTemporaryFile(
            suffix=f".{resolved_format}", delete=False) as audio_temp:
        audio_temp.write(audio_response.content)
        audio_file_path = audio_temp.name
    return audio_file_path
# Build the Gradio UI: a heading, a row of option dropdowns, a text box,
# a trigger button and an audio player for the generated speech.
with grd.Blocks() as speech_synthesizer_interface:
    grd.Markdown("# <center> Text-to-Speech Synthesizer </center>")

    # Model, voice and output-format pickers sit side by side in one row.
    with grd.Row():
        model_selector = grd.Dropdown(
            label='Choose Model',
            choices=['tts-1', 'tts-1-hd'],
            value='tts-1',
        )
        voice_selector = grd.Dropdown(
            label='Select Voice',
            choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
            value='alloy',
        )
        format_selector = grd.Dropdown(
            label='Select Format',
            choices=['mp3', 'opus', 'aac', 'flac'],
            value='mp3',
        )

    input_field = grd.Textbox(
        label="Enter your text here",
        placeholder="Type here and convert to speech.",
    )
    synthesis_button = grd.Button("Convert to Speech")
    audio_result = grd.Audio(label="Generated Speech")

    # Pressing Enter in the text box and clicking the button run the same
    # handler with the same inputs and output, so wire both in one place.
    synthesis_inputs = [
        input_field, model_selector, voice_selector, format_selector]
    for register_event in (input_field.submit, synthesis_button.click):
        register_event(
            fn=synthesize_speech,
            inputs=synthesis_inputs,
            outputs=audio_result,
        )

# Launch the interface
speech_synthesizer_interface.launch()
|