Multi-Voice-App / app.py
Shanulhaq's picture
Update app.py
6e94458 verified
raw
history blame
5 kB
# API keys:
# Credentials must be supplied through environment variables (or a local,
# git-ignored .env file) and must never be written into this file.
# Any key that was previously committed here is compromised: revoke and rotate it.
import os
import gradio as gr
import openai
import tempfile
import logging
from dotenv import load_dotenv
import requests
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the API keys from the environment by variable NAME. os.getenv() takes
# the name of the variable, not the secret itself; the secret values must
# never appear in source code.
openai_api_key = os.getenv('OPENAI_API_KEY')
eleven_labs_api_key = os.getenv('ELEVEN_LABS_API_KEY')

# Fail fast at startup when a required credential is missing, instead of
# failing later on the first API request.
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY is not set.")
logger.info("OpenAI API key is set.")

if not eleven_labs_api_key:
    raise ValueError("ELEVEN_LABS_API_KEY is not set.")
logger.info("Eleven Labs API key is set.")

# Set the API key for OpenAI
openai.api_key = openai_api_key
def text_to_speech_elevenlabs(text, voice="Rachel"):
    """Convert *text* to speech with the Eleven Labs API and save it as an MP3.

    Args:
        text: The text to synthesize.
        voice: Value inserted as the last path segment of the
            text-to-speech endpoint URL.

    Returns:
        The path of a temporary ``.mp3`` file containing the audio. The file
        is created with ``delete=False``, so the caller is responsible for
        removing it when it is no longer needed.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # NOTE(review): the Eleven Labs endpoint is documented as taking a voice
    # ID in this path segment; confirm that the display name "Rachel" is
    # accepted, or pass an ID obtained from the voices listing.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": eleven_labs_api_key,
    }
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.post(url, json=data, headers=headers, timeout=60)
    response.raise_for_status()

    # Write through the handle returned by NamedTemporaryFile and let the
    # context manager close it; reopening the file by name while the first
    # handle is still open leaks a descriptor and fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio_file:
        temp_audio_file.write(response.content)
    return temp_audio_file.name
def process_audio(audio_file, target_language):
    """Transcribe an audio file, translate the text, and synthesize the result.

    The three stages run in order: transcription with the OpenAI "whisper-1"
    model, translation with "gpt-3.5-turbo", and text-to-speech through
    ``text_to_speech_elevenlabs``. Each stage is guarded separately; when one
    fails, the error is logged and a short message is returned in place of
    the translated text.

    Args:
        audio_file: Path of the audio file to transcribe.
        target_language: Name of the language to translate into.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the translated
        text and ``audio_path`` is the path of the generated audio file. On
        failure ``text`` is an error message and ``audio_path`` is ``None``.
    """
    # Stage 1: speech -> text.
    try:
        with open(audio_file, "rb") as audio_stream:
            transcription = openai.Audio.transcribe("whisper-1", audio_stream)
        user_text = transcription['text']
        logger.info(f"Transcription successful: {user_text}")
    except Exception as e:
        logger.error(f"Error in transcribing audio: {e}")
        return "Error in transcribing audio.", None

    # Stage 2: text -> translated text.
    try:
        system_prompt = {
            "role": "system",
            "content": f"You are a helpful assistant that translates English to {target_language}."
        }
        user_prompt = {
            "role": "user",
            "content": f"Please translate the following text to {target_language}: {user_text}"
        }
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[system_prompt, user_prompt],
        )
        first_choice = completion['choices'][0]
        response_text = first_choice['message']['content'].strip()
        logger.info(f"Translation successful: {response_text}")
    except Exception as e:
        logger.error(f"Error in generating translation with OpenAI API: {e}")
        return "Error in generating translation.", None

    # Stage 3: translated text -> speech.
    try:
        speech_path = text_to_speech_elevenlabs(response_text)
        logger.info("Text-to-speech conversion successful.")
    except Exception as e:
        logger.error(f"Error in text-to-speech conversion: {e}")
        return "Error in text-to-speech conversion.", None

    return response_text, speech_path
# Optional: Function to list available voices
def list_available_voices():
    """Print the name and voice ID of every voice returned by Eleven Labs.

    Sends a GET request to the ``/v1/voices`` endpoint using the module's
    Eleven Labs API key and prints one line per entry of the ``voices`` list
    in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {
        "Accept": "application/json",
        "xi-api-key": eleven_labs_api_key,
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    voices = response.json()
    print("Available voices:")
    for voice in voices['voices']:
        print(f"Name: {voice['name']}, Voice ID: {voice['voice_id']}")
# Uncomment the following line to list available voices
# list_available_voices()
# Build the Gradio UI: an audio input plus a language selector feed
# process_audio, which yields the translated text and the synthesized speech.
# Components are created in the order inputs first, then outputs.
input_components = [
    gr.Audio(type="filepath", label="Input Audio"),
    gr.Dropdown(
        choices=["French", "Spanish", "German", "Chinese", "Japanese", "Hindi"],
        value="French",
        label="Target Language",
    ),
]
output_components = [
    gr.Textbox(label="Translated Text"),
    gr.Audio(label="Translated Speech"),
]

iface = gr.Interface(
    fn=process_audio,
    inputs=input_components,
    outputs=output_components,
    title="Multilingual Translator",
    description="Upload an audio file and translate it into a different language.",
    live=False,
)

# Start the web app; a launch failure is logged and then re-raised so the
# process still exits with the original error.
try:
    iface.launch()
    logger.info("Gradio interface launched successfully.")
except Exception as e:
    logger.error(f"Failed to launch Gradio interface: {e}")
    raise