# VoiceToVoice_V2 / app.py
# Install the required packages before running:
#   pip uninstall -y whisper              # remove the unrelated "whisper" PyPI package, if present
#   pip install git+https://github.com/openai/whisper.git
#   pip install gradio gtts groq ffmpeg-python
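# Note: Whisper decodes audio by shelling out to the ffmpeg binary, so ffmpeg
# itself must be installed on the system (the ffmpeg-python wheel alone is not
# enough). On Debian/Ubuntu: apt-get install ffmpeg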
# Import required libraries
import os
import gradio as gr
import whisper
from gtts import gTTS
import io
from groq import Groq
# Set GROQ_API_KEY in the environment (e.g. as a Space secret) before launching;
# never hard-code API keys in source.
# Initialize Groq client and Whisper model
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
model = whisper.load_model("base", device="cpu")
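# Optional trade-off, using the standard Whisper checkpoint names: larger
# models transcribe more accurately but are markedly slower on CPU, e.g.:
# model = whisper.load_model("small", device="cpu")   # slower, more accurate
# model = whisper.load_model("tiny", device="cpu")    # faster, less accurate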
# Process an uploaded audio file: transcribe it, generate a chatbot reply,
# and synthesize that reply as speech
def process_audio(file_path):
    try:
        # Guard against Gradio passing None when no audio is provided
        if not file_path or not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        print(f"Processing file: {file_path}")

        # Load the audio with Whisper (decoded via ffmpeg to a 16 kHz array)
        audio = whisper.load_audio(file_path)
        print("Audio loaded successfully.")

        # Transcribe the audio (fp16=False avoids the FP16 warning on CPU)
        result = model.transcribe(audio, fp16=False)
        text = result["text"]
        print("Transcription:", text)
        # Generate a response using the Groq chat-completions API
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        response_message = chat_completion.choices[0].message.content.strip()
        print("Chatbot response:", response_message)
        # Convert the response to speech with gTTS
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)

        # Save the response audio to a file for Gradio to serve
        response_audio_path = "response.mp3"
        with open(response_audio_path, "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())
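        # Equivalent shortcut, if only the file on disk is needed: gTTS can
        # write the MP3 directly, skipping the in-memory buffer:
        # tts.save(response_audio_path)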
        return response_message, response_audio_path
    except FileNotFoundError as e:
        return f"File not found: {e}", None
    except UnicodeDecodeError as e:
        return f"Invalid audio file encoding: {e}", None
    except Exception as e:
        return f"An unexpected error occurred: {e}", None
# Define Gradio interface
title = "Voice-to-Voice Chatbot Application"
description = "Run a voice-to-voice chatbot with transcription and audio response."
article = "### Instructions\n1. Upload an audio file.\n2. Wait for transcription and chatbot's response.\n3. Listen to the response audio."
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
    title=title,
    description=description,
    article=article,
)
# Launch the Gradio interface (share=True creates a public link when running
# locally; it is unnecessary on Hugging Face Spaces, which serves the app itself)
iface.launch(share=True)