# Spaces: Sleeping  (stray page-header text; not part of the program)
# Install the necessary libraries before running this script:
#   pip uninstall -y whisper
#   pip install git+https://github.com/openai/whisper.git
#   pip install gradio gtts groq ffmpeg-python

# Import required libraries
import io
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
# The Groq API key is read from the GROQ_API_KEY environment variable.
# It is deliberately NOT hard-coded: a key committed to source is readable by
# anyone who can see the file and must be treated as compromised and rotated.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment "
        "(or configure it as a secret) before starting the app."
    )

# Initialize the Groq client and load the Whisper "base" model on the CPU.
client = Groq(api_key=_groq_api_key)
model = whisper.load_model("base", device="cpu")
# Function to process audio | |
def process_audio(file_path):
    """Transcribe an audio file, get a chatbot reply, and turn it into speech.

    The audio is transcribed with the module-level Whisper ``model``; the
    transcription is sent as a single user message through the module-level
    Groq ``client``; the reply is synthesized to MP3 with gTTS.

    Args:
        file_path: Path of the audio file to process. ``None`` or an empty
            string is accepted and reported as "no audio provided"
            (presumably what the UI passes when no audio is supplied —
            verify against the Gradio version in use).

    Returns:
        A ``(message, audio_path)`` tuple. On success ``message`` is the
        chatbot's reply and ``audio_path`` is the path of an MP3 file with
        the spoken reply. On any failure ``message`` describes the problem
        and ``audio_path`` is ``None``; exceptions raised inside the
        processing steps are caught and reported this way.
    """
    # Guard first: os.path.isfile(None) raises TypeError, which would be
    # reported as a confusing "unexpected error" instead of a clear message.
    if not file_path:
        return "No audio file was provided.", None

    try:
        # Ensure the file exists
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        print(f"Processing file: {file_path}")

        # Load and process the audio with Whisper
        audio = whisper.load_audio(file_path)
        print("Audio loaded successfully.")

        # Transcribe the audio
        result = model.transcribe(audio)
        text = result["text"]
        print("Transcription:", text)
        # Silence or noise can yield an empty transcription; there is
        # nothing meaningful to send to the chat model in that case.
        if not text.strip():
            return "No speech could be transcribed from the audio.", None

        # Generate a response using Groq API
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        # Treat a missing (None) reply like an empty one instead of
        # crashing on .strip().
        reply = chat_completion.choices[0].message.content
        response_message = (reply or "").strip()
        print("Chatbot response:", response_message)
        if not response_message:
            return "The chatbot returned an empty response.", None

        # Convert the response to audio. Each call writes to its own
        # temporary file so overlapping requests cannot overwrite each
        # other's output (a fixed "response.mp3" would be shared).
        # delete=False keeps the file on disk after the handle is closed
        # so the caller can read it by path.
        tts = gTTS(response_message)
        with tempfile.NamedTemporaryFile(
            suffix=".mp3", delete=False
        ) as audio_file:
            tts.write_to_fp(audio_file)
            response_audio_path = audio_file.name

        return response_message, response_audio_path
    except FileNotFoundError as e:
        return f"File not found: {e}", None
    except UnicodeDecodeError as e:
        return f"Invalid audio file encoding: {e}", None
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure as
        # text rather than letting the exception escape.
        return f"An unexpected error occurred: {e}", None
# Page text for the Gradio interface.
title = "Voice-to-Voice Chatbot Application"
description = "Run a voice-to-voice chatbot with transcription and audio response."
article = (
    "### Instructions\n"
    "1. Upload an audio file.\n"
    "2. Wait for transcription and chatbot's response.\n"
    "3. Listen to the response audio."
)

# Input: an audio component that hands process_audio a file path.
_audio_input = gr.Audio(type="filepath", label="Upload an Audio File")

# Outputs, in the order process_audio returns them: reply text, reply audio.
_response_outputs = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]

# Wire process_audio into a Gradio Interface. live=True is passed through
# unchanged; see the Gradio Interface documentation for its exact semantics.
iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_response_outputs,
    title=title,
    description=description,
    article=article,
    live=True,
)

# Launch the interface, requesting a share link (share=True).
iface.launch(share=True)