# ---------------------------
# Required Libraries
# ---------------------------
import os
import tempfile

import whisper
from groq import Groq
from gtts import gTTS
import gradio as gr
from pydub import AudioSegment

# ---------------------------
# 🔑 API Key Configuration
# ---------------------------
# SECURITY(review): a live-looking API key is hard-coded and committed to
# source control. It should be rotated immediately and injected via the
# environment / a secrets manager instead of being set here.
os.environ['GROQ_API_KEY'] = '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'

# ---------------------------
# 📥 Load Whisper Model
# ---------------------------
try:
    whisper_model = whisper.load_model("base")
    print("[INFO] Whisper model loaded successfully.")
except AttributeError:
    # NOTE(review): the `Whisper` class has no `load_model` classmethod in the
    # openai-whisper package, so this fallback would itself raise
    # AttributeError. Kept for compatibility, but it likely never works —
    # confirm which whisper distribution this was written against.
    from whisper import Whisper
    whisper_model = Whisper.load_model("base")
    print("[INFO] Whisper model loaded using alternative syntax.")

# ---------------------------
# 🎙️ Audio Processing
# ---------------------------
def validate_audio_file(audio_file):
    """Return True if *audio_file* is a non-empty path to an existing file.

    Args:
        audio_file: Filesystem path (or None) to check.

    Returns:
        bool: True when the path exists and the file has non-zero size.
    """
    if not audio_file or not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
        print(f"[ERROR] Invalid or empty audio file: {audio_file}")
        return False
    return True


def convert_to_wav(audio_file):
    """Convert *audio_file* to a temporary WAV file.

    Args:
        audio_file: Path to any audio format pydub/ffmpeg can read.

    Returns:
        str | None: Path of the new WAV file, or None on failure.
        The caller is responsible for deleting the returned file.
    """
    try:
        audio = AudioSegment.from_file(audio_file)
        # Create the temp file via a context manager so its handle is CLOSED
        # before pydub writes to the path. The previous
        # `NamedTemporaryFile(delete=False).name` left the handle open, which
        # leaks a descriptor and breaks the export on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wav_path = tmp.name
        audio.export(wav_path, format="wav")
        print(f"[INFO] Audio converted to WAV: {wav_path}")
        return wav_path
    except Exception as e:
        # Best-effort: callers treat None as "conversion failed".
        print(f"[ERROR] Audio Conversion Error: {e}")
        return None


def transcribe_audio(audio_file):
    """Transcribe *audio_file* to text using the loaded Whisper model.

    Args:
        audio_file: Path to the recorded audio.

    Returns:
        str: The transcription text, or a string starting with
        "Transcription Error:" on failure (callers match on that substring).
    """
    wav_path = None
    try:
        print(f"[INFO] Transcribing audio file: {audio_file}")
        if not validate_audio_file(audio_file):
            raise FileNotFoundError("Audio file not found or invalid path.")
        wav_path = convert_to_wav(audio_file)
        if not wav_path:
            raise Exception("Failed to convert audio to WAV format.")
        result = whisper_model.transcribe(wav_path)
        print(f"[INFO] Transcription result: {result['text']}")
        return result['text']
    except Exception as e:
        print(f"[ERROR] Transcription Error: {e}")
        return f"Transcription Error: {e}"
    finally:
        # Clean up the converted WAV; previously it was never deleted and
        # accumulated in the temp directory on every request.
        if wav_path and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError:
                pass
# ---------------------------
# 🤖 LLM Interaction
# ---------------------------
def get_groq_response(user_input):
    """Send *user_input* to Groq's chat-completions API and return the reply.

    Args:
        user_input: The user's message text.

    Returns:
        str: The model's response, or a string starting with
        "Groq API Error:" on failure (callers match on that substring).
    """
    try:
        print(f"[INFO] Sending input to Groq: {user_input}")
        client = Groq(api_key=os.environ['GROQ_API_KEY'])
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": user_input}
            ],
            model="llama-3.3-70b-versatile",
            stream=False,
        )
        response = chat_completion.choices[0].message.content
        print(f"[INFO] Groq response: {response}")
        return response
    except Exception as e:
        print(f"[ERROR] Groq API Error: {e}")
        return f"Groq API Error: {e}"


# ---------------------------
# 🗣️ Text-to-Speech
# ---------------------------
def text_to_speech(text):
    """Render *text* to a temporary MP3 file with gTTS.

    Args:
        text: The text to speak.

    Returns:
        str: Path to the generated MP3, or a string starting with
        "TTS Error:" on failure. The caller owns the file's lifetime.
    """
    try:
        print(f"[INFO] Converting text to speech: {text}")
        tts = gTTS(text)
        # Close the temp-file handle before gTTS writes to the path; the
        # previous `NamedTemporaryFile(delete=False).name` leaked an open
        # descriptor and fails on Windows where the open file is locked.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name
        tts.save(audio_path)
        print(f"[INFO] Audio file saved: {audio_path}")
        return audio_path
    except Exception as e:
        print(f"[ERROR] TTS Error: {e}")
        return f"TTS Error: {e}"


# ---------------------------
# 🛠️ Main Interaction Logic
# ---------------------------
def chatbot(audio_input):
    """Run the full pipeline: audio in → transcription → LLM → audio out.

    Args:
        audio_input: Filepath of the recorded user audio (from Gradio).

    Returns:
        tuple[str, str | None]: (text shown to the user, path of the spoken
        response MP3 or None on any failure).
    """
    try:
        print(f"[INFO] Audio Input Path: {audio_input}")

        # Validate Audio File
        if not validate_audio_file(audio_input):
            return "Error: Audio file not found or invalid path", None

        # Step 1: Transcribe Audio.
        # NOTE(review): failure detection by substring means any transcript
        # that legitimately contains the word "Error" is misreported as a
        # failure — a structured (ok, value) return would be more robust.
        text_input = transcribe_audio(audio_input)
        if "Error" in text_input:
            return text_input, None

        # Step 2: Get Response from Groq
        llm_response = get_groq_response(text_input)
        if "Error" in llm_response:
            return llm_response, None

        # Step 3: Convert Response to Audio
        audio_output = text_to_speech(llm_response)
        if "Error" in audio_output:
            return audio_output, None

        return llm_response, audio_output
    except Exception as e:
        print(f"[ERROR] General Error: {e}")
        return f"General Error: {e}", None
# ---------------------------
# 🌐 Gradio Interface
# ---------------------------
# Wire the chatbot pipeline to a microphone input and paired text/audio
# outputs; `live=True` re-runs the function as input arrives.
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Audio Response"),
    ],
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with audio.",
    live=True,  # Ensures real-time interaction
)

# Launch Gradio App
if __name__ == "__main__":
    print("[INFO] Starting Gradio Interface...")
    # NOTE: share=True opens a public tunnel to this app — anyone with the
    # link can use the configured API key's quota.
    interface.launch(share=True)