# Install required libraries (pydub also needs ffmpeg available on the system)
!pip install gradio transformers torch pydub soundfile

import gradio as gr
from transformers import pipeline
from pydub import AudioSegment
from io import BytesIO
import soundfile as sf

# Load Whisper model for speech-to-text
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Load a text-to-speech model once at startup; facebook/mms-tts-eng loads
# directly through the transformers "text-to-speech" pipeline
tts_model = pipeline("text-to-speech", model="facebook/mms-tts-eng")

# Mock Groq LLM interaction (replace with a real Groq API call;
# a hedged sketch using the official `groq` client appears after this script)
def groq_llm_interaction(prompt):
    return f"Echoing your input: {prompt}"

# Text-to-speech conversion
def text_to_speech(text, output_path="output.wav"):
    output = tts_model(text)
    # The pipeline returns a dict holding the waveform array and its sampling rate
    sf.write(output_path, output["audio"].squeeze(), output["sampling_rate"])
    return output_path

# Normalize the recorded audio to WAV and return its raw bytes
def process_audio(audio_path):
    audio = AudioSegment.from_file(audio_path)
    buf = BytesIO()
    audio.export(buf, format="wav")
    buf.seek(0)
    return buf.read()

# Chatbot function
def voice_to_voice_chat(audio_file):
    # Normalize the audio input to WAV bytes
    processed_audio = process_audio(audio_file)

    # Step 1: Speech-to-text (Whisper accepts raw audio bytes)
    transcript = whisper_model(processed_audio)["text"]

    # Step 2: LLM interaction
    response_text = groq_llm_interaction(transcript)

    # Step 3: Text-to-speech (TTS)
    response_audio_path = text_to_speech(response_text)

    return response_text, response_audio_path

# Gradio interface
interface = gr.Interface(
    fn=voice_to_voice_chat,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[
        "text",   # Transcription and response text
        "audio",  # TTS-generated audio output
    ],
    live=True,
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak to interact with the chatbot. It transcribes your speech, processes it with a Groq LLM, and responds with synthesized speech.",
)

# Launch Gradio app
interface.launch()
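
# The mock groq_llm_interaction above can be swapped for a real Groq call.
# A minimal sketch, assuming the official `groq` package (`pip install groq`),
# a GROQ_API_KEY environment variable, and an assumed model id (check Groq's
# current model list before relying on it):

from groq import Groq

def groq_llm_interaction_real(prompt):
    # The client reads GROQ_API_KEY from the environment by default
    client = Groq()
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",  # assumed model id, not confirmed by the source
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

# To wire this in, replace the body of groq_llm_interaction with the call
# above before launching the interface.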