# app.py — Voice-to-Voice Chatbot (Hugging Face Space by MuhammadFarhan67)
# Pipeline: microphone audio -> Whisper transcription -> Groq Llama-3 reply -> gTTS speech.
# (The original file began with web-page scrape residue — "raw / history blame / 1.66 kB" —
#  which was not valid Python; it has been converted into this comment header.)
import gradio as gr
import torch
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
from groq import Groq
# Pick the compute device: use CUDA when a GPU is available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Speech-to-text pipeline: OpenAI Whisper (base) served via Hugging Face transformers.
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)

# Groq chat client.
# SECURITY FIX: the API key was hard-coded in source (and committed), which leaks the
# credential to anyone who can read the repo. Read it from the environment instead —
# set GROQ_API_KEY in the deployment's secrets. The leaked key should be revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Function to handle the voice-to-voice conversation
def voice_to_voice_conversation(audio):
    """Run one voice-to-voice chat turn.

    Parameters
    ----------
    audio : str
        Filesystem path to the recorded audio clip (Gradio ``type="filepath"``).

    Returns
    -------
    tuple[str, str]
        ``(transcription, mp3_path)`` — the Whisper transcription of the input
        and the path to an MP3 file containing the spoken LLM response.
    """
    # 1) Speech -> text with Whisper.
    transcription = whisper_model(audio)["text"]

    # 2) Text -> reply from Groq's hosted Llama 3 8B model.
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": transcription}],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content

    # 3) Text -> speech with gTTS.
    # BUGFIX: the original called tts.save() on a still-open NamedTemporaryFile,
    # which fails on Windows (the file cannot be reopened while held open) and
    # keeps a redundant handle on other platforms. Reserve a path with mkstemp,
    # close the descriptor, then let gTTS write to the path.
    tts = gTTS(response_text)
    fd, tmp_file_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(tmp_file_path)

    # Gradio serves the MP3 from this path for the Audio output component.
    return transcription, tmp_file_path
# Gradio front-end: record speech, display the transcription, play the reply.
chat_outputs = [
    gr.Textbox(label="Transcription"),
    gr.Audio(label="Response Audio"),
]

interface = gr.Interface(
    voice_to_voice_conversation,
    gr.Audio(type="filepath"),
    chat_outputs,
    title="Voice-to-Voice Chatbot",
    description="Speak into the microphone, and the chatbot will respond with a generated voice message.",
    live=False,
)

# Start the web app.
interface.launch()