from transformers import pipeline
import gradio as gr

# Pipeline for text-to-speech
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

# Pipeline for speech-to-text
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")

# Pipeline for text generation (GPT-2 is a plain language model without a
# chat template, so it takes a text prompt rather than a list of messages)
chat = pipeline("text-generation", model="openai-community/gpt2-xl")


def voice_chat(user_voice):
    # Transcribe the recorded audio file to text
    user_text = stt(user_voice)["text"]

    # Fold the system instruction and the user's words into one plain prompt
    prompt = f"You are a kind helpful assistant.\nUser: {user_text}\nAssistant:"
    chat_reply = chat(
        prompt,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.95,
        temperature=0.7,
        return_full_text=False,
    )[0]["generated_text"].strip()

    # Synthesize the reply; Gradio's numpy audio format is
    # (sample_rate, waveform), so drop the batch dimension the pipeline returns
    speech = tts(chat_reply)
    audio = (speech["sampling_rate"], speech["audio"].squeeze())
    return chat_reply, audio


text_reply = gr.Textbox(label="Assistant Text")
voice_reply = gr.Audio(label="Assistant Voice", type="numpy")

iface = gr.Interface(
    fn=voice_chat,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[text_reply, voice_reply],
    live=True,
    title="AI Voice Assistant",
)

iface.launch(debug=True)
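
# ----------------------------------------------------------------------
# Optional variant: to keep the chat-style messages list the original
# attempted, swap GPT-2 for a model that ships a chat template. A minimal
# sketch, assuming TinyLlama/TinyLlama-1.1B-Chat-v1.0 as a stand-in (this
# model is not in the original); the pipeline then accepts the messages
# directly and appends the assistant's reply to the conversation.
chat = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

messages = [
    {"role": "system", "content": "You are a kind helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]
result = chat(messages, max_new_tokens=100, do_sample=True, top_p=0.95, temperature=0.7)
chat_reply = result[0]["generated_text"][-1]["content"]
print(chat_reply)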
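
# ----------------------------------------------------------------------
# Optional standalone check: the original imports IPython.display.Audio but
# never uses it. In a notebook it can preview the synthesized speech before
# wiring up the Gradio UI. A minimal sketch, assuming the `tts` pipeline
# defined above; the sample sentence is only illustrative.
from IPython.display import Audio

speech = tts("Hello, I am your voice assistant.")
Audio(speech["audio"], rate=speech["sampling_rate"])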