"""Voice assistant demo: speech in -> text generation -> speech out.

The microphone recording is transcribed with Whisper, a reply is generated
with GPT-2 XL, and the reply is synthesized with MMS-TTS. Gradio provides
the web UI.
"""

from typing import Any, Optional, Tuple

import gradio as gr
from transformers import pipeline

# Create a pipeline for text-to-speech
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

# Create a pipeline for speech-to-text
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")

# Create a pipeline for text generation
chat = pipeline("text-generation", model="openai-community/gpt2-xl")

SYSTEM_PROMPT = "You are a kind helpful assistant."


def _build_prompt(user_text: str) -> str:
    """Render the system prompt and the user's turn as a plain-text dialogue.

    GPT-2 XL is a base language model without a chat template, so the
    conversation has to be spelled out as ordinary text for it to continue.
    """
    return f"{SYSTEM_PROMPT}\nUser: {user_text}\nAssistant:"


def voice_chat(
    user_voice: Optional[str],
) -> Tuple[str, Optional[Tuple[int, Any]]]:
    """Transcribe a recording, generate a reply, and synthesize it as audio.

    Args:
        user_voice: Path to the recorded audio file (the input component is
            configured with ``type="filepath"``), or ``None`` when there is
            no recording.

    Returns:
        A ``(reply_text, audio)`` pair. ``audio`` is ``(sampling_rate,
        samples)`` for the audio output component, or ``None`` when there
        is nothing to speak.
    """
    # With live=True the callback may fire with no recording (e.g. after the
    # user clears the input); there is nothing to transcribe in that case.
    if user_voice is None:
        return "", None

    user_text = stt(user_voice)["text"].strip()
    if not user_text:
        return "", None

    generations = chat(
        _build_prompt(user_text),
        max_new_tokens=100,      # budget for the reply only, not prompt + reply
        do_sample=True,          # required for top_p / temperature to apply
        top_p=0.95,
        temperature=0.7,
        return_full_text=False,  # keep only the continuation, not the prompt
    )
    chat_reply = generations[0]["generated_text"]
    # A base model tends to keep writing the dialogue; cut at the next turn.
    chat_reply = chat_reply.split("\nUser:", 1)[0].strip()
    if not chat_reply:
        return "", None

    speech = tts(chat_reply)
    # The audio output needs the sampling rate alongside the samples; squeeze
    # drops the leading batch axis the TTS pipeline may include.
    audio = (speech["sampling_rate"], speech["audio"].squeeze())
    return chat_reply, audio


text_reply = gr.Textbox(label="ChatGPT Text")
# Output-only component: no initial value, so it does not depend on an
# 'output.wav' file that this script never creates.
voice_reply = gr.Audio()

demo = gr.Interface(
    title="AI Voice Assistant with ChatGPT AI",
    fn=voice_chat,
    # NOTE(review): Gradio 4+ renamed this keyword to sources=["microphone"];
    # adjust to match the installed Gradio version.
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=[text_reply, voice_reply],
    live=True,
)
demo.launch(debug=True)