"""Gradio voice assistant.

Accepts typed text and/or a microphone recording, generates a text reply
with a text-generation pipeline, and — when the input was typed text —
also synthesizes a spoken reply.
"""
from transformers import pipeline
import gradio as gr
from IPython.display import Audio  # NOTE(review): unused here — safe to drop

# Speech pipelines, loaded once at import time (model downloads on first run).
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")

# NOTE(review): "facebook/bart-base-conversational" does not look like a
# published Hub checkpoint — confirm the model id, or substitute a known
# dialogue model (e.g. "facebook/blenderbot-400M-distill").
chat = pipeline("text-generation", model="facebook/bart-base-conversational")


def handle_user_input(user_text, user_voice):
    """Handle one conversational turn.

    Args:
        user_text: optional text typed by the user; non-blank text takes
            priority over the voice recording.
        user_voice: filepath of the recorded audio clip (may be None).

    Returns:
        A ``(reply_text, reply_audio)`` pair. ``reply_audio`` is a
        ``(sampling_rate, waveform)`` tuple suitable for ``gr.Audio`` when
        the input was text, else ``None``.
    """
    if user_text and user_text.strip():
        user_text = user_text.strip()
        input_type = "text"
    else:
        input_type = "voice"
        try:
            user_text = stt(user_voice)["text"]
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; a failed transcription degrades to empty input.
            user_text = ""

    # Guard: nothing usable to respond to (blank text and failed/empty STT).
    if not user_text:
        return "Sorry, I didn't catch that. Could you try again?", None

    messages = [
        {"role": "system", "content": "Hi! How can I help you today?"},
        {"role": "user", "content": user_text},
    ]
    # Pipelines take the input as the first positional argument; the original
    # `chat(messages=messages, ...)` keyword form is not the call signature.
    chat_reply = chat(
        messages, max_length=100, top_p=0.95, temperature=0.7
    )[0]["generated_text"]
    messages.append({"role": "assistant", "content": chat_reply})

    # Synthesize audio only when the user typed (mirrors original behavior).
    audio = None
    if input_type == "text":
        out = tts(chat_reply)
        # gr.Audio expects a (sampling_rate, waveform) tuple; the TTS
        # pipeline returns a dict with "audio" and "sampling_rate" —
        # returning the raw waveform alone would lose the sample rate.
        audio = (out["sampling_rate"], out["audio"])
    return chat_reply, audio


iface = gr.Interface(
    fn=handle_user_input,
    inputs=[
        gr.Textbox(label="Enter your text (optional)"),
        gr.Audio(sources=["microphone"], type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Assistant Text"),
        gr.Audio(label="Assistant Voice (if text input)"),
    ],
    live=True,
    title="AI Voice Assistant",
)

# Guarded so importing this module does not start the server.
if __name__ == "__main__":
    iface.launch(debug=True)