|
import gradio as gr |
|
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM |
|
from gtts import gTTS |
|
import tempfile |
|
|
|
|
|
def initialize_model():
    """Load every model the chatbot needs, once, at startup.

    Returns:
        A 4-tuple ``(asr_pipeline, translation_pipeline, tokenizer, causal_lm)``,
        or ``(None, None, None, None)`` if any download/initialization fails.
    """
    try:
        # Speech-to-text: Whisper small checkpoint via the HF pipeline API.
        speech_recognizer = pipeline(
            "automatic-speech-recognition", model="openai/whisper-small"
        )

        # Text translation pipeline (Helsinki-NLP multilingual checkpoint).
        translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-mul")

        # Conversational reply generator: DialoGPT tokenizer + causal LM.
        dialog_checkpoint = "microsoft/DialoGPT-medium"
        dialog_tokenizer = AutoTokenizer.from_pretrained(dialog_checkpoint)
        dialog_model = AutoModelForCausalLM.from_pretrained(dialog_checkpoint)
    except Exception as e:
        # Broad catch is deliberate: any load failure (network, disk, config)
        # is reported and signalled to the caller with a tuple of Nones.
        print(f"Error initializing models: {e}")
        return None, None, None, None

    return speech_recognizer, translator, dialog_tokenizer, dialog_model
|
|
|
|
|
# Load all models once at import time; on failure each name is None, which
# the launch guard at the bottom of the file checks before starting the UI.
asr_model, translation_model, tokenizer, conversation_model = initialize_model()
|
|
|
def chatbot_speech_to_speech(audio_input, target_language):
    """Transcribe the user's audio, generate a chat reply, and speak it back.

    Args:
        audio_input: Path to the recorded audio file (Gradio "filepath" mode).
        target_language: ISO 639-1 code the user speaks ("en", "fr", ...).

    Returns:
        Path to an MP3 file containing the spoken reply, or ``None`` on
        failure. (The previous version returned an error-message string,
        which the ``gr.Audio`` output component cannot render.)
    """
    try:
        # Speech -> text. Whisper transcribes in the spoken language.
        text_input = asr_model(audio_input)["text"]

        # Normalize the user's text to English for the English-only chat model.
        # NOTE(review): the loaded checkpoint is opus-mt-en-mul (English -> X);
        # using it for X -> English may need opus-mt-mul-en instead — confirm.
        if target_language != "en":
            translated_text = translation_model(
                text_input, src_lang=target_language, tgt_lang="en"
            )[0]['translation_text']
        else:
            translated_text = text_input

        # Generate a conversational reply with DialoGPT.
        inputs = tokenizer.encode(translated_text + tokenizer.eos_token, return_tensors='pt')
        response_ids = conversation_model.generate(
            inputs, max_length=100, pad_token_id=tokenizer.eos_token_id
        )
        # Decode only the newly generated tokens (skip the echoed prompt).
        response_text = tokenizer.decode(
            response_ids[:, inputs.shape[-1]:][0], skip_special_tokens=True
        )

        # Translate the English reply back into the user's language.
        if target_language != "en":
            final_response = translation_model(
                response_text, src_lang="en", tgt_lang=target_language
            )[0]['translation_text']
        else:
            final_response = response_text

        # Text -> speech. Close the temp-file handle before gTTS writes to it:
        # the previous version left the handle open, leaking the descriptor and
        # failing on platforms that forbid reopening an open NamedTemporaryFile.
        tts = gTTS(final_response, lang=target_language)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            output_path = tmp.name
        tts.save(output_path)
        return output_path
    except Exception as e:
        # Best-effort boundary: log the failure and return None so the Audio
        # output simply stays empty instead of receiving an unplayable string.
        print(f"Error in processing: {e}")
        return None
|
|
|
|
|
def interface(audio, language):
    """Gradio click handler: delegate straight to the speech-to-speech pipeline."""
    return chatbot_speech_to_speech(audio, language)
|
|
|
|
|
# Build the Gradio UI: a recorded-audio input plus a language selector,
# wired through the `interface` callback to an audio output.
with gr.Blocks() as gradio_ui:

    gr.Markdown("# Multilingual Voice-to-Voice Chatbot for Kids")

    gr.Markdown("### Speak to the chatbot in your selected language and receive a spoken response.")

    # "filepath" mode hands the callback a path on disk rather than raw samples.
    audio_input = gr.Audio(type="filepath", label="Record your message")

    language_dropdown = gr.Dropdown(choices=["en", "fr", "es", "de", "zh", "ur"], label="Select Language")

    # Output component: plays the MP3 path returned by the callback.
    result_audio = gr.Audio(type="filepath", label="Chatbot Response")

    submit_btn = gr.Button("Submit")

    submit_btn.click(fn=interface, inputs=[audio_input, language_dropdown], outputs=result_audio)
|
|
|
|
|
# Only start the web UI when every model loaded successfully; each name is
# either a loaded model object or None (see initialize_model's failure path).
if all(m is not None for m in (asr_model, translation_model, tokenizer, conversation_model)):
    gradio_ui.launch()
else:
    print("Error initializing one or more models. Please check your model configuration.")
|
|