# Hugging Face Spaces status when this file was captured: Runtime error
import gradio as gr | |
from transformers import ( | |
WhisperProcessor, WhisperForConditionalGeneration, | |
AutoModelForCausalLM, AutoTokenizer, pipeline, | |
) | |
from huggingface_hub import snapshot_download | |
import sounddevice as sd | |
import numpy as np | |
import torch | |
from gtts import gTTS | |
import pygame | |
class InteractiveChat:
    """Voice chat engine: Whisper transcription, Zephyr text generation, spoken reply.

    The Whisper and Zephyr models are loaded eagerly in ``__init__``; the
    text-to-speech model used by the "OpenVoice" choice is loaded lazily on
    the first ``speak`` call and then reused.
    """

    # Sampling rate handed to the Whisper processor.
    WHISPER_SAMPLE_RATE = 16_000
    # Hub repository backing the "OpenVoice" TTS choice.
    TTS_MODEL_ID = "facebook/mms-tts-eng"

    def __init__(self, model_name="openai/whisper-large", tts_choice="OpenVoice"):
        """Load the speech-recognition and text-generation models.

        Args:
            model_name: Hub id of the Whisper checkpoint to load.
            tts_choice: ``"OpenVoice"`` selects the model-based TTS pipeline;
                any other value selects gTTS.
        """
        self.whisper_processor = WhisperProcessor.from_pretrained(model_name)
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained(model_name)
        self.zephyr_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
        self.zephyr_model = AutoModelForCausalLM.from_pretrained(
            "HuggingFaceH4/zephyr-7b-beta", device_map="auto"
        )
        self.zephyr_pipeline = pipeline(
            "text-generation", model=self.zephyr_model, tokenizer=self.zephyr_tokenizer
        )
        self.tts_choice = tts_choice
        # Built on first use by _speak_with_model so the TTS checkpoint is
        # not downloaded/instantiated again for every utterance.
        self._tts_pipeline = None

    @staticmethod
    def _to_whisper_input(input_data, target_rate=16_000):
        """Normalize audio to a mono float32 waveform sampled at ``target_rate``.

        Args:
            input_data: Either a waveform array (taken to be already sampled
                at ``target_rate``) or a ``(sample_rate, samples)`` tuple.
                Integer samples are scaled to [-1, 1]; 2-D samples are
                treated as ``(n_samples, n_channels)`` and averaged to mono.
            target_rate: Output sampling rate in Hz.

        Returns:
            A 1-D ``numpy.float32`` array.

        Raises:
            ValueError: If there are no samples or the sample rate is not positive.
        """
        is_rate_and_samples = (
            isinstance(input_data, tuple)
            and len(input_data) == 2
            and np.isscalar(input_data[0])
            and not np.isscalar(input_data[1])
        )
        if is_rate_and_samples:
            rate, samples = input_data
        else:
            rate, samples = target_rate, input_data

        if samples is None:
            raise ValueError("input_data contains no audio samples")
        samples = np.asarray(samples)
        if samples.size == 0:
            raise ValueError("input_data contains no audio samples")
        if rate <= 0:
            raise ValueError(f"invalid sample rate: {rate!r}")

        if np.issubdtype(samples.dtype, np.integer):
            limits = np.iinfo(samples.dtype)
            full_scale = float(max(abs(int(limits.min)), int(limits.max)))
            samples = samples.astype(np.float32) / full_scale
        else:
            samples = samples.astype(np.float32)

        if samples.ndim > 1:
            samples = samples.mean(axis=1)

        if rate != target_rate:
            # Plain linear interpolation (no anti-aliasing filter): adequate
            # for speech and avoids pulling in another dependency.
            n_in = samples.shape[0]
            n_out = max(1, int(round(n_in * target_rate / rate)))
            src_times = np.arange(n_in) / rate
            dst_times = np.arange(n_out) / target_rate
            samples = np.interp(dst_times, src_times, samples)

        return samples.astype(np.float32)

    @staticmethod
    def _to_pcm16(audio):
        """Convert a float waveform in [-1, 1] to a flat int16 PCM array.

        Values outside [-1, 1] are clipped. The input is flattened, so it is
        expected to be mono (shape ``(n,)`` or ``(1, n)``).
        """
        flat = np.squeeze(np.asarray(audio, dtype=np.float64)).reshape(-1)
        return (np.clip(flat, -1.0, 1.0) * 32767.0).astype(np.int16)

    def generate_response(self, input_data):
        """Transcribe speech with Whisper and generate a reply with Zephyr.

        Args:
            input_data: A waveform array sampled at 16 kHz, or a
                ``(sample_rate, samples)`` tuple, which is converted to
                16 kHz mono float32 first.

        Returns:
            A ``(transcription, response)`` tuple of strings.

        Raises:
            ValueError: If ``input_data`` holds no samples.
        """
        samples = self._to_whisper_input(input_data, self.WHISPER_SAMPLE_RATE)
        input_features = self.whisper_processor(
            samples, sampling_rate=self.WHISPER_SAMPLE_RATE, return_tensors="pt"
        ).input_features
        predicted_ids = self.whisper_model.generate(input_features)
        transcription = self.whisper_processor.batch_decode(
            predicted_ids, skip_special_tokens=True
        )[0]
        # Use the transcription as input for Zephyr
        # NOTE(review): the text-generation pipeline appears to return the
        # prompt followed by the continuation by default; confirm whether
        # echoing the transcription in the response is intended.
        response = self.zephyr_pipeline(transcription, max_length=1000)[0]["generated_text"]
        return transcription, response

    def speak(self, text):
        """Play ``text`` aloud through pygame using the configured TTS engine.

        Playback is best-effort: any failure is printed and swallowed so a
        TTS or audio-device problem does not break the caller. Blocks until
        playback has finished. Blank text is ignored.
        """
        if not text or not text.strip():
            return
        try:
            if self.tts_choice == "OpenVoice":
                self._speak_with_model(text)
            else:  # gTTS
                self._speak_with_gtts(text)
        except Exception as e:
            print("Error occurred during speech generation:", e)

    def _speak_with_model(self, text):
        """Synthesize ``text`` with the Hub TTS model and play it."""
        tts = getattr(self, "_tts_pipeline", None)
        if tts is None:
            model_path = snapshot_download(self.TTS_MODEL_ID)
            tts = pipeline("text-to-speech", model=model_path)
            self._tts_pipeline = tts

        # The pipeline returns a dict, not an object with an ``audio`` attribute.
        result = tts(text)
        samples = self._to_pcm16(result["audio"])
        rate = int(result["sampling_rate"])

        # Re-open the mixer at the model's sampling rate as signed 16-bit
        # mono so the PCM array is played at the right speed.
        if pygame.mixer.get_init():
            pygame.mixer.quit()
        pygame.mixer.init(rate, -16, 1)
        # The device may still have been opened with more channels than
        # requested; make_sound needs one column per mixer channel.
        channels = pygame.mixer.get_init()[2]
        if channels > 1:
            samples = np.repeat(samples[:, np.newaxis], channels, axis=1)

        sound = pygame.sndarray.make_sound(np.ascontiguousarray(samples))
        sound.play()
        pygame.time.delay(int(sound.get_length() * 1000))

    def _speak_with_gtts(self, text):
        """Synthesize ``text`` with gTTS into ``response.mp3`` and play it."""
        gTTS(text=text, lang='en').save("response.mp3")
        pygame.mixer.init()
        pygame.mixer.music.load("response.mp3")
        pygame.mixer.music.play()
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
# Loaded chat engines keyed by Whisper model name, so the multi-gigabyte
# models are loaded once instead of on every UI event.
_chat_engines = {}


def _get_chat(model_name, tts_engine):
    """Return the cached InteractiveChat for ``model_name``, using ``tts_engine``."""
    chat = _chat_engines.get(model_name)
    if chat is None:
        chat = InteractiveChat(model_name, tts_engine)
        _chat_engines[model_name] = chat
    # Switching the TTS engine only needs the attribute updated, not a reload.
    chat.tts_choice = tts_engine
    return chat


def _select_engine(model_name, tts_engine):
    """Dropdown/radio change handler: make sure the chosen engine is ready."""
    _get_chat(model_name, tts_engine)


def _to_whisper_audio(audio, target_rate=16_000):
    """Convert a ``(sample_rate, samples)`` recording to mono float32 at ``target_rate``."""
    rate, samples = audio
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        limits = np.iinfo(samples.dtype)
        full_scale = float(max(abs(int(limits.min)), int(limits.max)))
        samples = samples.astype(np.float32) / full_scale
    if samples.ndim > 1:
        # Treated as (n_samples, n_channels): average the channels.
        samples = samples.mean(axis=1)
    if samples.size and rate != target_rate:
        # Linear-interpolation resampling; no anti-aliasing filter.
        n_out = max(1, int(round(samples.shape[0] * target_rate / rate)))
        samples = np.interp(
            np.arange(n_out) / target_rate,
            np.arange(samples.shape[0]) / rate,
            samples,
        )
    return samples.astype(np.float32)


def _respond(audio, model_name, tts_engine):
    """Transcribe a recording, generate a reply, speak it, and return display text."""
    if audio is None:
        # The change event also fires when the recording is cleared.
        return ""
    samples = _to_whisper_audio(audio)
    if samples.size == 0:
        return ""
    chat = _get_chat(model_name, tts_engine)
    transcription, response = chat.generate_response(samples)
    # Speak from the same handler so playback uses the fresh response rather
    # than racing a second handler that reads the textbox.
    chat.speak(response)
    return f"Transcription: {transcription}\n\nResponse: {response}"


with gr.Blocks() as demo:
    model_choice = gr.Dropdown(["openai/whisper-large"], label="Whisper Model", value="openai/whisper-large")
    tts_choice = gr.Radio(["OpenVoice", "gTTS"], label="TTS Engine", value="OpenVoice")
    # NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4 appears to
    # expect `sources=["microphone"]` — confirm against the pinned version.
    input_data = gr.Audio(source="microphone", type="numpy", label="Speak Your Message")
    output_text = gr.Textbox(label="Transcription and Response")
    model_choice.change(_select_engine, inputs=[model_choice, tts_choice], outputs=None)
    tts_choice.change(_select_engine, inputs=[model_choice, tts_choice], outputs=None)
    input_data.change(_respond, inputs=[input_data, model_choice, tts_choice], outputs=output_text)
demo.launch(share=True)