my-alexa / app.py
jiuuee's picture
Update app.py
4ac65aa verified
raw
history blame
1.49 kB
import gradio as gr
import torch
import sounddevice as sd
import numpy as np
from nemo.collections.asr.models import ASRModel
# Load the NeMo ASR model used for the final transcription and switch it to
# inference mode.
model = ASRModel.from_pretrained("nvidia/canary-1b")
model.eval()

# Load the Silero voice-activity-detection (VAD) model that gates transcription.
# The torch.hub entry point returns a ``(model, utils)`` tuple; only the
# callable model is used in this file, so unpack it rather than binding the
# whole tuple (a tuple cannot be called).
kws_model, _vad_utils = torch.hub.load('snakers4/silero-vad', 'silero_vad')

# Constants
# NOTE(review): TRIGGER_WORD is not referenced by the code visible in this
# file; the gate model above detects speech in general, not this phrase.
TRIGGER_WORD = "hey alexa"
TRIGGER_DURATION = 2  # Length of each recorded clip, in seconds
SAMPLE_RATE = 16000  # Sample rate for recording, in Hz
def start_recording():
    """Record one fixed-length mono clip and return it as a 1-D array.

    The clip lasts ``TRIGGER_DURATION`` seconds at ``SAMPLE_RATE`` Hz and is
    captured as float32 samples. The call blocks until recording finishes.
    """
    print("Recording started...")
    frame_count = int(TRIGGER_DURATION * SAMPLE_RATE)
    clip = sd.rec(frame_count, samplerate=SAMPLE_RATE, channels=1, dtype='float32')
    # sd.rec returns immediately; wait until the buffer has been filled.
    sd.wait()
    # Collapse the single-channel (frames, 1) buffer to one dimension.
    return clip.flatten()
def detect_trigger(audio):
    """Return True when the gate model reports speech anywhere in *audio*.

    The gate model is Silero VAD, so this detects voice activity in general;
    it does not recognise a specific phrase.

    Args:
        audio: 1-D sequence of float samples recorded at ``SAMPLE_RATE`` Hz.

    Returns:
        ``True`` if any analysed window has a speech probability of at least
        0.5, otherwise ``False`` (including for empty or too-short input).
    """
    # torch.hub.load returns (model, utils) for silero-vad; accept either the
    # unpacked model or the raw tuple so this function works on its own.
    vad = kws_model[0] if isinstance(kws_model, tuple) else kws_model

    # The model operates on torch tensors, not numpy arrays.
    samples = torch.as_tensor(np.asarray(audio), dtype=torch.float32).flatten()

    # Silero VAD scores fixed-size windows: 512 samples at 16 kHz, 256 at 8 kHz.
    window = 256 if SAMPLE_RATE == 8000 else 512

    # The model is stateful across calls; start every clip from a clean state.
    if hasattr(vad, "reset_states"):
        vad.reset_states()

    with torch.no_grad():
        # A trailing partial window (shorter than `window`) is not analysed.
        for start in range(0, samples.numel() - window + 1, window):
            # The sample rate is a positional argument of the model's forward.
            speech_prob = vad(samples[start:start + window], SAMPLE_RATE).item()
            if speech_prob >= 0.5:
                return True
    return False
def transcribe_triggered():
    """Keep recording clips until one passes the trigger check, then transcribe it.

    Each iteration records a clip with ``start_recording`` and tests it with
    ``detect_trigger``. The first clip that passes is handed to the ASR model,
    and the first element of the model's result is returned. The loop has no
    exit other than a successful trigger.
    """
    while True:
        print("Listening for trigger word...")
        clip = start_recording()

        # Not triggered: discard this clip and record another one.
        if not detect_trigger(clip):
            continue

        print("Trigger word detected. Transcribing...")
        # The model takes a batch; send the single clip and unwrap its result.
        results = model.transcribe([clip])
        return results[0]
# Build the web UI: the wrapped function takes no arguments and returns text.
# Gradio has no ``NoInput`` component; a function without inputs is declared
# by passing ``inputs=None``, which yields an output-only interface.
iface = gr.Interface(
    fn=transcribe_triggered,
    inputs=None,
    outputs="text",
    title="ASR with NeMo Canary Model (Triggered by 'Hey Alexa')",
)
iface.launch()