Spaces:

davidscripka
/

openWakeWord

Running

File size: 2,229 Bytes

3d4323f

import collections
import json
from functools import partial

import gradio as gr
import numpy as np
import pandas as pd
import scipy.signal
from openwakeword.model import Model

# Load openWakeWord models
# A single Model instance is created at import time and kept as a module-level
# global; process_audio calls model.predict on it for every audio chunk.
# NOTE(review): presumably Model() with no arguments loads every bundled
# pre-trained wake word model -- confirm against the openwakeword documentation.
model = Model()

# Audio format and history settings used by process_audio
_TARGET_SAMPLE_RATE = 16000  # Hz; input at any other rate is resampled to this
_CHUNK_SAMPLES = 1280  # samples handed to each model.predict call (80 ms at 16 kHz)
_HISTORY_FRAMES = 60  # number of most recent scores kept (and plotted) per model


# Define function to process audio
def process_audio(audio, state=None):
    """Score one streamed audio segment and return an updated score plot.

    Args:
        audio: ``(sample_rate, samples)`` pair as produced by a Gradio ``Audio``
            input with ``type="numpy"``. ``samples`` must expose ``.shape`` and
            support slicing. ``None`` is tolerated and leaves the plot unchanged.
        state: Mapping of model name to a ``deque`` holding that model's most
            recent scores. A fresh, empty history is created when ``None``, so
            no history is ever shared between unrelated callers.

    Returns:
        A ``(plot_update, state)`` tuple. ``plot_update`` is a Gradio update
        dict for the ``LinePlot`` output, or a no-op ``gr.update()`` when there
        is nothing to draw yet. ``state`` is the (mutated) score history to be
        passed back in on the next call.
    """
    # A mutable default argument would be shared by every call, so build the
    # history lazily instead.
    if state is None:
        state = collections.defaultdict(
            partial(collections.deque, maxlen=_HISTORY_FRAMES)
        )

    # Nothing was recorded: keep whatever is currently displayed.
    if audio is None:
        return gr.update(), state

    sample_rate, data = audio[0], audio[1]

    # Resample audio to 16khz if needed; audio already at 16khz is used as-is
    if sample_rate != _TARGET_SAMPLE_RATE:
        n_resampled = int(float(data.shape[0]) / sample_rate * _TARGET_SAMPLE_RATE)
        data = scipy.signal.resample(data, n_resampled)

    # Get predictions. Only complete chunks are scored; a shorter trailing
    # remainder is dropped rather than re-recording the previous prediction.
    for start in range(0, len(data) - _CHUNK_SAMPLES + 1, _CHUNK_SAMPLES):
        prediction = model.predict(data[start:start + _CHUNK_SAMPLES])
        for key in prediction:
            scores = state[key]

            # Fill deque with zeros if it's empty so the plot always spans the
            # full history window
            if not scores:
                scores.extend(np.zeros(_HISTORY_FRAMES))

            # Add prediction
            scores.append(prediction[key])

    # No model has produced a score yet (e.g. the first segment was shorter
    # than one chunk); pd.concat would raise on an empty list.
    if not state:
        return gr.update(), state

    # Make line plot
    frames = [
        pd.DataFrame({"x": np.arange(len(scores)), "y": list(scores), "Model": key})
        for key, scores in state.items()
    ]
    df = pd.concat(frames)
    plot = gr.LinePlot().update(value = df, x='x', y='y', color="Model", y_lim = (0,1), tooltip="Model",
                                width=600, height=300, x_title="Time (frames)", y_title="Model Score", color_legend_position="bottom")

    # Manually adjust how the legend is displayed by patching the serialized
    # chart spec that the update carries
    spec = json.loads(plot["value"]["plot"])
    legend = spec["layer"][0]['encoding']['color']['legend']
    legend["direction"] = "vertical"
    legend["columns"] = 4
    legend["labelFontSize"] = 12
    legend["titleFontSize"] = 14

    plot["value"]['plot'] = json.dumps(spec)

    return plot, state

# Build the Gradio interface, then start serving it
_mic_stream = gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False)
_score_plot = gr.LinePlot(show_label=False)

# "state" is listed as both an input and an output so that the history
# process_audio returns is handed back to it on the next call.
gr_int = gr.Interface(
    fn=process_audio,
    inputs=[_mic_stream, "state"],
    outputs=[_score_plot, "state"],
    live=True,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
)

gr_int.launch()