"""A simple web interactive chat demo based on gradio."""
import os
import time
import gradio as gr
import numpy as np
import spaces
import torch
from inference import OmniInference
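
# Load the Mini-Omni checkpoint once at startup and warm it up to avoid cold-start latency on the first request.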
device = "cuda" if torch.cuda.is_available() else "cpu"
omni_client = OmniInference('./checkpoint', device)
omni_client.warm_up()
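
# Output audio format produced by the model: 16-bit mono PCM at 24 kHz.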
OUT_CHUNK = 4096
OUT_RATE = 24000
OUT_CHANNELS = 1
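
# @spaces.GPU requests GPU hardware for each call when running on a Hugging Face ZeroGPU Space.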
@spaces.GPU
def process_audio(audio):
    filepath = audio
    print(f"filepath: {filepath}")
    if filepath is None:
        return

    cnt = 0
    tik = time.time()
    for chunk in omni_client.run_AT_batch_stream(filepath):
        if cnt == 0:
            print(f"first chunk time cost: {time.time() - tik:.3f}")
        cnt += 1
        # Convert chunk to numpy array
        audio_data = np.frombuffer(chunk, dtype=np.int16)
        audio_data = audio_data.reshape(-1, OUT_CHANNELS)
        yield OUT_RATE, audio_data.astype(np.int16)
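
# Streaming output: with streaming=True, Gradio plays each yielded (sample_rate, samples) tuple as it arrives.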
demo = gr.Interface(
    process_audio,
    inputs=gr.Audio(type="filepath", label="Microphone"),
    outputs=[gr.Audio(label="Response", streaming=True, autoplay=True)],
    title="Chat Mini-Omni Demo",
    live=True,
)

demo.queue().launch()