# s2s / app.py
# frogcho123's picture
# Update app.py
# 89f5759
# raw
# history blame
# 485 Bytes
import gradio as gr
import whisper
import numpy as np
# Load the Whisper "base" checkpoint once at import time; transcribe() reads
# this module-level object on every call instead of reloading the weights.
model = whisper.load_model("base")
def _prepare_waveform(audio, target_rate=16000):
    """Convert a Gradio audio payload into a mono float32 waveform at *target_rate*.

    Args:
        audio: A ``(sample_rate, samples)`` tuple, a path to an audio file,
            or a bare array-like of samples (assumed to already be sampled
            at ``target_rate``).
        target_rate: Sample rate in Hz of the returned waveform.

    Returns:
        A 1-D ``float32`` NumPy array.
    """
    if isinstance(audio, str):
        # File path: let Whisper's own loader decode, downmix and resample.
        return whisper.load_audio(audio, sr=target_rate)

    is_rate_and_samples = (
        isinstance(audio, tuple)
        and len(audio) == 2
        and np.isscalar(audio[0])
        and not np.isscalar(audio[1])
    )
    if is_rate_and_samples:
        source_rate, samples = audio
    else:
        # Bare samples carry no rate information; assume they are already
        # at the target rate (this keeps plain-array callers working).
        source_rate, samples = target_rate, audio

    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM (Gradio normally delivers int16) -> floats in [-1, 1).
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # Multi-channel input is laid out as (samples, channels); downmix.
        samples = samples.mean(axis=-1)

    if source_rate != target_rate and samples.size:
        # Linear-interpolation resampling: dependency-free, but it applies no
        # anti-aliasing filter, so quality is below a dedicated resampler.
        n_out = int(round(samples.shape[0] * target_rate / source_rate))
        source_times = np.arange(samples.shape[0]) / source_rate
        target_times = np.arange(n_out) / target_rate
        samples = np.interp(target_times, source_times, samples)

    return np.ascontiguousarray(samples, dtype=np.float32)


def transcribe(audio):
    """Transcribe one audio clip to text using the module-level Whisper model.

    Args:
        audio: The value handed over by the Gradio ``"audio"`` input: a
            ``(sample_rate, samples)`` tuple, a file path, a bare array-like
            of 16 kHz samples, or ``None`` when nothing was recorded.

    Returns:
        The decoded text. Only the window kept by ``whisper.pad_or_trim`` is
        decoded. Returns an empty string when there is no audio at all.
    """
    if audio is None:
        return ""

    waveform = _prepare_waveform(audio)
    if waveform.size == 0:
        return ""

    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Half precision is only dependable on CUDA; on CPU fall back to float32
    # rather than relying on the fp16=True default.
    options = whisper.DecodingOptions(fp16=model.device.type == "cuda")
    result = whisper.decode(model, mel, options)
    return result.text
# Wire transcribe() into a minimal web UI: one audio input, one text output.
iface = gr.Interface(
    fn=transcribe,
    inputs="audio",
    outputs="text",
)

# Start the Gradio server as soon as the module is executed.
iface.launch()