# mywhisper / app.py — uploaded by deadpool007 (commit 0ca44f0)
# import whisper
import gradio as gr
import torch
from transformers import pipeline
# Use GPU 0 when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Whisper large-v2 ASR pipeline; 30-second chunking lets it handle audio
# longer than the model's native context window.
pipe = pipeline(
    model="openai/whisper-large-v2",
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)

# Pin decoding to English transcription instead of language auto-detection.
# NOTE(review): forced_decoder_ids is deprecated in newer transformers
# releases in favor of generate_kwargs={"language": ..., "task": ...} —
# confirm the pinned library version before changing.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="English", task="transcribe")
# model = whisper.load_model("base")
def transcribe(audio):
    """Transcribe an uploaded audio file to text.

    Parameters
    ----------
    audio : str
        Filesystem path to the audio file (the Gradio input component
        is declared with type="filepath").

    Returns
    -------
    str
        The transcription produced by the module-level Whisper `pipe`.
    """
    # `pipe` is configured at module level (30 s chunking, English
    # transcription via forced_decoder_ids); its output dict carries the
    # transcription under the "text" key.
    return pipe(audio)["text"]
# Minimal web UI: upload an audio file, display the transcription.
# NOTE(review): gr.inputs.Audio(source=...) is the legacy Gradio 3.x API —
# confirm the pinned gradio version before modernizing to gr.Audio(sources=...).
demo = gr.Interface(
    fn=transcribe,
    title='Speech to Chat-GPT',
    inputs=[gr.inputs.Audio(source="upload", type="filepath")],
    outputs=["textbox"],
)
demo.launch()