# Hugging Face Space: "Speech to Chat-GPT" — Whisper-based speech transcription demo.
import gradio as gr
import torch
from transformers import pipeline

# transformers pipelines accept a CUDA device index (0 = first GPU) or "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Whisper large-v2 ASR pipeline; long-form audio is split into 30 s chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v2",
    chunk_length_s=30,
    device=device,
)

# Force English transcription output (instead of auto language detection
# or translation) for every request.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="English", task="transcribe"
)
def transcribe(audio):
    """Transcribe an audio file to English text.

    Args:
        audio: Filesystem path to an audio file, as provided by the
            Gradio ``Audio(type="filepath")`` input component.

    Returns:
        The transcription string produced by the Whisper pipeline.
    """
    # The pipeline handles loading/resampling the file itself; it returns
    # a dict of which only the "text" field is surfaced to the UI.
    return pipe(audio)["text"]
# Build and launch the web UI: upload an audio file, display the transcription.
# NOTE(review): gr.inputs.Audio(source=...) is the legacy pre-3.x Gradio API;
# current releases use gr.Audio(sources=["upload"], type="filepath") — confirm
# the pinned gradio version before modernizing.
gr.Interface(
    title="Speech to Chat-GPT",
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath"),
    ],
    outputs=[
        "textbox",
    ],
).launch()