import torch
import gradio as gr
from transformers import pipeline

# Run on the first GPU if available, otherwise on the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Long-form speech recognition with Whisper via the transformers pipeline;
# audio longer than 30 s is transcribed in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v2",
    chunk_length_s=30,
    device=device,
)

# Force the decoder to transcribe in English instead of auto-detecting the language.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="English", task="transcribe"
)


def transcribe(audio):
    """Transcribe an uploaded audio file (passed as a filepath) and return the text."""
    return pipe(audio)["text"]


gr.Interface(
    title="Speech to ChatGPT",
    fn=transcribe,
    # gr.inputs.Audio(source=...) is deprecated/removed in recent Gradio; gr.Audio replaces it.
    inputs=[gr.Audio(sources=["upload"], type="filepath")],
    outputs=["textbox"],
).launch()
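
# --- Optional next step (an assumption, not implemented in the original code):
# the app's title suggests the transcript is meant to be forwarded to ChatGPT
# as a question. A minimal sketch follows, kept commented out so it does not
# interfere with the running app. It assumes the `openai` package (v1+) is
# installed and OPENAI_API_KEY is set in the environment; "gpt-3.5-turbo" is
# an illustrative model name only.
#
# from openai import OpenAI
#
# client = OpenAI()  # reads OPENAI_API_KEY from the environment
#
# def ask_chatgpt(audio):
#     """Transcribe the audio, then send the text to the chat API as a question."""
#     question = "Question: " + transcribe(audio)
#     response = client.chat.completions.create(
#         model="gpt-3.5-turbo",  # assumption: any chat-capable model works here
#         messages=[{"role": "user", "content": question}],
#     )
#     return response.choices[0].message.content
#
# To wire it up, swap `fn=transcribe` for `fn=ask_chatgpt` in the gr.Interface call above.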