from typing import Any, Dict

import torch
from transformers import WhisperProcessor, pipeline

class EndpointHandler:
    def __init__(self, path=""):
        # Use the first GPU if one is available, otherwise run on CPU.
        device = 0 if torch.cuda.is_available() else "cpu"
        self.pipe = pipeline(
            task="automatic-speech-recognition",
            model="openai/whisper-large",
            chunk_length_s=30,
            device=device,
        )
        # Force Whisper to transcribe in Dutch instead of auto-detecting the
        # language or translating to English.
        processor = WhisperProcessor.from_pretrained("openai/whisper-large")
        self.pipe.model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(
            language="nl", task="transcribe"
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        data args:
            inputs (:obj:`bytes`): raw audio from the request body (e.g. wav or mp3)
        Return:
            A :obj:`dict` with the transcription; it will be serialized and returned
        """
        # `inputs` is the raw request body, typically wav or mp3 bytes; the
        # pipeline's feature extractor decodes and resamples it before inference.
        inputs = data.pop("inputs", data)

        # Run the ASR pipeline once and return the transcribed text.
        prediction = self.pipe(inputs)
        return {"text": prediction["text"]}
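

# A minimal local smoke test: a sketch only, not part of the handler contract.
# It assumes a hypothetical "sample.wav" on disk and mimics how Hugging Face
# Inference Endpoints deliver the raw request body under the "inputs" key.
if __name__ == "__main__":
    handler = EndpointHandler()
    with open("sample.wav", "rb") as f:  # hypothetical audio file
        audio_bytes = f.read()
    # The handler returns a dict such as {"text": "..."}.
    print(handler({"inputs": audio_bytes}))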