|
from transformers import AutoProcessor, AutoModel |
|
import torch |
|
|
|
|
|
# Select CUDA when available. This handler does TTS generation, which is
# impractical on CPU, so fail fast at import time when no GPU is present.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# BUG FIX: the original checked lowercase `device`, an undefined name, so
# this guard crashed with NameError instead of raising the intended error.
if DEVICE.type != 'cuda':
    raise ValueError("need to run on GPU")
|
|
|
class EndpointHandler():
    """Hugging Face Inference Endpoints handler for a Bark-style TTS model.

    Loads a processor and model from a checkpoint path and, per request,
    turns the payload's input text into generated speech samples.
    """

    def __init__(self, path=""):
        """Load processor and model from *path* and move the model to DEVICE.

        Parameters
        ----------
        path : str
            Checkpoint directory or hub id passed by the endpoint runtime.
        """
        self.processor = AutoProcessor.from_pretrained(path)
        self.model = AutoModel.from_pretrained(path)
        # Model weights must live on the same device as the processed inputs.
        self.model.to(DEVICE)

    def __call__(self, data):
        """Generate speech for one request.

        Parameters
        ----------
        data : dict
            Request payload; must contain the text to synthesize under the
            key 'inputs' (a missing key raises KeyError, as before).

        Returns
        -------
        dict
            {'audio': nested list of waveform samples,
             'sample_rate': sample rate from the model's generation config}
        """
        input_text = data['inputs']

        # NOTE(review): the voice preset is hard-coded to a single English
        # speaker — confirm callers never need another voice before
        # parameterizing this.
        inputs = self.processor(
            text=input_text,
            return_tensors="pt",
            voice_preset="v2/en_speaker_6",
        ).to(DEVICE)

        # FIX: run generation under inference_mode — no autograd state is
        # recorded for a pure-inference call, saving memory and time while
        # producing identical outputs.
        with torch.inference_mode():
            speech_values = self.model.generate(**inputs, do_sample=True)
        sample_rate = self.model.generation_config.sample_rate

        # .tolist() implicitly copies the tensor to CPU before conversion,
        # so the response is plain JSON-serializable Python data.
        return {'audio': speech_values.tolist(), 'sample_rate': sample_rate}