# SNT-700M / inference.py
import os

# NOTE: Inference is assumed to come from a separate local module; since
# this file is itself named inference.py, this line is a self-import as
# written and would raise ImportError when the module loads.
from inference import Inference

# Model and tokenizer paths come from the environment, with local defaults;
# load once at import time so warm handler invocations reuse the model.
model_path = os.getenv("MODEL_PATH", "saved_model/pytorch_model.bin")
tokenizer_path = os.getenv("TOKENIZER_PATH", "saved_tokenizer")
inference = Inference(model_path, tokenizer_path)


def handler(event, context):
    """Serverless-style entry point: generate text for the prompt in the event."""
    prompt = event["data"]["prompt"]
    max_length = event["data"].get("max_length", 100)
    response = inference.predict(prompt, max_length)
    return {"response": response}
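

# ---------------------------------------------------------------------------
# Hypothetical sketch of the Inference wrapper this script imports. The real
# class is not defined in this file; everything below (the name
# _HypotheticalInference and the transformers-based loading) is an assumption
# about one plausible shape for a constructor taking model/tokenizer paths
# and a predict(prompt, max_length) method. Illustrative only.
# ---------------------------------------------------------------------------
class _HypotheticalInference:
    def __init__(self, model_path, tokenizer_path):
        # Local import so this sketch adds no hard module-level dependency;
        # assumes artifacts saved with Hugging Face transformers (a bare
        # pytorch_model.bin would also need its config.json alongside).
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)
        self.model.eval()

    def predict(self, prompt, max_length):
        import torch

        # Greedy generation up to max_length tokens, decoded back to text.
        inputs = self.tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            output_ids = self.model.generate(**inputs, max_length=max_length)
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)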
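

# Minimal local smoke test using the event shape the handler expects
# ({"data": {"prompt": ..., "max_length": ...}}); context is unused, so
# None is fine here. Assumes the model/tokenizer artifacts above exist.
if __name__ == "__main__":
    event = {"data": {"prompt": "Hello, world", "max_length": 50}}
    print(handler(event, None))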