ikeno-ada
/

m2m100_418-mt-bitsandbytes-4bit

text2text-generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

ikeno-ada committed on Apr 3, 2024

Commit

a25618b

·

verified ·

1 Parent(s): cbda76b

Create handler.py

Files changed (1) hide show

handler.py +28 -0

handler.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from typing import  Dict, List, Any
+from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+import torch
class EndpointHandler():
    """Custom inference handler that translates text with an M2M100 model.

    The model and tokenizer are loaded once at construction time and reused
    for every request handled through ``__call__``.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer stored under ``path``.

        Args:
            path: Directory (or model identifier) passed unchanged to
                ``from_pretrained`` for both the model and the tokenizer.
        """
        # load the optimized model
        self.model = M2M100ForConditionalGeneration.from_pretrained(
            path, device_map="auto", torch_dtype=torch.bfloat16
        )
        self.tokenizer = M2M100Tokenizer.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """Translate ``data["text"]`` into the language ``data["langId"]``.

        Args:
            data: Request payload. Must contain ``"text"`` (the text to
                translate) and ``"langId"`` (the target language code
                understood by the tokenizer's ``get_lang_id``).

        Returns:
            A dict with a single key, ``"translated"``, holding the decoded
            translation of the first generated sequence.

        Raises:
            ValueError: If ``"text"`` or ``"langId"`` is missing from the
                payload.
        """
        # Fail early with a clear message instead of handing the whole payload
        # dict to the tokenizer when a field is absent.
        missing = [key for key in ("text", "langId") if data.get(key) is None]
        if missing:
            raise ValueError(
                "Request payload is missing required field(s): "
                + ", ".join(missing)
            )
        text = data["text"]
        lang_id = data["langId"]

        # tokenize the input; the model was loaded with device_map="auto", so
        # the input tensors must be moved to wherever the model was placed.
        encoded = self.tokenizer(text, return_tensors="pt").to(self.model.device)

        # run the model, forcing the first generated token to be the target
        # language token so the decoder produces that language.
        generated_tokens = self.model.generate(
            **encoded,
            forced_bos_token_id=self.tokenizer.get_lang_id(lang_id),
        )
        result = self.tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]

        # return
        return {"translated": result}