michaelfeil
committed on
Commit
·
9680a82
1
Parent(s):
41c5313
Upload intfloat/e5-small-v2 ctranslate fp16 weights
Browse files- README.md +13 -2
- vocabulary.txt +0 -0
README.md
CHANGED
@@ -2621,12 +2621,23 @@ and [hf-hub-ctranslate2>=2.0.8](https://github.com/michaelfeil/hf-hub-ctranslate
|
|
2621 |
- `compute_type=int8` for `device="cpu"`
|
2622 |
|
2623 |
```python
|
2624 |
-
from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
|
2625 |
from transformers import AutoTokenizer
|
2626 |
|
2627 |
model_name = "michaelfeil/ct2fast-e5-small-v2"
|
2628 |
|
2629 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2630 |
```
|
2631 |
|
2632 |
# Licence and other remarks:
|
|
|
2621 |
- `compute_type=int8` for `device="cpu"`
|
2622 |
|
2623 |
```python
|
|
|
2624 |
from transformers import AutoTokenizer
|
2625 |
|
2626 |
model_name = "michaelfeil/ct2fast-e5-small-v2"
|
2627 |
|
2628 |
+
from hf_hub_ctranslate2 import EncoderCT2fromHfHub
|
2629 |
+
model = EncoderCT2fromHfHub(
|
2630 |
+
# load in int8 on CUDA
|
2631 |
+
model_name_or_path=model_name,
|
2632 |
+
device="cuda",
|
2633 |
+
compute_type="int8_float16",
|
2634 |
+
# tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
|
2635 |
+
)
|
2636 |
+
outputs = model.generate(
|
2637 |
+
text=["def fibonnaci(", "User: How are you doing? Bot:"],
|
2638 |
+
max_length=64,
|
2639 |
+
)
|
2640 |
+
print(outputs.shape, outputs)
|
2641 |
```
|
2642 |
|
2643 |
# Licence and other remarks:
|
vocabulary.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|