michaelfeil
committed on
Commit
·
9680a82
1
Parent(s):
41c5313
Upload intfloat/e5-small-v2 ctranslate fp16 weights
Browse files- README.md +13 -2
- vocabulary.txt +0 -0
README.md
CHANGED
@@ -2621,12 +2621,23 @@ and [hf-hub-ctranslate2>=2.0.8](https://github.com/michaelfeil/hf-hub-ctranslate
|
|
2621 |
- `compute_type=int8` for `device="cpu"`
|
2622 |
|
2623 |
```python
|
2624 |
-
from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
|
2625 |
from transformers import AutoTokenizer
|
2626 |
|
2627 |
model_name = "michaelfeil/ct2fast-e5-small-v2"
|
2628 |
|
2629 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2630 |
```
|
2631 |
|
2632 |
# Licence and other remarks:
|
|
|
2621 |
- `compute_type=int8` for `device="cpu"`
|
2622 |
|
2623 |
```python
|
|
|
2624 |
from transformers import AutoTokenizer
|
2625 |
|
2626 |
model_name = "michaelfeil/ct2fast-e5-small-v2"
|
2627 |
|
2628 |
+
from hf_hub_ctranslate2 import EncoderCT2fromHfHub
|
2629 |
+
model = EncoderCT2fromHfHub(
|
2630 |
+
# load in int8 on CUDA
|
2631 |
+
model_name_or_path=model_name,
|
2632 |
+
device="cuda",
|
2633 |
+
compute_type="int8_float16",
|
2634 |
+
# tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
|
2635 |
+
)
|
2636 |
+
outputs = model.generate(
|
2637 |
+
text=["def fibonnaci(", "User: How are you doing? Bot:"],
|
2638 |
+
max_length=64,
|
2639 |
+
)
|
2640 |
+
print(outputs.shape, outputs)
|
2641 |
```
|
2642 |
|
2643 |
# Licence and other remarks:
|
vocabulary.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|