michaelfeil committed on
Commit
6e21974
1 Parent(s): 34b00ef

Upload intfloat/e5-small-v2 ctranslate fp16 weights

Browse files
Files changed (1) hide show
  1. README.md +5 -6
README.md CHANGED
@@ -2614,31 +2614,30 @@ pip install hf-hub-ctranslate2>=2.11.0 ctranslate2>=3.16.0
2614
  ```python
2615
  # from transformers import AutoTokenizer
2616
  model_name = "michaelfeil/ct2fast-e5-small-v2"
2617
- model_name_orig=intfloat/e5-small-v2
2618
 
2619
  from hf_hub_ctranslate2 import EncoderCT2fromHfHub
2620
  model = EncoderCT2fromHfHub(
2621
  # load in int8 on CUDA
2622
  model_name_or_path=model_name,
2623
  device="cuda",
2624
- compute_type="int8_float16",
2625
  )
2626
  outputs = model.generate(
2627
  text=["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
2628
  max_length=64,
2629
- )
2630
- # perform downstream tasks on outputs
2631
  outputs["pooler_output"]
2632
  outputs["last_hidden_state"]
2633
  outputs["attention_mask"]
2634
 
2635
  # alternative, use SentenceTransformer Mix-In
2636
  # for end-to-end Sentence embeddings generation
2637
- # not pulling from this repo
2638
 
2639
  from hf_hub_ctranslate2 import CT2SentenceTransformer
2640
  model = CT2SentenceTransformer(
2641
- model_name_orig, compute_type="int8_float16", device="cuda",
2642
  )
2643
  embeddings = model.encode(
2644
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
 
2614
  ```python
2615
  # from transformers import AutoTokenizer
2616
  model_name = "michaelfeil/ct2fast-e5-small-v2"
2617
+ model_name_orig="intfloat/e5-small-v2"
2618
 
2619
  from hf_hub_ctranslate2 import EncoderCT2fromHfHub
2620
  model = EncoderCT2fromHfHub(
2621
  # load in int8 on CUDA
2622
  model_name_or_path=model_name,
2623
  device="cuda",
2624
+ compute_type="int8_float16"
2625
  )
2626
  outputs = model.generate(
2627
  text=["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
2628
  max_length=64,
2629
+ ) # perform downstream tasks on outputs
 
2630
  outputs["pooler_output"]
2631
  outputs["last_hidden_state"]
2632
  outputs["attention_mask"]
2633
 
2634
  # alternative, use SentenceTransformer Mix-In
2635
  # for end-to-end Sentence embeddings generation
2636
+ # (not pulling from this CT2fast-HF repo)
2637
 
2638
  from hf_hub_ctranslate2 import CT2SentenceTransformer
2639
  model = CT2SentenceTransformer(
2640
+ model_name_orig, compute_type="int8_float16", device="cuda"
2641
  )
2642
  embeddings = model.encode(
2643
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],