yushi committed on
Commit
132fbea
1 Parent(s): a748263

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -360,9 +360,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
360
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
361
  model.eval()
362
 
363
- # 事实上我们用的是weighted mean pooling,但为了部署方便,我们将一部分pooling步骤集成在model.forward中
364
- # In fact, we will use weighted mean pooling, but we will integrate some pooling steps into model.forward for deployment convenience
365
- def mean_pooling(hidden,attention_mask):
366
  s = torch.sum(hidden * attention_mask.unsqueeze(-1).float(), dim=1)
367
  d = attention_mask.sum(dim=1, keepdim=True).float()
368
  reps = s / d
 
360
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
361
  model.eval()
362
 
363
+ # 由于在 `model.forward` 中缩放了最终隐层表示,此处的 mean pooling 实际上起到了 weighted mean pooling 的作用
364
+ # As we scale hidden states in `model.forward`, mean pooling here actually works as weighted mean pooling
365
+ def mean_pooling(hidden, attention_mask):
366
  s = torch.sum(hidden * attention_mask.unsqueeze(-1).float(), dim=1)
367
  d = attention_mask.sum(dim=1, keepdim=True).float()
368
  reps = s / d