File size: 283 Bytes
2f044c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Model settings.
model:
  # Identifier of the pretrained transformer to use. The value has the
  # "<organisation>/<name>" form — presumably a Hugging Face Hub model id
  # (confirm against the code that loads it).
  transformer_model: "microsoft/deberta-v3-large"

# Optimizer settings.
optimizer:
  # Keep this in plain decimal form: exponent notation without a dot
  # (e.g. 1e-4) is loaded as a string by YAML 1.1 parsers such as PyYAML.
  lr: 0.0001
  warmup_steps: 5000
  # Interpolation: takes the value found at training.trainer.max_steps, which
  # is defined outside this file (OmegaConf/Hydra-style "${...}" reference —
  # confirm that the loader resolves it).
  total_steps: ${training.trainer.max_steps}
  total_reset: 1
  weight_decay: 0.0
  # NOTE(review): presumably a layer-wise learning-rate decay factor —
  # confirm against the code that builds the optimizer.
  lr_decay: 0.9
  # Parameter-name patterns. Judging by the key name these are excluded from
  # weight decay — verify how the consumer matches them (substring vs. exact).
  no_decay_params:
    - "bias"
    - "LayerNorm.weight"

# NOTE(review): presumably the number of entities processed in one forward
# pass — confirm against the consuming code.
entities_per_forward: 100