# Model configuration.
#
# NOTE(review): this file arrived with all indentation stripped and a stray
# " | |" appended to every line (table-extraction residue). The nesting below
# is reconstructed from key order: `optimizer` and `entities_per_forward` are
# assumed to be children of `model`. Confirm against the code that reads
# this config before relying on it.
model:
  transformer_model: "microsoft/deberta-v3-base"

  optimizer:
    lr: 0.0001
    warmup_steps: 5000
    # Interpolation: resolved from the `training.trainer.max_steps` key, which
    # must be defined elsewhere in the composed configuration.
    total_steps: ${training.trainer.max_steps}
    total_reset: 1
    weight_decay: 0.0
    lr_decay: 0.8
    # Presumably parameter-name patterns excluded from weight decay — verify
    # against the optimizer implementation.
    no_decay_params:
      - "bias"
      - LayerNorm.weight

  entities_per_forward: 100