---
# NOTE(review): this file arrived collapsed onto a single (unparseable) line;
# the block structure below is reconstructed from key semantics — confirm
# nesting against the consuming code (OmegaConf/Hydra-style config, given
# the ${training.trainer.max_steps} interpolation).

model:
  # HuggingFace model identifier for the encoder backbone.
  transformer_model: "microsoft/deberta-v3-small"

optimizer:
  lr: 0.00005
  warmup_steps: 25000
  # Resolved at load time from the training section (OmegaConf interpolation).
  total_steps: ${training.trainer.max_steps}
  weight_decay: 0.01
  # Parameter-name substrings excluded from weight decay (conventional for
  # transformer fine-tuning); quoted consistently — "LayerNorm.weight"
  # contains a '.' and should stay a string.
  no_decay_params:
    - "bias"
    - "LayerNorm.weight"

# Presumably per-forward-pass batching limits — verify units against caller.
relations_per_forward: 16
# NOTE(review): value missing in source (truncated at chunk boundary?) —
# a bare key parses as null; confirm and fill in the intended value.
entities_per_forward: