{
  "optim": {
    "type": "AdamW",
    "lr": 0.0005,
    "betas": [
      0.9,
      0.98
    ],
    "eps": 1e-12,
    "weight_decay": 0.01,
    "amsgrad": false,
    "fused": null
  },
  "optim_mod": {
    "name": "none"
  },
  "name": "bert-base",
  "limited_decay_keys": [
    "bias",
    "LayerNorm.bias",
    "LayerNorm.weight",
    "norm"
  ],
  "warmup_steps": 30000,
  "cooldown_steps": 0,
  "steps": 900000,
  "scheduler": "budget-cosine-decay",
  "batch_size": 1536,
  "batch_size_ramp": 0,
  "gradient_clipping": null,
  "pretrain_in_train_mode": false,
  "objective": {
    "name": "masked-lm",
    "mlm_probability": 0.15,
    "use_80_20_rule": true,
    "disable_mlm": false,
    "token_drop": 0.0
  },
  "reverse_dataset_order": false,
  "budget": 24
}