{
  "vocab_size": 32128,
  "dim_model": 1024,
  "num_heads": 32,
  "dim_head": 128,
  "dim_ff": 16384,
  "num_encoder_layers": 24,
  "num_decoder_layers": 24,
  "dropout_p": 0.0,
  "emb_init_mean": 0.0,
  "emb_init_std": 1.0,
  "pos_bias_type": "relative",
  "position_bias_num_buckets": 32,
  "position_bias_max_distance": 128,
  "pos_init_mean": 0.0,
  "pos_init_std": 1.0,
  "norm_init_var": 1.0,
  "norm_bias": false,
  "norm_eps": 1e-6,
  "att_init_mean": 0.0,
  "att_init_std": 1.0,
  "att_bias": false,
  "att_mask_value": "-inf",
  "ffn_init_mean": 0.0,
  "ffn_init_std": 1.0,
  "ffn_bias": false,
  "ffn_activate_fn": "relu",
  "proj_init_mean": 0.0,
  "proj_init_std": 1.0,
  "proj_bias": false,
  "length_scale": false,
  "attn_scale": false,
  "half": true,
  "int8": false,
  "tied": true
}