{
  "vocab_size": 32128,
  "dim_model": 1024,
  "num_heads": 32,
  "dim_head": 128,
  "dim_ff": 16384,
  "num_encoder_layers": 24,
  "num_decoder_layers": 24,
  "dropout_p": 0.0,
  "emb_init_mean": 0.0,
  "emb_init_std": 1.0,
  "pos_bias_type": "relative",
  "position_bias_num_buckets": 32,
  "position_bias_max_distance": 128,
  "pos_init_mean": 0.0,
  "pos_init_std": 1.0,
  "norm_init_var": 1.0,
  "norm_bias": false,
  "norm_eps": 1e-6,
  "att_init_mean": 0.0,
  "att_init_std": 1.0,
  "att_bias": false,
  "att_mask_value": "-inf",
  "ffn_init_mean": 0.0,
  "ffn_init_std": 1.0,
  "ffn_bias": false,
  "ffn_activate_fn": "relu",
  "proj_init_mean": 0.0,
  "proj_init_std": 1.0,
  "proj_bias": false,
  "length_scale": false,
  "attn_scale": false,
  "half": true,
  "int8": false,
  "tied": true
}