File size: 893 Bytes
6c5ea3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
{
    "vocab_size": 32128,
    "dim_model": 1024,
    "num_heads": 32,
    "dim_head": 128,
    "dim_ff": 16384,
    "num_encoder_layers": 24,
    "num_decoder_layers": 24,
    "dropout_p": 0.0,
    "emb_init_mean": 0.0,
    "emb_init_std": 1.0,
    "pos_bias_type": "relative",
    "position_bias_num_buckets": 32,
    "position_bias_max_distance": 128,
    "pos_init_mean": 0.0,
    "pos_init_std": 1.0,
    "norm_init_var": 1.0,
    "norm_bias": false,
    "norm_eps": 1e-6,
    "att_init_mean": 0.0,
    "att_init_std": 1.0,
    "att_bias": false,
    "att_mask_value": "-inf",
    "ffn_init_mean": 0.0,
    "ffn_init_std": 1.0,
    "ffn_bias": false,
    "ffn_activate_fn": "relu",
    "proj_init_mean": 0.0,
    "proj_init_std": 1.0,
    "proj_bias": false,
    "length_scale": false,
    "attn_scale": false,
    "half": true,
    "int8": false,
    "tied": true
}