{
  "vocab_size": 32128,
  "dim_model": 1024,
  "num_heads": 32,
  "dim_head": 128,
  "dim_ff": 16384,
  "num_encoder_layers": 24,
  "num_decoder_layers": 24,
  "dropout_p": 0.0,
  "emb_init_mean": 0.0,
  "emb_init_std": 1.0,
  "pos_bias_type": "relative",
  "position_bias_num_buckets": 32,
  "position_bias_max_distance": 128,
  "pos_init_mean": 0.0,
  "pos_init_std": 1.0,
  "norm_init_var": 1.0,
  "norm_bias": false,
  "norm_eps": 1e-6,
  "att_init_mean": 0.0,
  "att_init_std": 1.0,
  "att_bias": false,
  "att_mask_value": "-inf",
  "ffn_init_mean": 0.0,
  "ffn_init_std": 1.0,
  "ffn_bias": false,
  "ffn_activate_fn": "relu",
  "proj_init_mean": 0.0,
  "proj_init_std": 1.0,
  "proj_bias": false,
  "length_scale": false,
  "attn_scale": false,
  "half": true,
  "int8": false,
  "tied": true
}