{ "model_type": "encoder-decoder", "vocab_size": 50257, "max_position_embeddings": 1024, "encoder_layers": 24, "encoder_ffn_dim": 4096, "encoder_attention_heads": 16, "decoder_layers": 24, "decoder_ffn_dim": 4096, "decoder_attention_heads": 16, "dropout": 0.2, "activation_function": "gelu", "initializer_range": 0.02 }