{
  "model_type": "encoder-decoder",
  "vocab_size": 50257,
  "max_position_embeddings": 1024,
  "encoder_layers": 24,
  "encoder_ffn_dim": 4096,
  "encoder_attention_heads": 16,
  "decoder_layers": 24,
  "decoder_ffn_dim": 4096,
  "decoder_attention_heads": 16,
  "dropout": 0.2,
  "activation_function": "gelu",
  "initializer_range": 0.02
}