# Note that some of the fields in this template haven't been filled in yet.
# Please resolve any `null` fields before launching!
precision: amp_bf16
max_seq_len: 2048

# Tokenizer for dataset creation
tokenizer_name: bert-base-uncased
# Base model config
model:
  name: bert
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  model_config:
    num_attention_heads: 12
    num_hidden_layers: 12
    attention_probs_dropout_prob: 0.0
    max_position_embeddings: 2048
    monarch_mixer_sequence_mixing: true
    long_conv_l_max: 2048
    # NOTE(review): written as 1.0e-3 / 1.0e-5 so YAML 1.1 loaders (e.g. PyYAML)
    # parse a float rather than the string "1e-3" — confirm the consumer expects floats.
    long_conv_kernel_learning_rate: 1.0e-3
    hyena_lr_pos_emb: 1.0e-5
    hyena_w: 10
    hyena_wd: 0.1
    hyena_emb_dim: 5
    hyena_filter_order: 128
    hyena_training_additions: false
    bidirectional: true
    residual_long_conv: true
    use_glu_mlp: true
    use_monarch_mlp: true
    monarch_mlp_nblocks: 4
    use_positional_encodings: true