# NOTE(review): the following lines are file-viewer page residue (uploader,
# commit, size), not part of the config — kept as comments so the file
# remains valid YAML.
# 0-hero's picture
# Add files using upload-large-folder tool
# 71c6277 verified
# raw / history blame
# 1.84 kB
_wandb:
value:
cli_version: 0.18.1
m: []
python_version: 3.10.12
t:
"1":
- 1
- 55
"2":
- 1
- 55
"3":
- 2
- 13
- 16
- 23
- 55
- 61
"4": 3.10.12
"5": 0.18.1
"8":
- 5
"12": 0.18.1
"13": linux-x86_64
always_save_checkpoint:
value: true
attention_types:
value:
- default
backend:
value: nccl
batch_size:
value: 120
beta1:
value: 0.9
beta2:
value: 0.95
bias:
value: false
block_size:
value: 512
checkpoint_path:
value: ""
collect_activations:
value: false
collect_attention_patterns:
value: false
compile:
value: true
dataset:
value: fineweb
decay_lr:
value: true
device:
value: cuda
dropout:
value: 0
dtype:
value: bfloat16
embedding_types:
value:
- polynomial_legendre
- polynomial_chebyshev
- random_fourier
- wavelet
eval_datasets:
value:
- wikitext-103-v1
- ptb
- lambada
eval_interval:
value: 100
eval_iters:
value: 100
eval_only:
value: false
grad_clip:
value: 1
gradient_accumulation_steps:
value: 40
init_from:
value: scratch
learning_rate:
value: 0.0006
log_interval:
value: 1
lr_decay_iters:
value: 10000
max_iters:
value: 10000
min_lr:
value: 6.0e-05  # mantissa needs a "." for YAML 1.1 loaders (PyYAML) to parse this as a float, not a string
n_embd:
value: 256
n_head:
value: 4
n_layer:
value: 4
out_dir:
value: out
seed:
value: 1337
wandb_log:
value: true
wandb_project:
value: gpt2_positional_encodings_100B
wandb_run_name:
value: experiment
warmup_iters:
value: 100
weight_decay:
value: 0.1