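# Hugging Face file-viewer header captured with the file (not part of the config):
#   uploader avatar: "pszemraj's picture"
#   commit message:  "Upload folder using huggingface_hub"
#   commit:          f973cec (verified)
#   page links:      raw / history / blame
#   file size:       2.31 kB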
# Weights & Biases bookkeeping entry: client and interpreter versions plus a
# table `t` keyed by numeric strings.
# NOTE(review): nesting reconstructed from the standard wandb config.yaml
# layout (the source had all indentation stripped) — confirm against the
# original artifact.
_wandb:
  value:
    cli_version: "0.18.1"
    m: []
    python_version: "3.11.10"
    # NOTE(review): the numeric keys below are opaque codes written by wandb;
    # their meanings are not visible in this file.
    t:
      "1":
        - 1
        - 11
        - 41
        - 49
        - 50
        - 51
        - 55
        - 71
        - 100
      "2":
        - 1
        - 11
        - 41
        - 49
        - 50
        - 51
        - 55
        - 71
        - 100
      "3":
        - 2
        - 15
        - 16
        - 23
        - 55
        - 61
      "4": "3.11.10"  # same string as python_version above
      "5": "0.18.1"  # same string as cli_version above
      "6": "4.44.2"
      "8":
        - 5
      "12": "0.18.1"  # same string as cli_version above
      "13": linux-x86_64
# Checkpoint settings; the step interval lives under `value`.
checkpoint:
  value:
    every_steps: 2500
# Data settings recorded for the run (lengths, masking probability, workers).
# NOTE(review): units are presumably tokens for the *_length fields — confirm.
data:
  value:
    before_mask_input_length: 1137
    input_length: 1024
    mean_noise_span_length: 3
    mlm_probability: 0.15
    num_workers: 16
    target_length: 229
# Device setting recorded for the run.
device:
  value: gpu
# Evaluation settings.
eval:
  value:
    corrected_steps: 500
    # NOTE(review): far larger than optim.total_steps (20000), so a periodic
    # eval on this interval presumably never fires during the run — confirm
    # this is intentional.
    every_steps: 1000000000
    steps: 500
# Boolean flag recorded as false for this run.
eval_only:
  value: false
# Logging settings, including the nested wandb project/entity/tags.
# NOTE(review): nesting reconstructed (source indentation was stripped);
# `weights_l2` is placed beside `wandb_config` based on the alphabetical key
# order of the dump — confirm against the original artifact.
logging:
  value:
    every_steps: 25
    grad_l2: true
    use_wandb: true
    wandb_config:
      entity: pszemraj
      mode: online
      project: nanoT5
      tags:
        - 24x24
        - "1024"  # quoted so it stays a string, not an integer
    weights_l2: true
# Run mode recorded for this run.
# NOTE(review): `pt` presumably means pre-training — confirm with the consumer.
mode:
  value: pt
# Model settings: class selector, model name, and config overrides.
# NOTE(review): nesting reconstructed (source indentation was stripped);
# `dropout_rate` is placed under `overwrite` and `random_init` beside it,
# based on the alphabetical key order of the dump — confirm.
model:
  value:
    checkpoint_path: ""
    compile: true
    klass: hf_t5
    # NOTE(review): the name says "512ctx" while data.input_length is 1024 —
    # confirm this pairing is intended.
    name: pszemraj/tFINE-850m-24x24-512ctx
    overwrite:
      dropout_rate: 0
    random_init: false
# Recorded parameter count for the model.
n_all_param:
  value: 853929472
# Optimizer and learning-rate schedule settings.
optim:
  value:
    base_lr: 0.01
    batch_size: 128
    epochs: -1
    # NOTE(review): written without a decimal point, so YAML 1.1 loaders such
    # as PyYAML resolve this as the string "2e-05" rather than a float; confirm
    # the consumer coerces it, or rewrite it as 2.0e-05.
    final_cosine: 2e-05
    grad_acc: 8
    grad_clip: 1
    lr_scheduler: cosine
    name: adamwscale
    total_steps: 20000
    warmup_steps: 5000
    weight_decay: 0
# Numeric precision setting recorded for the run.
precision:
  value: bf16
# Boolean flag recorded as false for this run.
predict_only:
  value: false
# Seed value recorded for the run.
seed:
  value: 34534
# SLURM identifier field. The plain scalar `none` loads as the string "none",
# not as YAML null (null is spelled `null` or `~`).
slurm_id:
  value: none
# Tokenizer identifier used for the run.
tokenizer:
  value:
    name: BEE-spoke-data/slimpajama_tok-48128-BPE-forT5
# Output directory path recorded for the run.
working_dir:
  value: /workspace/nanoT5/outputs/2024-09-26/05-19-51