# Repository: 0-hero/gpt2-pos-encoding-experiment-100B
# Page tabs: Model card | Files and versions | Community
# File: gpt2-pos-encoding-experiment-100B/wandb/run-20240926_192831-378lr5yg/files/config.yaml
# Last commit (0-hero): "Add files using upload-large-folder tool" (71c6277, verified, 5 months ago)
# Size: 1.84 kB

	_wandb:
	value:
	cli_version: 0.18.1
	m: []
	python_version: 3.10.12
	t:
	"1":
	- 1
	- 55
	"2":
	- 1
	- 55
	"3":
	- 2
	- 13
	- 16
	- 23
	- 55
	- 61
	"4": 3.10.12
	"5": 0.18.1
	"8":
	- 5
	"12": 0.18.1
	"13": linux-x86_64
	always_save_checkpoint:
	value: true
	attention_types:
	value:
	- default
	backend:
	value: nccl
	batch_size:
	value: 120
	beta1:
	value: 0.9
	beta2:
	value: 0.95
	bias:
	value: false
	block_size:
	value: 512
	checkpoint_path:
	value: ""
	collect_activations:
	value: false
	collect_attention_patterns:
	value: false
	compile:
	value: true
	dataset:
	value: fineweb
	decay_lr:
	value: true
	device:
	value: cuda
	dropout:
	value: 0
	dtype:
	value: bfloat16
	embedding_types:
	value:
	- polynomial_legendre
	- polynomial_chebyshev
	- random_fourier
	- wavelet
	eval_datasets:
	value:
	- wikitext-103-v1
	- ptb
	- lambada
	eval_interval:
	value: 100
	eval_iters:
	value: 100
	eval_only:
	value: false
	grad_clip:
	value: 1
	gradient_accumulation_steps:
	value: 40
	init_from:
	value: scratch
	learning_rate:
	value: 0.0006
	log_interval:
	value: 1
	lr_decay_iters:
	value: 10000
	max_iters:
	value: 10000
	min_lr:
	value: 6e-05
	n_embd:
	value: 256
	n_head:
	value: 4
	n_layer:
	value: 4
	out_dir:
	value: out
	seed:
	value: 1337
	wandb_log:
	value: true
	wandb_project:
	value: gpt2_positional_encodings_100B
	wandb_run_name:
	value: experiment
	warmup_iters:
	value: 100
	weight_decay:
	value: 0.1