{
  "model_config": {
    "model_type": "llama2",
    "mamba": null,
    "llama2": {
      "attention_bias": false,
      "attention_dropout": 0.0,
      "bos_token_id": -1,
      "eos_token_id": -2,
      "hidden_act": "silu",
      "hidden_size": 48,
      "initializer_range": 0.02,
      "intermediate_size": 48,
      "max_position_embeddings": 513,
      "num_attention_heads": 2,
      "num_hidden_layers": 2,
      "num_key_value_heads": 2,
      "pretraining_tp": 1,
      "rms_norm_eps": 1e-06,
      "rope_scaling": null,
      "rope_theta": 10000.0,
      "tie_word_embeddings": false,
      "use_cache": true,
      "vocab_size": 4096
    }
  },
  "run_name": "debug__2024_03_19_00_59_57",
  "output_dir": "/Users/jaidhyani/Library/Application Support/delphi/debug__2024_03_19_00_59_57",
  "huggingface": {
    "repo_id": "jaiwithani/testing",
    "push_checkpoints_to_hub": true
  },
  "device": "auto",
  "eval_interval": 1,
  "log_interval": 1,
  "eval_iters": 1,
  "eval_only": false,
  "always_save_checkpoint": true,
  "init_from": "scratch",
  "wandb_config": {
    "log": true,
    "project": "delphi",
    "entity": "jaiwithani"
  },
  "batch_size": 64,
  "max_seq_len": 512,
  "max_epochs": 1,
  "grad_clip": 1.0,
  "optimizer": {
    "gradient_accumulation_steps": 4,
    "learning_rate": 0.0005,
    "weight_decay": 0.1,
    "beta1": 0.9,
    "beta2": 0.95,
    "grad_clip": 1.0,
    "decay_lr": true,
    "warmup_iters": 1000,
    "min_lr": 0.0
  },
  "train_sample_limit": 1024,
  "val_sample_limit": -1
}