llama-tinyshakespeare / config.json
{
  "architectures": [
    "LLaMAForHF"
  ],
  "batch_size": 64,
  "clip_grad_norm": 0.5,
  "context_window": 64,
  "dim": 192,
  "dropout": 0.1,
  "epochs": 5,
  "learning_rate": 0.0001,
  "max_lr": 0.0003,
  "max_seq_len": 128,
  "model_type": "llama",
  "num_heads": 8,
  "num_layers": 4,
  "test_split": 0.1,
  "torch_dtype": "float32",
  "train_split": 0.8,
  "transformers_version": "4.41.2",
  "val_split": 0.1,
  "vocab_size": 2000,
  "warmup_steps": 1000,
  "weight_decay": 0.1
}
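
Note that this config mixes architecture fields (dim, num_layers, num_heads, vocab_size) with training hyperparameters (batch_size, epochs, learning_rate, warmup_steps, the split ratios); standard transformers config classes simply carry unknown keys along as extra attributes. Below is a minimal sketch of loading the file with transformers. The repo id "edbraga/llama-tinyshakespeare" is an assumption inferred from the page header, and since "LLaMAForHF" is not a built-in transformers class, actually instantiating the model would require the repo's own modeling code.

    from transformers import AutoConfig

    # A minimal sketch; the repo id is an assumption inferred from
    # the page header, not confirmed by the file itself.
    config = AutoConfig.from_pretrained("edbraga/llama-tinyshakespeare")

    # model_type "llama" maps to LlamaConfig; non-standard keys such as
    # "dim" or "epochs" are kept as plain attributes on the config object.
    print(config.model_type)              # "llama"
    print(config.dim, config.num_layers)  # 192 4

    # "LLaMAForHF" is not a class that ships with transformers, so loading
    # weights would need the repo's custom code, e.g. (hypothetical):
    # model = AutoModel.from_pretrained("edbraga/llama-tinyshakespeare",
    #                                   trust_remote_code=True)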