{
  "data": {
    "cache_dir": "data",
    "train": "openwebtext",
    "valid": "wikitext103"
  },
  "eval": {
    "batch_size": 512
  },
  "graph": {
    "file": "data",
    "type": "simplex"
  },
  "model": {
    "cond_dim": 128,
    "dropout": 0.1,
    "embedding": 0,
    "hidden_size": 768,
    "length": 1024,
    "n_blocks": 12,
    "n_heads": 12,
    "name": "small",
    "scale_by_sigma": false,
    "score_strategy": 0,
    "type": "ddit"
  },
  "ngpus": 16,
  "noise": {
    "sigma_max": 20,
    "sigma_min": 0.0001,
    "type": "geometric"
  },
  "optim": {
    "beta1": 0.9,
    "beta2": 0.999,
    "eps": 1e-08,
    "grad_clip": 1.0,
    "lr": 0.0003,
    "optimizer": "AdamW",
    "warmup": 2500,
    "weight_decay": 0
  },
  "sampling": {
    "corr_steps": 1,
    "corrector": "none",
    "noise_removal": false,
    "predictor": "euler",
    "roc": 0.01,
    "steps": 1000
  },
  "tokens": 50257,
  "training": {
    "accum": 1,
    "batch_size": 512,
    "ema": 0.9999,
    "eval_freq": 100,
    "log_freq": 50,
    "n_iters": 1300001,
    "snapshot_freq": 10000,
    "snapshot_freq_for_preemption": 10000,
    "snapshot_sampling": true
  }
}