{
    "ngpus": 32,
    "tokens": 50257,
    "gpt_dir": "assets/gpt2-large",
    "outdir": "../output",
    "training": {
        "batch_size": 512,
        "accum": 1,
        "n_iters": 1000001,
        "snapshot_freq": 50000,
        "log_freq": 50,
        "eval_freq": 100,
        "snapshot_freq_for_preemption": 10000,
        "weight": "standard",
        "snapshot_sampling": false,
        "ema": 0.9999,
        "loss_type": "lambda_DCE"
    },
    "data": {
        "train": "openwebtext",
        "valid": "wikitext103",
        "cache_dir": "data"
    },
    "noise": {
        "type": "loglinear",
        "sigma_min": 0.0001,
        "sigma_max": 20
    },
    "sampling": {
        "predictor": "euler",
        "steps": 1024
    },
    "eval": {
        "batch_size": 512,
        "perplexity": true,
        "perplexity_batch_size": 16
    },
    "optim": {
        "weight_decay": 0.03,
        "optimizer": "AdamW",
        "lr": 0.0003,
        "beta1": 0.9,
        "beta2": 0.999,
        "eps": 1e-08,
        "warmup": 2500,
        "grad_clip": 1.0
    },
    "model": {
        "name": "small_wotsm",
        "type": "ddit_wot",
        "hidden_size": 768,
        "cond_dim": 128,
        "length": 1024,
        "n_blocks": 12,
        "n_heads": 12,
        "dropout": 0.02,
        "use_checkpoint": false,
        "dtype": "float16"
    }
}
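
For reference, a minimal Python sketch of how this config might be consumed; the filename "config.json" and the derived quantities are assumptions for illustration, not taken from the training code itself.

import json

# Assumption: the config above is saved as "config.json" (hypothetical path).
with open("config.json") as f:
    cfg = json.load(f)

# Quantities implied directly by the values above.
tokens_per_step = cfg["training"]["batch_size"] * cfg["model"]["length"]
# 512 * 1024 = 524288 tokens per optimizer step

per_gpu_batch = cfg["training"]["batch_size"] // (cfg["ngpus"] * cfg["training"]["accum"])
# 512 / (32 * 1) = 16 sequences per GPU per step

print("tokens per optimizer step:", tokens_per_step)
print("per-GPU micro-batch size:", per_gpu_batch)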