ShaneEP77 committed on
Commit ed5f224 · 1 Parent(s): ba79911

Upload ds_config.json

Files changed (1)
ds_config.json +50 -0
ds_config.json ADDED
@@ -0,0 +1,50 @@
+{
+    "fp16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 16,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
+    "optimizer": {
+        "type": "AdamW",
+        "params": {
+            "lr": "auto",
+            "betas": "auto",
+            "eps": "auto",
+            "weight_decay": "auto"
+        }
+    },
+    "scheduler": {
+        "type": "WarmupLR",
+        "params": {
+            "warmup_min_lr": "auto",
+            "warmup_max_lr": "auto",
+            "warmup_num_steps": "auto"
+        }
+    },
+    "zero_optimization": {
+        "stage": 3,
+        "allgather_partitions": true,
+        "allgather_bucket_size": 2e8,
+        "overlap_comm": true,
+        "reduce_scatter": true,
+        "reduce_bucket_size": 2e8,
+        "contiguous_gradients": true,
+        "offload_optimizer": {
+            "device": "cpu"
+        },
+        "offload_param": {
+            "device": "cpu"
+        },
+        "stage3_gather_16bit_weights_on_model_save": true
+    },
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "communication_data_type": "fp32",
+    "steps_per_print": 1000000000000000,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
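The added file is a standard DeepSpeed ZeRO stage-3 configuration with CPU offload for both optimizer states and parameters; every "auto" field is a placeholder that the Hugging Face Trainer's DeepSpeed integration fills in from its own arguments at launch time. Below is a minimal, hypothetical sketch of how such a config is typically wired up; the model name, hyperparameters, and paths are illustrative assumptions, not taken from this repository.

```python
# Hypothetical usage sketch (not part of this commit): wiring ds_config.json into
# the Hugging Face Trainer, whose DeepSpeed integration resolves every "auto"
# field (lr, betas, eps, batch sizes, warmup steps, fp16, gradient clipping)
# from TrainingArguments at startup.
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

model_name = "gpt2"  # placeholder; the commit does not say which model is trained

training_args = TrainingArguments(
    output_dir="./checkpoints",        # placeholder output path
    per_device_train_batch_size=1,     # -> "train_micro_batch_size_per_gpu": "auto"
    gradient_accumulation_steps=8,     # -> "gradient_accumulation_steps": "auto"
    learning_rate=2e-5,                # -> optimizer "lr" and scheduler "warmup_max_lr"
    warmup_steps=100,                  # -> "warmup_num_steps": "auto"
    max_grad_norm=1.0,                 # -> "gradient_clipping": "auto"
    fp16=True,                         # -> "fp16.enabled": "auto"
    deepspeed="ds_config.json",        # the file added in this commit
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def build_trainer(train_dataset):
    # The training dataset is not part of this commit, so it is left to the caller.
    return Trainer(model=model, args=training_args, train_dataset=train_dataset)

# Run under the DeepSpeed launcher so ZeRO-3 sharding and CPU offload take effect:
#   deepspeed train.py
```

Because "stage3_gather_16bit_weights_on_model_save" is set to true, saving through the Trainer consolidates the ZeRO-3 shards into full fp16 weights that can be reloaded without DeepSpeed.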