{
  "train_micro_batch_size_per_gpu": 2,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 100,
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 2000,
    "hysteresis": 2,
    "min_loss_scale": 0.0
  },
  "zero_optimization": {
    "stage": 2,
    "reduce_bucket_size": 50000000,
    "overlap_comm": true
  },
  "sparse_attention": {
    "mode": "fixed",
    "block": 16,
    "different_layout_per_head": true,
    "num_local_blocks": 8,
    "num_global_blocks": 1,
    "attention": "unidirectional",
    "horizontal_global_attention": false,
    "num_different_global_patterns": 8
  }
}