# https://huggingface.co/docs/transformers/v4.32.1/en/main_classes/trainer#using-accelerate-launcher-with-trainer
compute_environment: LOCAL_MACHINE
deepspeed_config:
  deepspeed_multinode_launcher: standard
  gradient_accumulation_steps: 1
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  zero_stage: 2
  gradient_clipping: 1.0
distributed_type: DEEPSPEED
downcast_bf16: 'no'
gpu_ids: all
machine_rank: 0 # change this for each node
main_process_ip: localhost # the machines on Azure are inter-connected, so you can configure this directly according to `~/.ssh/config`
main_process_port: 11451 # change this as you like
main_training_function: main
mixed_precision: fp16
num_machines: 1 # change this for all nodes
num_processes: 1 # change this for all nodes: the total number of GPU processes across all nodes
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
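
# Example usage (a sketch; `accelerate_config.yaml` and `train.py` are hypothetical
# names, so substitute the path you save this file to and your own training script):
#   accelerate launch --config_file accelerate_config.yaml train.py
#
# For a 2-node run with 8 GPUs per node, every node would use:
#   num_machines: 2
#   num_processes: 16   # 2 nodes x 8 GPUs
# with machine_rank set to 0 on the main node and 1 on the other, and
# main_process_ip on both nodes pointing at the rank-0 node.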