# Runtime environment: launched on a local machine with debug mode off.
compute_environment: LOCAL_MACHINE
debug: false

# Port used by the main process.
# NOTE(review): presumably the rendezvous/communication port that every node
# must be able to reach on the main node — confirm against the launcher.
main_process_port: 13378

# Distributed backend and numeric precision.
distributed_type: DEEPSPEED
mixed_precision: bf16
downcast_bf16: auto
use_cpu: false

# Launch topology: 2 machines, 2 processes; this file describes machine rank 0.
# NOTE(review): if this file is consumed by Hugging Face Accelerate,
# num_processes is the total across all machines (i.e. one per machine
# here) — confirm that is the intent.
# NOTE(review): no main_process_ip is visible alongside these keys even though
# num_machines is 2; presumably it is passed on the command line — confirm.
num_machines: 2
num_processes: 2
machine_rank: 0
same_network: true
rdzv_backend: c10d

# Name of the training entry point to invoke.
main_training_function: main

# TPU-related options: empty environment list, cluster and sudo both off.
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false

# DeepSpeed settings: ZeRO stage 2, no optimizer/parameter offload, bf16 on.
# NOTE(review): the nested zero_optimization / bf16 / fp16 sections follow the
# DeepSpeed JSON config schema. Accelerate's own deepspeed_config keys are flat
# (e.g. zero_stage, offload_optimizer_device, offload_param_device), so confirm
# that whatever consumes this file actually reads the nested form; otherwise
# settings such as overlap_comm may be silently ignored and would need to move
# into a file referenced via deepspeed_config_file.
deepspeed_config:
  # Multi-node launcher selection.
  deepspeed_multinode_launcher: standard

  # Gradient clipping value.
  gradient_clipping: 1.0

  # Precision switches: bf16 enabled, fp16 disabled.
  bf16:
    enabled: true
  fp16:
    enabled: false

  zero_optimization:
    stage: 2
    contiguous_gradients: true
    overlap_comm: true
    # Offload targets are the literal string 'none', i.e. no offload device.
    offload_optimizer:
      device: 'none'
    offload_param:
      device: 'none'
