# Launch train.py through torchrun.
#
# Environment:
#   OMP_NUM_THREADS=1  set only for this torchrun invocation (command-prefix
#                      assignment, not exported to the rest of the shell).
#
# torchrun options passed below:
#   --nnodes=1          one node
#   --nproc_per_node=8  eight processes on that node
#   --node_rank=0       rank of this node
#   --rdzv_endpoint     rendezvous endpoint
#                       NOTE(review): "address:port" looks like a placeholder
#                       to be replaced with a real host:port - confirm.
#
# train.py arguments passed below (their meaning is defined by train.py,
# which is not shown here):
#   --use_bf16
#   --fake_data_path, --fake_file_path
#                       NOTE(review): the "your_fake_*_path" values look like
#                       placeholders to be replaced before running - confirm.
#
# NOTE(review): the last line of the command ends with a line-continuation
# backslash; confirm whether further arguments are meant to follow it.
OMP_NUM_THREADS=1 torchrun \
  --nnodes=1 \
  --nproc_per_node=8 \
  --node_rank=0 \
  --rdzv_endpoint="address:port" \
  train.py --use_bf16 \
    --fake_data_path "your_fake_data_path" \
    --fake_file_path "your_fake_file_path" \