---
# Short free-text label for this job configuration.
description: bitnet

# Submission target for the job.
# NOTE(review): the meaning of each field is inferred from its key name --
# confirm against the submission tool's schema.
target:
  service: 'sing'
  name: 'msroctovc'
  resource_group: 'gcr-singularity-octo'
  # Alternative workspace, currently disabled; kept for reference.
  # workspace_name: Workspace_NLC
  workspace_name: 'NLC_Workspace'

# Runtime environment: the image to run in plus setup commands.
environment:
  image: amlt-sing/acpt-2.3.1-py3.10-cuda12.1
  # Setup commands echo the launch-related variables so they show up in the
  # job log. Each expansion is double-quoted for the shell so its value is
  # never word-split or glob-expanded, and each command is single-quoted for
  # YAML so the embedded `:` and `"` characters are taken literally.
  # NOTE(review): `$$` presumably escapes a literal `$`, deferring expansion
  # to the shell on the node instead of submit time -- confirm with the
  # submission tool's documentation.
  setup:
    - 'echo "master_addr:" "$$MASTER_ADDR"'
    - 'echo "master_port:" "$$MASTER_PORT"'
    - 'echo "node_rank:" "$$OMPI_COMM_WORLD_RANK"'

# Location of the source tree associated with the job.
code:
  # Parent directory of $CONFIG_DIR.
  # NOTE(review): $CONFIG_DIR is presumably substituted by the submission
  # tool with the directory holding this file -- confirm.
  local_dir: '$CONFIG_DIR/..'

# Storage definitions, keyed by a local name.
storage:
  # Entry `qilongma`: container `qilongma` in account `msranlpintern`.
  qilongma:
    storage_account_name: 'msranlpintern'
    container_name: 'qilongma'

# Parameter search: `job_template` describes one trial; `type`, `max_trials`
# and `params` describe the space that is searched.
search:
  job_template:
    name: sleep
    sku: 1x40G8
    identity: managed
    mpi: true
    process_count_per_node: 1
    # Echo the `rank` value, then block forever (`sleep infinity` never
    # exits), so the job stays up until it is cancelled.
    # NOTE(review): `$${rank}` presumably refers to the `rank` entry under
    # `params` below -- confirm how the submission tool expands it.
    command:
      - 'echo $${rank}'
      - sleep infinity
    submit_args:
      env:
        # MPI launcher options and NCCL settings exported via `-x`.
        # The folded scalar (`>-`) joins the lines below with single spaces
        # and strips the trailing newline, yielding one command-line string.
        SINGULARITY_MPI_ENV: >-
          -mca pml ucx --mca btl ^vader,tcp,openib
          -x NCCL_SOCKET_IFNAME=bond0
          -x NCCL_IB_HCA=mlx5_0,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_9,mlx5_10,mlx5_11
          -x NCCL_DEBUG=INFO
    # Tags are `key:value` strings; quoted so they always load as plain strings.
    tags:
      - 'Project_Name:1.58-bit-LLMs'
      - 'ProjectID:PRJ-0349-A54'
      - 'Experiment:BitNet-scaling'

  # Grid search over the parameters listed below. `rank` has one discrete
  # value, so the grid holds a single combination, well under `max_trials`.
  type: grid
  max_trials: 500
  params:
    - name: rank
      spec: discrete
      values:
        - '1'

