#!/bin/bash

# Set the environment variables.
export HF_ALLOW_CODE_EVAL=1
export TOKENIZERS_PARALLELISM=true

# Put "$CONDA_PREFIX/lib" at the front of LD_LIBRARY_PATH.
#   - When CONDA_PREFIX is unset or empty the entry would degrade to the bare
#     "/lib", so warn on stderr and leave LD_LIBRARY_PATH untouched instead.
#   - The previous value is appended only when it is non-empty: a trailing ":"
#     creates an empty entry, which the dynamic loader interprets as the
#     current working directory.
# Globals: CONDA_PREFIX (read), LD_LIBRARY_PATH (read, written, exported)
# Returns: always 0 (a missing conda environment is not treated as fatal).
prepend_conda_lib() {
    if [[ -z "${CONDA_PREFIX:-}" ]]; then
        printf 'warning: CONDA_PREFIX is not set; leaving LD_LIBRARY_PATH unchanged\n' >&2
        return 0
    fi
    export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
}
prepend_conda_lib

## NCCL settings. Exactly one value per variable is active; the commented-out
## lines are alternatives kept for quick switching.

## NCCL_DEBUG
## Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-debug
declare -x NCCL_DEBUG="INFO"

## NCCL_P2P_LEVEL
## Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-p2p-level
# declare -x NCCL_P2P_LEVEL="LOC"
# declare -x NCCL_P2P_LEVEL="PHB"
declare -x NCCL_P2P_LEVEL="SYS"

## NCCL_P2P_DISABLE (not set by default)
## Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-p2p-disable
# declare -x NCCL_P2P_DISABLE="1"

## NCCL_SHM_DISABLE (not set by default)
## Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-shm-disable
# declare -x NCCL_SHM_DISABLE="1"

# Comma-separated GPU indices handed to the launcher's --include option.
GPU_IDS='0,1'

# Port passed to the launcher as --master_port. Pick a different value for
# each job when several jobs share one machine.
MASTER_PORT='48763'

# Task config (YAML), passed to ds_main.py as -c. Uncomment exactly one.
# CONFIG_PATH='configs/tasks/c4/0d3b_1024.yaml'
# CONFIG_PATH='configs/tasks/c4/0d03b_1024.yaml'
CONFIG_PATH='configs/tasks/c4/0d3b_2048.yaml'
# CONFIG_PATH='configs/tasks/c4/0d03b_2048.yaml'
# CONFIG_PATH='configs/tasks/lamini/0d1b_1024.yaml'
# CONFIG_PATH='configs/tasks/lamini/0d01b_1024.yaml'
# CONFIG_PATH='configs/tasks/lamini/0d1b_2048.yaml'
# CONFIG_PATH='configs/tasks/lamini/0d01b_2048.yaml'

# molos model config (JSON), passed to ds_main.py as -mc. Uncomment exactly one.
# MODEL_CONFIG_PATH='configs/molos/sequence_1o4_0d6_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_1o4_0d6_0d5_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_1o4_0d6_0d25_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_1o4_0d6_0d75_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_1o4_0d6_1d0_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_2o4_0d6_0d01_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_2o4_0d6_0d25_1024_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_2o4_0d6_0d25_2048_aopw.json'
# MODEL_CONFIG_PATH='configs/molos/token_2o4_0d25_0d1_2048_aopw.json'
MODEL_CONFIG_PATH='configs/molos/token_2o4_0d25_0d5_2048_aopw.json'

# DeepSpeed config (JSON), passed to ds_main.py as -dsc. Uncomment exactly one.
DS_CONFIG_PATH='configs/deepspeed/zero-3_c4_0d3b_2_5e8_1e9.json'
# DS_CONFIG_PATH='configs/deepspeed/zero-3_c4_0d03b_2_5e8_1e9.json'

# LoRA config (JSON), passed to ds_main.py as -lc. Uncomment exactly one.
# LORA_CONFIG_PATH='configs/lora/v0.json'
# LORA_CONFIG_PATH='configs/lora/v1.json'
# LORA_CONFIG_PATH='configs/lora/v2.json'
# LORA_CONFIG_PATH='configs/lora/v3.json'
LORA_CONFIG_PATH='configs/lora/v4.json'
# LORA_CONFIG_PATH='configs/lora/v5.json'

# Print the toolchain versions before launching. A tool that is not on PATH
# is reported as "not found" instead of an empty version string.

# Print the first line of `<tool> --version`, or "not found" when <tool> is
# not on PATH.
# Arguments: $1 - executable name
first_version_line() {
    local tool=$1
    if ! command -v "$tool" >/dev/null 2>&1; then
        printf 'not found\n'
        return 0
    fi
    "$tool" --version | head -n 1
}

# Print what follows "release " on the "release" line of `nvcc --version`
# (first comma removed), or "not found" when nvcc is not on PATH.
cuda_toolkit_release() {
    if ! command -v nvcc >/dev/null 2>&1; then
        printf 'not found\n'
        return 0
    fi
    # One sed call replaces the former `grep release | sed ...` pipeline.
    nvcc --version | sed -n '/release/{s/.*release //;s/,//;p;}'
}

printf 'GCC Version: %s\n' "$(first_version_line gcc)"
printf 'G++ Version: %s\n' "$(first_version_line g++)"
printf 'CUDA Toolkit Version: %s\n' "$(cuda_toolkit_release)"

# Start ds_main.py in train mode (-m train, -mt molos, -cei -1) through the
# `deepspeed` launcher.
#
# Every expansion is quoted so that a value containing spaces or glob
# characters reaches the launcher as a single argument.
#
# LoRA is optional: when LORA_CONFIG_PATH is empty or unset, the `-lc` option
# is left out entirely (this replaces the previously commented-out duplicate
# command that differed only by the missing `-lc` line).
#
# Globals: GPU_IDS, MASTER_PORT, CONFIG_PATH, MODEL_CONFIG_PATH,
#          DS_CONFIG_PATH, LORA_CONFIG_PATH (all read)
# Returns: the exit status of `deepspeed`.
launch_training() {
    local -a lora_args=()
    if [[ -n "${LORA_CONFIG_PATH:-}" ]]; then
        lora_args=(-lc "${LORA_CONFIG_PATH}")
    fi

    deepspeed \
        --include="localhost:${GPU_IDS}" \
        --master_port "${MASTER_PORT}" \
        ds_main.py \
        -c "${CONFIG_PATH}" \
        -mc "${MODEL_CONFIG_PATH}" \
        -dsc "${DS_CONFIG_PATH}" \
        "${lora_args[@]}" \
        -m train \
        -mt molos \
        -cei -1
}

# Last command of the script, so the script exits with the launcher's status.
launch_training
