#! /usr/bin/env bash

# set -xe
# set -o pipefail

# Logging verbosity for the run. Everything here is exported so that child
# processes started by this script inherit it.

# ARES logging: logger name and level (consumed by the ares runner, presumably).
ARES_LOG_NAME=PPO
ARES_LOG_LEVEL=DEBUG
export ARES_LOG_NAME ARES_LOG_LEVEL

# PyTorch logging: C++ log level and torch.distributed debug level.
TORCH_CPP_LOG_LEVEL=INFO
TORCH_DISTRIBUTED_DEBUG=INFO
export TORCH_CPP_LOG_LEVEL TORCH_DISTRIBUTED_DEBUG

# Ascend toolkit environment setup
# set_env.sh is expected to define ASCEND_OPP_PATH (and usually
# LD_LIBRARY_PATH). If it is missing, bash reports the error and the script
# carries on, so the values it should have provided are checked below.
source /usr/local/Ascend/ascend-toolkit/set_env.sh

# Prepend the custom-operator API libraries. The ${VAR:+:...} form only adds
# the ':' separator when LD_LIBRARY_PATH is non-empty; a trailing ':' would be
# an empty search-path entry, which the dynamic loader treats as the current
# directory.
if [[ -n "${ASCEND_OPP_PATH:-}" ]]; then
    export LD_LIBRARY_PATH="${ASCEND_OPP_PATH}/vendors/customize/op_api/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
else
    printf 'warning: ASCEND_OPP_PATH is not set; skipping the custom op_api library path\n' >&2
fi
# Driver libraries take precedence over everything added so far.
export LD_LIBRARY_PATH="/usr/local/Ascend/driver/lib64/driver${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# HCCL communication settings.
export HCCL_IF_BASE_PORT=65100
export HCCL_RDMA_TC=100
export HCCL_RDMA_SL=3
export HCCL_ASYNC_ERROR_HANDLING=0

# NOTE(review): "DECOED" looks like a misspelling of "DECODE". The name is
# kept as-is because it must match whatever reads it - confirm before renaming.
export ENABLE_DECOED_TP_AIV=1
export P2P_HCCL_BUFFSIZE=0

# Deterministic mode settings
export \
    HCCL_DETERMINISTIC=True \
    DETERMINISTIC_MODE=True

# Intra-node transport selection: RoCE switched on, PCIe switched off.
HCCL_INTRA_ROCE_ENABLE=1
HCCL_INTRA_PCIE_ENABLE=0
export HCCL_INTRA_ROCE_ENABLE HCCL_INTRA_PCIE_ENABLE

# Buffer size and per-collective algorithm selection. HCCL_ALGO is a single
# string: '/' separates collectives and ';' separates the level0/level1
# choices, so it must stay quoted.
A2A_EP_HCCL_BUFF_SIZE=350
HCCL_ALGO='allgather=level0:NA;level1:pipeline/allreduce=level0:NA;level1:NHR/reducescatter=level0:NA;level1:NHR'
export A2A_EP_HCCL_BUFF_SIZE HCCL_ALGO

# Connection and execution timeouts (presumably in seconds - confirm against
# the HCCL documentation).
export \
    HCCL_CONNECT_TIMEOUT=1200 \
    HCCL_EXEC_TIMEOUT=1800

# Python and system environment
# Append the project-local source trees to PYTHONPATH. The entries are
# relative, so they resolve against the directory the Python process is
# started from. ${PYTHONPATH:+...} avoids a stray leading ':' (an empty
# entry) when PYTHONPATH is unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}3rdparty/megatron-mlp/:./:3rdparty/MARIO_EVAL"
# Prefer the conda-provided executables over the system ones.
export PATH="/usr/local/conda/bin:${PATH}"
export PYTORCH_CUDA_ALLOC_CONF='max_split_size_mb:512'
export TOKENIZERS_PARALLELISM=false
export OMP_NUM_THREADS=4
export HADOOP_HOME=/opt/hadoop
export CUDA_DEVICE_MAX_CONNECTIONS=1
# 0 disables Python's limit on int <-> str conversion length.
export PYTHONINTMAXSTRDIGITS=0

# Network configuration based on link layer type
# The link layer reported for port 1 of mlx5_6 selects the NCCL settings.
# On hosts without that device the sysfs file does not exist; link_layer then
# stays empty and neither branch below applies.
link_layer_file=/sys/class/infiniband/mlx5_6/ports/1/link_layer
link_layer=""
if [[ -r "${link_layer_file}" ]]; then
    link_layer=$(<"${link_layer_file}")
else
    printf 'warning: %s is not readable; skipping link-layer specific NCCL settings\n' \
        "${link_layer_file}" >&2
fi

if [[ "${link_layer}" == "Ethernet" ]]; then
    # A100/H800-RoCE configuration
    source /workdir/export_gid_index.sh
    export NCCL_NVLS_ENABLE=0
elif [[ "${link_layer}" == "InfiniBand" ]]; then
    # H800-IB configuration
    export NCCL_IB_HCA=mlx5_0:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_9:1,mlx5_10:1,mlx5_11:1
    # Only add the ':' separator when LD_LIBRARY_PATH already has content.
    export LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    export NCCL_COLLNET_ENABLE=1
    export USE_SHARP=1
fi

# Package installation
# Every package is fetched from the index below. The URL is plain HTTP, which
# is why pip is also given --trusted-host for that host.
pip_index_args=(-i http://pypi.example.com/simple/ --trusted-host pypi.example.com)
pip_reinstall_args=(--force-reinstall --no-cache)
pip_failures=0

# Run `pip3 install` with the given arguments. A failing install is reported
# on stderr and counted, but does not stop the script: installs stay
# best-effort, they are just no longer silent.
pip_install() {
    if ! pip3 install "$@"; then
        printf 'warning: pip3 install failed: %s\n' "$*" >&2
        pip_failures=$((pip_failures + 1))
    fi
}

# Forced reinstall with dependency resolution.
pip_install experiment_tracking==3.6.8 "${pip_index_args[@]}" "${pip_reinstall_args[@]}"

# Forced reinstall of individual packages without touching their dependencies.
# The order is kept exactly as originally written.
for pkg in \
    easydict datasets torchtyping typeguard pandas triton \
    pyarrow multiprocess dill xxhash dynamo; do
    pip_install "${pkg}" "${pip_index_args[@]}" "${pip_reinstall_args[@]}" --no-deps
done

# Additional package installations
# NOTE(review): experiment_tracking==3.6.8 is force-reinstalled a second time
# here, with dependencies, after the --no-deps installs above. That may
# replace some of them; kept as-is because the intended ordering is unclear.
pip_install experiment_tracking==3.6.8 jieba "${pip_index_args[@]}" "${pip_reinstall_args[@]}"
pip_install langdetect "${pip_index_args[@]}"
pip_install antlr4-python3-runtime==4.11.1 "${pip_index_args[@]}"
pip_install word2number==1.1 "${pip_index_args[@]}"
pip_install transformers==4.44.2 "${pip_index_args[@]}"
pip_install sentencepiece openai==1.47.1 msgspec==0.18.6 gguf==0.10.0 cloudpickle==3.0.0 mistral-common==1.4.3 "${pip_index_args[@]}"
pip_install timeout-decorator==0.5.0 "${pip_index_args[@]}"

if ((pip_failures > 0)); then
    printf 'warning: %d pip3 install command(s) failed; continuing anyway\n' "${pip_failures}" >&2
fi

# Execute the training script
# Absolute directory containing this script. The inner expansion is quoted so
# a path with spaces survives, and '&&' keeps pwd from reporting an unrelated
# directory when cd fails.
# NOTE(review): THIS_PATH is not referenced by any line visible here; the
# config file below is resolved relative to the current working directory.
THIS_PATH=$(cd -- "$(dirname -- "$0")" && pwd)
# exec replaces this shell with the Python process, so nothing after this line
# would run and the script's exit status is the runner's exit status.
exec python3 -m ares.runner.general_runner --config-py-file gspo_config.py --rpc-timeout=720000.0