# Local source checkouts that Python should import from. They are listed ahead
# of any inherited PYTHONPATH so these checkouts are searched first.
AUDENPATH=/apdcephfs_cq10/share_1603164/user/jamelynli/workspace/auden_refactor
LHOTSEPATH=/apdcephfs_cq10/share_1603164/user/jamelynli/workspace/lhotse
# Append the inherited PYTHONPATH only when it is set and non-empty: a bare
# trailing ':' would add an empty search-path entry, which Python resolves to
# the current working directory. The ':+' form is also safe under 'set -u'.
export PYTHONPATH="${AUDENPATH}:${LHOTSEPATH}${PYTHONPATH:+:${PYTHONPATH}}"

# ---------------------------------------------------------------------------
# Runtime environment: CPU threading, NCCL and Hydra settings.
# Values are assigned group by group and exported together further down.
# ---------------------------------------------------------------------------

# CPU threading.
OMP_NUM_THREADS=8

# NCCL networking.
NCCL_SOCKET_IFNAME=eth1   # Use the correct network interface
NCCL_IB_DISABLE=0         # Enable InfiniBand if available
NCCL_IB_GID_INDEX=3
NCCL_IB_SL=3
NCCL_IB_CUDA_SUPPORT=1
NCCL_P2P_DISABLE=0        # Allow GPU peer-to-peer

# NCCL checks / protocol tuning.
# NOTE(review): NCCL_CHECK_DISABLE, NCCL_LL_THRESHOLD and NCCL_IB_CUDA_SUPPORT
# look like legacy names that newer NCCL releases may ignore -- confirm against
# the NCCL version actually installed.
NCCL_CHECK_DISABLE=1
NCCL_LL_THRESHOLD=16384

# Hydra: request full error output (complete stack traces).
HYDRA_FULL_ERROR=1

export OMP_NUM_THREADS \
  NCCL_SOCKET_IFNAME NCCL_IB_DISABLE NCCL_IB_GID_INDEX NCCL_IB_SL \
  NCCL_IB_CUDA_SUPPORT NCCL_P2P_DISABLE \
  NCCL_CHECK_DISABLE NCCL_LL_THRESHOLD \
  HYDRA_FULL_ERROR

# Optional settings, currently disabled. Uncomment a line to enable it.
# export TORCH_DISTRIBUTED_DEBUG=OFF  # Reduce log overhead
# export NCCL_DEBUG=WARN  # Log only important NCCL issues
# export NCCL_DEBUG=INFO
# export NCCL_ASYNC_ERROR_HANDLING=1  # Keep this for stability
# export NCCL_TIMEOUT=600  # Keep this in case of slow nodes
# export NCCL_IB_HCA=mlx5_1:1,mlx5_2:1,mlx5_3:1
