#!/bin/bash
# Environment setup for the LibMoE moe_pretrain_model test run.
# The shebang must be the very first line of the file to take effect.

# Pin the run to CUDA device 0.
export CUDA_VISIBLE_DEVICES="0"

# API settings and credentials. The values below look like placeholders;
# fill them in locally and never commit real secrets.
export API_TYPE='xxxx'
export DEPLOYMENT='xxx'
export ENDPOINT='xxxxx'
export VERSION='xxxx'
export API_KEY='xxxx'
export KEY_HF="xxxxx"

# 0 keeps CUDA kernel launches asynchronous (set to 1 only when debugging).
export CUDA_LAUNCH_BLOCKING=0

# Cache and scratch locations on the shared filesystem.
export HF_HOME="/cm/shared/anonymous/LibMoE/evaluate"
export TMPDIR="/cm/shared/anonymous/tmp"
export TOOLKIT_DIR="/cm/shared/anonymous"  # Parent directory that contains the LibMoE checkout

# Prepend the project to PYTHONPATH. The ${VAR:+...} form only adds the ':'
# separator when PYTHONPATH is already non-empty, so no empty entry is left
# at the end of the search path.
export PYTHONPATH="/cm/shared/anonymous/LibMoE/moe_pretrain_model${PYTHONPATH:+:${PYTHONPATH}}"

# Location of the tensorboard package inside the 'moe' conda environment.
export tensorboard="/home/anonymous/miniconda3/envs/moe/lib/python3.9/site-packages/tensorboard"
# Split the comma-separated CUDA device list into the GPULIST array.
# Falls back to device 0 when CUDA_VISIBLE_DEVICES is unset or empty.
gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
IFS=',' read -ra GPULIST <<< "$gpu_list"

# Count devices from the parsed array so NUM_GPUS always agrees with GPULIST
# (no unquoted expansion, no dependence on wc's output padding).
NUM_GPUS=${#GPULIST[@]}

# Rendezvous address for the master process (a loopback address here; use the
# master node's IP for multi-node runs). Exported so the Python child process
# can actually see it -- presumably consumed by torch.distributed; verify
# against the launched script.
export MASTER_ADDR="127.0.0.3"

# Rendezvous port paired with MASTER_ADDR; pick another value if it is
# already in use on this host.
export MASTER_PORT=12313
# Run from the project root; abort instead of launching Python from the wrong
# working directory if the checkout is missing.
project_dir="/cm/shared/anonymous/LibMoE/moe_pretrain_model"
cd "$project_dir" || {
  printf 'error: cannot cd to %s\n' "$project_dir" >&2
  exit 1
}

# Launch the test runner directly with the Python interpreter found on PATH
# (this is a plain single-process invocation, not torch.distributed.run).
python "$project_dir/paper/moe_universal/run_tests.py"