#!/usr/bin/env bash
# Sets up the NCCL/CUDA environment, selects a model checkpoint, and starts a
# `trl vllm-serve` server for it.

# Project name exported for child processes (WANDB_PROJECT is the variable
# Weights & Biases uses to pick the project runs are logged under).
export WANDB_PROJECT="stlm-logic-reasoning"

################### runtime env ###################
# Disabled toggles, kept for reference:
# export TOKENIZERS_PARALLELISM=false
# export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:64

# NCCL transports: turn off peer-to-peer and InfiniBand.
# (Author's note on the P2P switch: saves GPU RAM on single-node jobs.)
export NCCL_P2P_DISABLE=1 NCCL_IB_DISABLE=1

# NCCL failure handling in torch.distributed.
# NOTE(review): both switches are enabled here; the PyTorch documentation
# appears to recommend enabling only one of them -- confirm which is intended.
export TORCH_NCCL_BLOCKING_WAIT=1 TORCH_NCCL_ASYNC_ERROR_HANDLING=1

# Verbose diagnostics for NCCL and torch.distributed.
export NCCL_DEBUG=INFO NCCL_DEBUG_SUBSYS=ALL TORCH_DISTRIBUTED_DEBUG=INFO
###################################################

# Work around CUDA version mismatches by pointing CUDA_HOME at the active
# conda environment and putting its lib directory first on the linker paths.
#
# Only done when CONDA_PREFIX is non-empty: otherwise CUDA_HOME would be
# overwritten with an empty string and "/lib" would be prepended to the
# search paths.
if [ -n "${CONDA_PREFIX:-}" ]; then
  export CUDA_HOME="$CONDA_PREFIX"

  # Debug aid: uncomment to verify the value.
  # echo "--- SCRIPT DEBUG: CUDA_HOME is set to: $CUDA_HOME ---"

  # Compile time: where the linker looks for CUDA libraries.
  # ${VAR:+:$VAR} appends the previous value only when it is non-empty, so no
  # trailing ':' (an empty path entry) is produced.
  export LIBRARY_PATH="$CUDA_HOME/lib${LIBRARY_PATH:+:$LIBRARY_PATH}"

  # Run time: where the dynamic linker looks for CUDA libraries. An empty
  # entry in LD_LIBRARY_PATH is treated as the current directory, so the
  # separator is likewise added only when there is an existing value.
  export LD_LIBRARY_PATH="$CUDA_HOME/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
else
  printf '%s\n' "warning: CONDA_PREFIX is not set; leaving CUDA_HOME, LIBRARY_PATH and LD_LIBRARY_PATH unchanged" >&2
fi

# ---------------------------------------------------------------------------
# Model selection: exactly one MODEL_NAME assignment below should be active.
# The commented-out entries are previously used candidates kept for reference.
# ---------------------------------------------------------------------------

# Unused dataset path, kept for reference:
# TRAIN_DATA_PATH="/mnt/data/data/clinical-reasoning-dataset/final_prepared_dataset_mimic_test/0000_final_combined_dataset_test.feather"

# Base models:
# MODEL_NAME="meta-llama/Llama-3.2-1B-Instruct"
# MODEL_NAME="Qwen/Qwen2.5-0.5B-Instruct"
# MODEL_NAME="Qwen/Qwen2.5-1.5B-Instruct"

# Candidates used for saelens:
# MODEL_NAME="Qwen/Qwen2-1.5B-Instruct"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move/Qwen_Qwen2-1.5B-Instruct_nl_canconical-symmetry-grouping_best_move/checkpoint-300"


# grpo_v2_expt_updated checkpoints:
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_updated/meta-llama_Llama-3.2-1B-Instruct_special_updated"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_updated/Qwen_Qwen2.5-0.5B-Instruct_nl_canconical-symmetry-grouping/checkpoint-150"

# grpo_v2_expt checkpoint:
# MODEL_NAME="/mnt/data/data/stlm-logic/grpo_v2_expt/Qwen_Qwen2.5-1.5B-Instruct_special_random_80_10_10/checkpoint-200"

# grpo_v2_expt_lr_1e-6 checkpoints (Qwen2.5-1.5B):
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6/Qwen_Qwen2.5-1.5B-Instruct_nl_canconical-symmetry-grouping/checkpoint-750"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6/Qwen_Qwen2.5-1.5B-Instruct_nl_random_80_10_10/checkpoint-750"

# Within the best_move folder, results whose directory name does not carry the
# "best_move" tag come from models that were already trained on legal moves.
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move/Qwen_Qwen2.5-1.5B-Instruct_nl_random_80_10_10/checkpoint-1350"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move/Qwen_Qwen2.5-1.5B-Instruct_nl_canconical-symmetry-grouping/checkpoint-1650"

# Qwen2.5-0.5B checkpoints:
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move/Qwen_Qwen2.5-0.5B-Instruct_nl_canconical-symmetry-grouping_best_move/checkpoint-1800"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6/Qwen_Qwen2.5-0.5B-Instruct_nl_random_80_10_10/checkpoint-600"

# Best move from pretrained, continued after stopping:
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move_from_legal/Qwen_Qwen2.5-0.5B-Instruct_nl_random_80_10_10/checkpoint-750"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move_from_legal/Qwen_Qwen2.5-0.5B-Instruct_nl_canconical-symmetry-grouping_best_move/checkpoint-2100"

# grpo_v2_expt_lr_1e-6 checkpoints (Llama-3.2-1B):
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6/meta-llama_Llama-3.2-1B-Instruct_nl_canconical-symmetry-grouping/checkpoint-600"
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6/meta-llama_Llama-3.2-1B-Instruct_nl_random_80_10_10/checkpoint-450"

# Continued training for the meta best-move-from-legal run after 1950 steps.
# For now these are stored in the same folder as the best-move-from-pretrained
# runs, but their directory names do not contain the _best_move suffix.
# MODEL_NAME="/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move/meta-llama_Llama-3.2-1B-Instruct_nl_random_80_10_10/checkpoint-1950"

# >>> Active selection <<<
MODEL_NAME='/mnt/shared/data/stlm-logic/grpo_v2_expt_lr_1e-6_best_move/meta-llama_Llama-3.2-1B-Instruct_nl_canconical-symmetry-grouping/checkpoint-1050'

# Port number exported to child processes as TRL_VLLM_GROUP_PORT.
# NOTE(review): presumably read by the TRL/vLLM tooling for its process-group
# communication -- confirm against the consumer of this variable.
export TRL_VLLM_GROUP_PORT=61036

# Start the TRL vLLM server for the selected model with only GPU index 2
# visible to the process. MODEL_NAME is quoted so that a path containing
# spaces or glob characters is passed through as a single argument.
CUDA_VISIBLE_DEVICES=2 trl vllm-serve \
  --model "$MODEL_NAME" \
  --gpu-memory-utilization 0.20 \
  --dtype auto \
  --port 8006 \
  --tensor_parallel_size 1