# ---------------------------------------------------------------------------
# Run configuration and checkpoint-iteration resolution.
#
# Sets the training/inference parameters used by this script, builds the path
# of the "latest_iteration.txt" marker inside the checkpoint directory, and
# derives the zero-padded iteration label (iter_NNNNNNN) from it.
# ---------------------------------------------------------------------------

# EPOCH is embedded in the checkpoint directory name below.
EPOCH=3
# NUM_GPUS is not referenced in this section; presumably consumed further down.
NUM_GPUS=2

BASE_MODEL=Qwen3-8B
MODEL_ORG="Qwen"
TRAIN_DATASET=qwen3-8B-vanilla-no-think-different-tag

CHECK_POINT_ROOT_PATH="/path/to/home/checkpoints"
CONVERTED_CHECK_POINT_ROOT_PATH="/path/to/home/converted_checkpoint"

# NOTE: this assignment must NOT end with a line-continuation backslash;
# otherwise it would be glued to whatever statement follows it.
SYSTEM_PROMPT="/no_think You are a highly capable assistant. Your task is to estimate the output of the given Python code. The reasoning process and output are enclosed within <think></think> and <answer></answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> output here </answer>"

LATEST_ITERATION_FILE="${CHECK_POINT_ROOT_PATH}/${BASE_MODEL}/${TRAIN_DATASET}_lr_2e-5-minlr_4e-6_GB_64_${EPOCH}epoch/latest_iteration.txt"

#######################################
# Print the iteration number stored in a marker file as a plain decimal
# integer without leading zeros.
# Arguments: $1 - path to the marker file
# Outputs:   the iteration on stdout; 0 when the file is missing or when its
#            content is not a non-negative integer (a warning goes to stderr
#            in the latter case)
# Returns:   0 always (best-effort, like the original fallback to 0)
#######################################
# The body is a ( subshell ) so the helper's variables never leak into the
# caller's environment, without relying on the non-POSIX `local`.
read_latest_iteration() (
  value=0
  if [ -f "$1" ]; then
    # Drop CRs so files written with CRLF line endings are accepted.
    value=$(tr -d '\r' < "$1")
    # Trim leading and trailing whitespace.
    value=${value#"${value%%[![:space:]]*}"}
    value=${value%"${value##*[![:space:]]}"}
    case "$value" in
      '' | *[!0-9]*)
        printf 'warning: invalid iteration "%s" in %s; using 0\n' \
          "$value" "$1" >&2
        value=0
        ;;
      *)
        # printf's %d treats a leading 0 as octal ("0000100" -> 64), so strip
        # leading zeros to force a decimal interpretation.
        value=${value#"${value%%[!0]*}"}
        value=${value:-0}
        ;;
    esac
  fi
  printf '%s\n' "$value"
)

ITERATION=$(read_latest_iteration "$LATEST_ITERATION_FILE")

# Zero-padded to 7 digits, e.g. 1000 -> iter_0001000.
FORMATTED_ITERATION=$(printf 'iter_%07d' "$ITERATION")
INFERENCE_MODEL_PATH="${MODEL_ORG}/${BASE_MODEL}"

USE_GUIDED_DECODING=false
