#!/bin/bash
#
# Launch evaluate_tau_bench_internalization.py on the "retail" split with the
# qwen3-32b tau_bench_self_cot_cpt_sft checkpoint.
#
# Usage: bash <this-script> [extra args forwarded to the Python script...]
#
# The Python script is referenced by a relative path, so run this from the
# directory that contains evaluate_tau_bench_internalization.py.
#
# Any arguments passed to this script are appended after the defaults below.
# NOTE(review): whether a repeated flag overrides an earlier default depends
# on the Python script's argument parser -- confirm before relying on it.

# Fail fast on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Restrict CUDA to device indices 0 through 7.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Default arguments, kept in an array so each flag/value pair sits on its own
# line without fragile backslash continuations.
eval_args=(
  --split retail
  --model /checkpoints/jiateng-sandbox/saves/qwen3-32b/tau_bench_self_cot_cpt_sft
  --max_tasks 115
  --start_index 0
)

python evaluate_tau_bench_internalization.py "${eval_args[@]}" "$@"