#!/usr/bin/env bash
#
# Batch-run configuration: the list of task ids to process plus the
# observation type and model name used to build the run.py command line
# and the per-task result directory.
#
# This script uses Bash arrays, so it must be interpreted by Bash (not a
# plain POSIX sh); the shebang above makes direct execution do that.

# Numeric task ids to run; each one is turned into the task name
# "webchorearena.<id>". Laid out one id "decade" per row for readability.
TASK_IDS=(
  20000 20001 20002 20003 20004
  20010 20011 20012 20013 20014
  20020 20021 20022 20023 20024
  20030 20031 20032 20033 20034
  20040 20041 20042 20043 20044
  20050 20051 20052 20053 20054
  20060 20061 20062 20063 20064
  20070 20071 20072 20073 20074
  20080 20081 20082 20083 20084
  20090 20091
  20100 20101 20102 20103 20104
  20110 20111 20112 20113 20114
  20120 20121 20122 20123 20124
  20130 20131 20132 20133 20134
  20140 20141 20142 20143 20144
  20150 20151 20152 20153 20154
  20160 20161 20162
  20170 20171 20172 20173 20174
  20180 20181 20182 20183 20184
  20190 20191 20192 20193 20194
  20200 20201 20202 20203 20204
  20210 20211
  20220 20221 20222 20223 20224
  20230 20231 20232 20233
  20240 20241 20242 20243 20244 20245
)

# Exported so that child processes see it.
# NOTE(review): presumably read by run.py and/or env_setup_wa.sh -- confirm.
export CALC_AVAILABLE=False

# Pull environment settings into the current shell. The file's contents are
# not visible here; it is sourced (not executed) so that anything it sets
# stays in effect for the commands below.
# shellcheck source=/dev/null
source env_setup_wa.sh

# Observation type, passed as --observation_type and used as a path component.
TYPE="auto"
# Model identifier, passed as --model_name and used as a path component.
MODEL="claude-sonnet-4-20250514-reasoning"

# Run every task in TASK_IDS once, skipping tasks whose result directory
# already exists so that an interrupted batch can simply be re-launched.
#
# Globals read:    TASK_IDS, TYPE, MODEL
# Globals written: RESULT_DIR (result directory of the task being processed)

# Loop-invariant root shared by the skip check and the --result_dir flag, so
# the two can never drift apart.
result_root="results/shopping/${TYPE}/${MODEL}"

for task_id in "${TASK_IDS[@]}"; do
  task_name="webchorearena.${task_id}"
  RESULT_DIR="${result_root}/${task_name}"

  # The mere existence of the directory counts as "done".
  # NOTE(review): a run that died part-way may leave a directory behind and
  # would then be skipped here -- remove it by hand to force a re-run.
  if [[ -d "$RESULT_DIR" ]]; then
    echo "Skipping ${task_id} (already exists)"
    continue
  fi

  # All expansions are quoted so each one is passed as exactly one argument.
  python run.py \
    --action_space webarena \
    --max_new_tokens 8000 \
    --tips True \
    --observation_type "${TYPE}" \
    --task_name "${task_name}" \
    --model_name "${MODEL}" \
    --result_dir "${result_root}/"
done
