#!/usr/bin/env bash

# ---- Settings shared by every sweep in this script -------------------------
# Passed to finetune.py as --env and --project respectively.
ENV='antmaze-ultra-diverse-v2'
PROJECT='antmaze-ultra_diverse-baselines'

# Step budgets, passed as --num_offline_steps / --num_online_steps.
# Bash hands these over verbatim, underscores included.
# NOTE(review): assumes finetune.py accepts "1_000_000"-style integers — confirm.
NUM_OFFLINE='1_000_000'
NUM_ONLINE='400_000'

# One run is launched per seed in each seed-driven loop.
declare -a SEED_VALUES=(10 20 30 40 50)

# ---- Settings for the first sweep ------------------------------------------
# AGENT is used to build --exp_name; CONFIG is passed as --config.
# Both are reassigned before later sweeps.
AGENT='wsrl'
CONFIG='configs/train_config.py:antmaze_wsrl'

# WSRL sweep: launch one finetune.py run per entry in SEED_VALUES, using the
# AGENT/CONFIG values currently in effect. AGENT only labels the run via
# --exp_name; --agent is pinned to "calql".
# NOTE(review): presumably intentional (WSRL config on top of the calql
# agent) — confirm.
for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_${AGENT}" \
    --agent "calql" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --seed "$SEED" \
    --batch_size 512 \
    --utd 16 \
    --warmup_steps 5000
  # Blank line to separate the output of consecutive runs.
  echo
done

# IQL sweep: switch AGENT/CONFIG to the IQL settings, then launch one
# finetune.py run per entry in SEED_VALUES.
AGENT="iql"
CONFIG="configs/train_config.py:antmaze_iql"

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_${AGENT}" \
    --agent "$AGENT" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --seed "$SEED" \
    --batch_size 512
  # Blank line to separate the output of consecutive runs.
  echo
done

# CQL sweep: switch AGENT/CONFIG to the CQL settings, then launch one
# finetune.py run per entry in SEED_VALUES with --use_redq enabled.
AGENT="cql"
CONFIG="configs/train_config.py:antmaze_cql"

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_${AGENT}" \
    --agent "$AGENT" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --seed "$SEED" \
    --batch_size 512 \
    --use_redq
  # Blank line to separate the output of consecutive runs.
  echo
done

# Cal-QL sweep: switch AGENT/CONFIG to the calql settings, then launch one
# finetune.py run per entry in SEED_VALUES.
AGENT="calql"
CONFIG="configs/train_config.py:antmaze_calql"

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_${AGENT}" \
    --agent "$AGENT" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --seed "$SEED" \
    --batch_size 512
  # Blank line to separate the output of consecutive runs.
  echo
done

# SAC sweep: switch AGENT/CONFIG to the SAC settings, then launch one
# finetune.py run per entry in SEED_VALUES with --save_final_checkpoint set.
AGENT="sac"
CONFIG="configs/train_config.py:antmaze_sac"

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # --agent takes "$AGENT" (value "sac", same as before) to match the other
  # sweeps that derive the agent name from AGENT.
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_${AGENT}" \
    --agent "$AGENT" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --seed "$SEED" \
    --batch_size 512 \
    --save_final_checkpoint
  # Blank line to separate the output of consecutive runs.
  echo
done


# PoRL sweep: launch one finetune.py run per entry in RESUME_PATHS, passing the
# entry as --resume_path together with --resume_actor_only and
# --train_critic_during_warmup. AGENT only labels the run via --exp_name;
# --agent is pinned to "calql" and the WSRL config is reused.
# NOTE(review): presumably intentional — confirm. No --seed is passed here, so
# whatever default finetune.py uses applies.
AGENT="porl"
CONFIG="configs/train_config.py:antmaze_wsrl"

# RESUME_PATHS is not assigned earlier in this script, and bash arrays cannot
# be inherited through the environment, so when the script is executed
# directly the loop below iterates zero times. Report that on stderr instead
# of skipping silently.
if (( ${#RESUME_PATHS[@]} == 0 )); then
  echo "warning: RESUME_PATHS is empty or unset; skipping ${AGENT} runs" >&2
fi

for RESUME_PATH in "${RESUME_PATHS[@]}"; do
  echo "==== Running with --resume_path ${RESUME_PATH} ===="
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_${AGENT}" \
    --agent "calql" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --resume_path "$RESUME_PATH" \
    --batch_size 512 \
    --utd 16 \
    --warmup_steps 20000 \
    --resume_actor_only \
    --train_critic_during_warmup
  # Blank line to separate the output of consecutive runs.
  echo
done

# SOAR sweep: run the cql agent with the antmaze_soar config, one finetune.py
# run per entry in SEED_VALUES. The experiment name is fixed to
# "antmaze_ultra_diverse_soar" rather than derived from AGENT, so these runs
# do not share a name with the plain CQL sweep.
AGENT="cql"
CONFIG="configs/train_config.py:antmaze_soar"

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # The final flag line must NOT end in a backslash: a trailing "\" joins the
  # following `echo` onto this command, passing the word "echo" to
  # finetune.py as a stray positional argument.
  python3 finetune.py \
    --exp_name "antmaze_ultra_diverse_soar" \
    --agent "$AGENT" \
    --config "$CONFIG" \
    --project "$PROJECT" \
    --reward_scale 10.0 \
    --reward_bias -5.0 \
    --num_offline_steps "$NUM_OFFLINE" \
    --num_online_steps "$NUM_ONLINE" \
    --env "$ENV" \
    --seed "$SEED" \
    --batch_size 512 \
    --use_redq \
    --warmup_steps 5000 \
    --use_offline_data_ratio_annealing \
    --min_offline_data_ratio 0.0 \
    --max_offline_data_ratio 0.25 \
    --anneal_interval 40000
  # Blank line to separate the output of consecutive runs.
  echo
done
