#!/usr/bin/env bash

# Settings shared by every batch of finetune.py runs in this script.
ENV="pen-binary-v0"
PROJECT="pen-binary-baselines"

# Step budgets are handed to finetune.py verbatim, i.e. as the strings
# "40_000" and "400_000"; bash does not interpret the underscores.
# NOTE(review): assumes finetune.py's flag parsing accepts
# underscore-separated integers -- confirm.
NUM_OFFLINE="40_000"
NUM_ONLINE="400_000"

# Each seeded batch launches one run per entry in this list.
SEED_VALUES=(10 20 30 40 50)

# Agent name and config entry used by the first batch; both are reassigned
# before each later batch.
AGENT="wsrl"
CONFIG="configs/train_config.py:adroit_wsrl"

# First batch: one finetune.py run per seed under the experiment name
# pen_binary_${AGENT}, using the AGENT/CONFIG values set at the top of the file.
#
# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py instead of printing a blank separator line.
#
# NOTE(review): --agent is the literal "calql" here even though the experiment
# name is built from $AGENT -- kept as-is; confirm this pairing is intended.
FINETUNE_ARGS=(
  --exp_name "pen_binary_${AGENT}"
  --agent "calql"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --utd 16
  --warmup_steps 5000
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # Only the seed varies between runs, so it is appended per iteration.
  python3 finetune.py "${FINETUNE_ARGS[@]}" --seed "$SEED"
  echo
done

# IQL batch: one finetune.py run per seed with the adroit_iql config entry.
AGENT="iql"
CONFIG="configs/train_config.py:adroit_iql"

# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py instead of printing a blank separator line.
FINETUNE_ARGS=(
  --exp_name "pen_binary_${AGENT}"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # Only the seed varies between runs, so it is appended per iteration.
  python3 finetune.py "${FINETUNE_ARGS[@]}" --seed "$SEED"
  echo
done

# CQL batch: one finetune.py run per seed with the adroit_cql config entry
# and the --use_redq flag.
AGENT="cql"
CONFIG="configs/train_config.py:adroit_cql"

# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py (here, right after the bare --use_redq
# flag) instead of printing a blank separator line.
FINETUNE_ARGS=(
  --exp_name "pen_binary_${AGENT}"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --use_redq
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # Only the seed varies between runs, so it is appended per iteration.
  python3 finetune.py "${FINETUNE_ARGS[@]}" --seed "$SEED"
  echo
done

# Cal-QL batch: one finetune.py run per seed with the adroit_calql config entry.
AGENT="calql"
CONFIG="configs/train_config.py:adroit_calql"

# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py instead of printing a blank separator line.
FINETUNE_ARGS=(
  --exp_name "pen_binary_${AGENT}"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # Only the seed varies between runs, so it is appended per iteration.
  python3 finetune.py "${FINETUNE_ARGS[@]}" --seed "$SEED"
  echo
done

# SAC batch: one finetune.py run per seed with the adroit_sac config entry
# and the --save_final_checkpoint flag.
AGENT="sac"
CONFIG="configs/train_config.py:adroit_sac"

# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py instead of printing a blank separator line.
FINETUNE_ARGS=(
  --exp_name "pen_binary_${AGENT}"
  # Same value as the literal "sac"; written via $AGENT to match the sibling
  # batches that derive --agent from the variable.
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --save_final_checkpoint
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # Only the seed varies between runs, so it is appended per iteration.
  python3 finetune.py "${FINETUNE_ARGS[@]}" --seed "$SEED"
  echo
done

# "porl" batch: one finetune.py run per entry of RESUME_PATHS, each passed as
# --resume_path together with --resume_actor_only and
# --train_critic_during_warmup. No --seed flag is passed for these runs.
AGENT="porl"
CONFIG="configs/train_config.py:adroit_wsrl"

# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py instead of printing a blank separator line.
#
# NOTE(review): --agent is the literal "calql" here even though the experiment
# name is built from $AGENT -- kept as-is; confirm this pairing is intended.
FINETUNE_ARGS=(
  --exp_name "pen_binary_${AGENT}"
  --agent "calql"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --utd 16
  --warmup_steps 5000
  --resume_actor_only
  --train_critic_during_warmup
)

# RESUME_PATHS is not assigned anywhere in the visible part of this script, so
# it must come from the surrounding shell (e.g. when the script is sourced).
# Without it the loop below runs zero times; say so instead of skipping the
# whole batch silently. Later batches still run.
if (( ${#RESUME_PATHS[@]} == 0 )); then
  echo "warning: RESUME_PATHS is empty or unset; skipping pen_binary_${AGENT} runs" >&2
fi

for RESUME_PATH in "${RESUME_PATHS[@]}"; do
  # Banner added for parity with the seeded batches.
  echo "==== Running with --resume_path ${RESUME_PATH} ===="
  python3 finetune.py "${FINETUNE_ARGS[@]}" --resume_path "$RESUME_PATH"
  echo
done

# "soar" batch: one finetune.py run per seed with the "cql" agent, the
# adroit_soar config entry and the annealing-related flags listed below. The
# experiment name is the fixed string pen_binary_soar, not derived from $AGENT.
AGENT="cql"
CONFIG="configs/train_config.py:adroit_soar"

# The flags live in an array instead of a backslash-continued command: a
# trailing backslash after the last flag would turn the following `echo` into
# an extra argument to finetune.py (here, right after the
# --cql_alpha_anneal_method value) instead of printing a blank separator line.
FINETUNE_ARGS=(
  --exp_name "pen_binary_soar"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 10.0
  --reward_bias 5.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --use_redq
  --warmup_steps 5000
  --conservative_penalty_annealing
  --use_offline_data_ratio_annealing
  --min_offline_data_ratio 0.0
  --max_offline_data_ratio 0.25
  --min_cql_alpha 0.0
  --anneal_interval 20000
  --cql_alpha_anneal_interval 40000
  --cql_alpha_anneal_method exponential
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  # Only the seed varies between runs, so it is appended per iteration.
  python3 finetune.py "${FINETUNE_ARGS[@]}" --seed "$SEED"
  echo
done
