#!/usr/bin/env bash

# ---------------------------------------------------------------------------
# Settings shared by every finetune.py invocation in this script.
# ---------------------------------------------------------------------------

# Forwarded as --project.
# NOTE(review): "kichen" looks like a typo for "kitchen"; left untouched
# because the name may have to match an already existing project — confirm.
PROJECT='kichen-partial-baselines'

# Forwarded as --env.
ENV='kitchen-partial-v0'

# Forwarded verbatim (underscores included) as --num_offline_steps and
# --num_online_steps.
NUM_OFFLINE='250_000'
NUM_ONLINE='400_000'

# One finetune.py run is launched per seed by each seed loop.
SEED_VALUES=(
  10
  20
  30
  40
  50
)

# ---------------------------------------------------------------------------
# Selection for the first experiment; both variables are reassigned before
# each later experiment.
# ---------------------------------------------------------------------------
AGENT='wsrl'
CONFIG='configs/train_config.py:kitchen_wsrl'

# WSRL experiment: one finetune.py run per seed in SEED_VALUES.
#
# The loop-invariant flags live in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
#
# NOTE(review): --agent is hard-coded to "calql" while exp_name is derived
# from AGENT; presumably intentional — confirm.
RUN_ARGS=(
  --exp_name "kitchen_partial_${AGENT}"
  --agent "calql"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --utd 16
  --warmup_steps 5000
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --seed "$SEED"
  echo  # blank line between runs
done

# IQL experiment: one finetune.py run per seed in SEED_VALUES.
AGENT="iql"
CONFIG="configs/train_config.py:kitchen_iql"

# Loop-invariant flags are kept in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
RUN_ARGS=(
  --exp_name "kitchen_partial_${AGENT}"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --seed "$SEED"
  echo  # blank line between runs
done

# CQL experiment (with --use_redq): one finetune.py run per seed in
# SEED_VALUES.
AGENT="cql"
CONFIG="configs/train_config.py:kitchen_cql"

# Loop-invariant flags are kept in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
RUN_ARGS=(
  --exp_name "kitchen_partial_${AGENT}"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --use_redq
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --seed "$SEED"
  echo  # blank line between runs
done

# Cal-QL experiment: one finetune.py run per seed in SEED_VALUES.
AGENT="calql"
CONFIG="configs/train_config.py:kitchen_calql"

# Loop-invariant flags are kept in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
RUN_ARGS=(
  --exp_name "kitchen_partial_${AGENT}"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --seed "$SEED"
  echo  # blank line between runs
done

# SAC experiment (with --save_final_checkpoint): one finetune.py run per seed
# in SEED_VALUES.
AGENT="sac"
CONFIG="configs/train_config.py:kitchen_sac"

# Loop-invariant flags are kept in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
RUN_ARGS=(
  --exp_name "kitchen_partial_${AGENT}"
  --agent "sac"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --save_final_checkpoint
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --seed "$SEED"
  echo  # blank line between runs
done

# "porl" experiment: one finetune.py run per entry of RESUME_PATHS, each
# passed as --resume_path together with --resume_actor_only.
#
# NOTE(review): --agent is hard-coded to "calql" and the kitchen_wsrl config
# is reused while exp_name is derived from AGENT; presumably intentional —
# confirm.
AGENT="porl"
CONFIG="configs/train_config.py:kitchen_wsrl"

# RESUME_PATHS is not assigned anywhere in this script, so unless the caller
# provides it (e.g. by sourcing this file after defining the array) the loop
# below would silently do nothing. Make that visible on stderr.
if (( ${#RESUME_PATHS[@]} == 0 )); then
  echo "warning: RESUME_PATHS is empty or unset; skipping ${AGENT} runs" >&2
fi

# Loop-invariant flags are kept in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
RUN_ARGS=(
  --exp_name "kitchen_partial_${AGENT}"
  --agent "calql"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --utd 16
  --warmup_steps 20000
  --resume_actor_only
  --train_critic_during_warmup
)

for RESUME_PATH in "${RESUME_PATHS[@]}"; do
  # Header line mirrors the per-seed headers printed by the other loops.
  echo "==== Running with --resume_path ${RESUME_PATH} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --resume_path "$RESUME_PATH"
  echo  # blank line between runs
done

# "soar" experiment: the cql agent with the kitchen_soar config plus the
# annealing flags listed below; one finetune.py run per seed in SEED_VALUES.
# exp_name is fixed to "kitchen_partial_soar" rather than derived from AGENT.
AGENT="cql"
CONFIG="configs/train_config.py:kitchen_soar"

# Loop-invariant flags are kept in an array so that no line continuation is
# needed. Previously the last flag line ended with a backslash, which glued
# the following `echo` onto the python3 command line as a stray positional
# argument instead of printing a blank separator line.
RUN_ARGS=(
  --exp_name "kitchen_partial_soar"
  --agent "$AGENT"
  --config "$CONFIG"
  --project "$PROJECT"
  --reward_scale 1.0
  --reward_bias -4.0
  --num_offline_steps "$NUM_OFFLINE"
  --num_online_steps "$NUM_ONLINE"
  --env "$ENV"
  --batch_size 512
  --use_redq
  --warmup_steps 5000
  --conservative_penalty_annealing
  --use_offline_data_ratio_annealing
  --min_offline_data_ratio 0.0
  --max_offline_data_ratio 0.9
  --min_cql_alpha 0.0
  --anneal_interval 80000
  --cql_alpha_anneal_interval 160000
  --cql_alpha_anneal_method exponential
)

for SEED in "${SEED_VALUES[@]}"; do
  echo "==== Running with --seed ${SEED} ===="
  python3 finetune.py "${RUN_ARGS[@]}" --seed "$SEED"
  echo  # blank line between runs
done
