#!/bin/bash

# Choose the XPID: look in train_scripts/ for the XPID of the algorithm you want to evaluate, and substitute it below.

# Experiment identifier; handed to the evaluator through --xpid.
XPID='ued-BipedalWalker-Adversarial-Easy-v0-domain_randomization-noexpgrad-lr0.0003-epoch5-mb32-v0.5-gc0.5-henv0.01-ha0.001-plr0.9-rho0.5-n1000-st0.5-positive_value_loss-rank-t0.1-editor1.0-random-n3-baseeasy-tl_0'


# Comma-separated environment names; handed to the evaluator through
# --test_env_names. printf reuses the format for every argument, so each
# name is emitted followed by a comma.
printf -v TEST_ENVS '%s,' \
  'BipedalWalker-v3' \
  'BipedalWalkerHardcore-v3' \
  'BipedalWalker-Med-Stairs-v0' \
  'BipedalWalker-Med-PitGap-v0' \
  'BipedalWalker-Med-StumpHeight-v0' \
  'BipedalWalker-Med-Roughness-v0'
TEST_ENVS="${TEST_ENVS%,}"  # strip the trailing comma left by the join


# The following is an example of a trained model to evaluate; enter the path of its logging directory.

# Logging directories to evaluate; the loop further down runs the evaluator
# once per entry.
RUN_LOGS=()
RUN_LOGS+=("logs/bw_path_1/")

# Print a summary of the configuration, followed by a blank separator line.
printf '[INFO] %s\n' \
  "XPID=${XPID}" \
  "TEST_ENVS=${TEST_ENVS}" \
  "RUN_LOGS count = ${#RUN_LOGS[@]}"
printf '\n'

#######################################
# Run the evaluator once for a single logging directory.
# The flag list is fixed; only --xpid, --test_env_names and --log_dir vary.
# Globals:   XPID (read), TEST_ENVS (read)
# Arguments: $1 - logging directory, passed through as --log_dir
# Returns:   the exit status of `python -m eval_bw_general`
#######################################
run_eval() {
  local log_dir="$1"

  python -m eval_bw_general \
    --xpid="${XPID}" \
    --env_name=BipedalWalker-Adversarial-Easy-v0 \
    --use_gae=True \
    --gamma=0.99 \
    --gae_lambda=0.9 \
    --seed=55 \
    --num_control_points=12 \
    --recurrent_arch=lstm \
    --recurrent_agent=False \
    --recurrent_adversary_env=False \
    --recurrent_hidden_size=1 \
    --use_global_critic=False \
    --lr=0.0003 \
    --num_steps=2048 \
    --num_processes=16 \
    --num_env_steps=2000000000 \
    --ppo_epoch=5 \
    --num_mini_batch=32 \
    --entropy_coef=0.001 \
    --value_loss_coef=0.5 \
    --clip_param=0.2 \
    --clip_value_loss=False \
    --adv_entropy_coef=0.01 \
    --max_grad_norm=0.5 \
    --algo=ppo \
    --ued_algo=domain_randomization \
    --use_plr=True \
    --level_replay_prob=0.9 \
    --level_replay_rho=0.5 \
    --level_replay_seed_buffer_size=1000 \
    --level_replay_score_transform=rank \
    --level_replay_temperature=0.1 \
    --staleness_coef=0.5 \
    --no_exploratory_grad_updates=True \
    --use_editor=True \
    --level_editor_prob=1.0 \
    --level_editor_method=random \
    --num_edits=3 \
    --base_levels=easy \
    --use_accel_paired=False \
    --accel_paired_score_function=paired \
    --use_lstm=False \
    --use_behavioural_cloning=False \
    --kl_loss_coef=0.0 \
    --kl_update_step=1 \
    --use_kl_only_agent=False \
    --log_interval=10 \
    --screenshot_interval=0 \
    --log_grad_norm=True \
    --normalize_returns=True \
    --checkpoint_basis=student_grad_updates \
    --archive_interval=500 \
    --reward_shaping=True \
    --use_categorical_adv=True \
    --use_skip=False \
    --choose_start_pos=False \
    --sparse_rewards=False \
    --handle_timelimits=True \
    --level_replay_strategy=positive_value_loss \
    --test_env_names="${TEST_ENVS}" \
    --log_dir="${log_dir}" \
    --test_interval=1000 \
    --test_num_episodes=128 \
    --test_num_processes=2 \
    --log_plr_buffer_stats=True \
    --log_replay_complexity=True \
    --checkpoint=True \
    --log_action_complexity=False
}

#######################################
# Evaluate every entry of RUN_LOGS in order.
# A failing run is reported on stderr and does not stop the remaining runs,
# but it prevents the final "[ALL DONE]" banner and makes the function fail.
# Globals:   RUN_LOGS (read)
# Outputs:   per-run banners and "[DONE] <tag>" on stdout;
#            "[FAIL] ..." / "[FAILED] ..." diagnostics on stderr
# Returns:   0 if every run succeeded, 1 if at least one run failed
#######################################
evaluate_all_runs() {
  local run_log tag status
  local -a failed_tags=()

  for run_log in "${RUN_LOGS[@]}"; do

    # basename ignores the trailing slash, so "logs/foo/" yields "foo".
    tag="$(basename "${run_log}")"

    echo "============================================================"
    echo "[RUN] log_dir=${run_log}"
    echo "[RUN] tag=${tag}"
    echo "============================================================"

    if run_eval "${run_log}"; then
      echo "[DONE] ${tag}"
    else
      # In the else branch $? still holds run_eval's exit status.
      status=$?
      echo "[FAIL] ${tag} (exit status ${status})" >&2
      failed_tags+=("${tag}")
    fi
    echo
  done

  if (( ${#failed_tags[@]} > 0 )); then
    echo "[FAILED] ${#failed_tags[@]} of ${#RUN_LOGS[@]} run(s) failed: ${failed_tags[*]}" >&2
    return 1
  fi

  echo "[ALL DONE]"
}

# Propagate evaluation failures through the script's exit status.
evaluate_all_runs || exit 1