#!/bin/bash

# Sweep --TASK_PROB over 0.0, 0.1, ..., 1.0 and launch one RAPC-PPO run per
# value, sequentially. Each run writes its combined stdout and filtered stderr
# to its own timestamped file under ./logs.

# Without the log directory every redirect below would fail, so stop early.
mkdir -p logs || exit 1

for i in {0..10}; do
  # Build the tag from an integer counter instead of `seq 0 0.1 1` + printf:
  # both of those follow LC_NUMERIC and would produce "0,1" in decimal-comma
  # locales. This yields exactly 0.0, 0.1, ..., 0.9, 1.0 in every locale.
  p_tag="$((i / 10)).$((i % 10))"
  ts=$(date +%Y%m%d_%H%M%S)
  LOG=./logs/hopper_final_p${p_tag}_${ts}.log

  echo "===== Running p=${p_tag}, log=${LOG} ====="

  # Start from an empty log, then let both writers open it in append mode.
  # Mixing a truncating '>' for stdout with '>>' for the stderr filter lets
  # stdout overwrite lines the filter has already appended.
  : > "$LOG"

  # stderr goes through the pipe so lines containing "ptx86" are dropped;
  # stdout goes straight to the log. A real pipeline (rather than a process
  # substitution) makes the shell wait for grep before moving on, so the log
  # is complete when "Finished" is printed.
  python -u ./rl/RAPC-PPO.py \
    --EXP_NAME='HopperAvoidCeiling' \
    --DIR="hopper_avoid_ceiling_p${p_tag}" \
    --NUM_ENVS=512 \
    --NUM_STEPS=400 \
    --TOTAL_TIMESTEPS=80_000_000 \
    --STEP_SCAN=4 \
    --UPDATE_EPOCHS=10 \
    --NUM_MINIBATCHES=32 \
    --TASK_PROB="${p_tag}" \
    --GAMMA_ENERGY=0.99 \
    --GAMMA_REACH_INIT=0.999 \
    --GAMMA_REACH_FINAL=0.999 \
    --GAE_LAMBDA=0.95 \
    --CLIP_EPS=0.2 \
    --ENT_COEF=0.01 \
    --VF_COEF=2.0 \
    --MAX_GRAD_NORM=0.5 \
    --POLICY_LR=3e-4 \
    --VALUE_LR=1e-3 \
    --LAGRANGE_LR=5e-5 \
    --PHI_LR=1e-3 \
    --CUDA_USE=2 \
    --ANNEAL_ENT \
    --NAME="hopper_final_p${p_tag}" \
    2>&1 >> "$LOG" | grep -v "ptx86" >> "$LOG"
  # PIPESTATUS[0] is python's status; the pipeline's own status is grep's,
  # which is 1 whenever no stderr lines survive the filter.
  status=${PIPESTATUS[0]}

  # Keep sweeping after a failed run, but do not report it silently.
  if ((status != 0)); then
    echo "===== p=${p_tag} FAILED with exit status ${status} (see ${LOG}) =====" >&2
  fi

  echo "===== Finished p=${p_tag} ====="
done
