#!/usr/bin/env bash
#
# Experiment driver. This first section:
#   1. resolves the directory containing this script and the project root
#      three directory levels above it,
#   2. activates the virtualenv at <project root>/.venv,
#   3. changes into the project root so the relative "scripts/..." and
#      "./..." paths used further down resolve,
#   4. defines the experiment parameters shared by the stages below.
#
# `set -e` is intentionally NOT enabled: the individual python runs further
# down are not checked, so one failing run does not abort the whole sweep.
# Only the environment setup in this section is treated as fatal.

# Print a message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# An assignment returns the exit status of its command substitution, so a
# failed `cd` inside the subshell is caught by the `||` branch.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) \
  || die "cannot resolve the directory of ${BASH_SOURCE[0]}"
PROJECT_ROOT=$( cd -- "$SCRIPT_DIR/../../.." &> /dev/null && pwd ) \
  || die "cannot resolve the project root from $SCRIPT_DIR/../../.."

# Without these checks a missing virtualenv or an unreachable root would be
# ignored silently and every later python call would run with whatever
# interpreter and working directory happened to be current.
source "$PROJECT_ROOT/.venv/bin/activate" \
  || die "cannot activate virtualenv at $PROJECT_ROOT/.venv"

cd -- "$PROJECT_ROOT" || die "cannot change directory to $PROJECT_ROOT"

# --- Shared experiment parameters -------------------------------------------
NUM_JOBS=20          # --num_jobs value; also exported as OMP_NUM_THREADS below
SKIP_EVAL=1          # not referenced anywhere in the visible part of this script
NUM_ENVS=256         # --num_envs for dataset generation (stage currently disabled)
LOAD_EXISTING=0      # not referenced anywhere in the visible part of this script
SHAPE="medium"       # policy shape; part of every dataset/model directory name
NUM_POLICIES=10000   # number of policies; part of every directory name
SEEDS=25             # how many consecutive seeds each stage iterates over
ENV_NAME="MountainCarContinuous-v0"

LEARNING_RATE=0.0001 # autoencoder learning rate; embedded in model directory names
LATENT_DIM=(1 2 3)   # latent dimensions swept by the stages below

# Exported so that every child process inherits it.
export OMP_NUM_THREADS=$NUM_JOBS
# First seed value; every seed loop below starts again from 100 on its own.
SEED=100
# --- Disabled pipeline stages -----------------------------------------------
# Everything between the next line and the DISABLED_PIPELINE_STAGES terminator
# is inert text: it is fed to the no-op builtin `:` as a here-document whose
# delimiter is quoted, so nothing inside is expanded or executed.
# The disabled text contains, in order:
#   1. per seed: scripts/generate_policies.py, then scripts/rejection_sampling.py
#   2. one call to scripts/evaluate_selected.py
#   3. per seed and latent dimension: scripts/train_autoencoder.py followed by
#      scripts/test_latent.py
# To re-enable the stages, delete the `: <<...` line and the terminator line.
: <<'DISABLED_PIPELINE_STAGES'
for ((i=0; i<$SEEDS; i++)); do
  echo "--> Running with Seed: $SEED"
  echo "Generate Dataset"

  ARGS=(
    "--policy_shape" "$SHAPE"
    "--param_range" "2.5"
    "--num_policies" "$NUM_POLICIES"
    "--chunk_size" "$NUM_POLICIES"
    "--seed" "$SEED"
    "--num_envs" "$NUM_ENVS"
    "--num_jobs" "$NUM_JOBS"
    "--skip_eval"
  )
  python scripts/generate_policies.py "${ARGS[@]}"

  echo "Select Policies"

  ARGS=(
    "--source_dir" "./generated_policies/${ENV_NAME}_${SHAPE}_${NUM_POLICIES}_seed_${SEED}"
    "--k_neighbors" "15"
    "--num_states" "3000"
    "--num_jobs" "$NUM_JOBS"
    "--skip_stats"
  )
  python ./scripts/rejection_sampling.py "${ARGS[@]}"
  ((SEED++))
done

python scripts/evaluate_selected.py --seed 0 --num_envs 500 --num_jobs 2 --filter_seeds

SEED=100
for ((i=0; i<$SEEDS; i++)); do
  for latent_dim in "${LATENT_DIM[@]}"; do
    if [ "$latent_dim" -eq 1 ]; then
      NUM_POINTS=100
      NUM_ENVS=100
    elif [ "$latent_dim" -eq 2 ]; then
      NUM_POINTS=50
      NUM_ENVS=500
    elif [ "$latent_dim" -eq 3 ]; then
      NUM_POINTS=17
      NUM_ENVS=500
    fi
    python scripts/train_autoencoder.py \
      --source_dir ./selected_policies/"$ENV_NAME"_"$SHAPE"_"$NUM_POLICIES"_seed_"$SEED" \
      --seed $SEED \
      --learning_rate $LEARNING_RATE \
      --latent_dim $latent_dim \
      --epochs 50 \
      --percentile 0.1
    echo "Testing latent space"
    python scripts/test_latent.py \
      --source_dir ./trained_autoencoders_keep_10p_lr_"$LEARNING_RATE"_dim_"$latent_dim"/"$ENV_NAME"_"$SHAPE"_"$NUM_POLICIES"_seed_"$SEED" \
      --seed $SEED \
      --num_points $NUM_POINTS \
      --num_envs  $NUM_ENVS \
      --num_jobs $NUM_JOBS
  done
  ((SEED++))
done
DISABLED_PIPELINE_STAGES

# --- PGPE sweep -------------------------------------------------------------
# Hyperparameters forwarded to scripts/run_pgpe.py.
POPSIZE=4        # passed as both --popsize and --num_envs
CENTER_LR=0.05   # --center_learning_rate
STDEV=0.6        # --stdev_init
REWARDS=("height" "left" "standard" "speed")  # one full sweep per --reward_type

#######################################
# Run scripts/run_pgpe.py once for every combination of reward type, seed
# and latent dimension.
# Globals:
#   read:    REWARDS, SEEDS, LATENT_DIM, LEARNING_RATE, ENV_NAME, SHAPE,
#            NUM_POLICIES, POPSIZE, CENTER_LR, STDEV
#   written: SEED (restarted at 100 for each reward type and incremented
#            once per seed iteration)
# Arguments:
#   None
# Notes:
#   The exit status of the individual python runs is not checked, so a
#   failing run does not stop the remaining combinations.
#######################################
run_pgpe_sweep() {
  local reward latent_dim i
  local -a pgpe_args

  for reward in "${REWARDS[@]}"; do
    # Every reward type is evaluated on the same seed sequence.
    SEED=100
    for (( i = 0; i < SEEDS; i++ )); do
      for latent_dim in "${LATENT_DIM[@]}"; do
        # Building the command line in an array keeps each value a single
        # argument even if it contains whitespace or glob characters.
        pgpe_args=(
          --source_dir "./trained_autoencoders_keep_10p_lr_${LEARNING_RATE}_dim_${latent_dim}/${ENV_NAME}_${SHAPE}_${NUM_POLICIES}_seed_${SEED}"
          --popsize "$POPSIZE"
          --num_envs "$POPSIZE"
          --seed "$SEED"
          --center_learning_rate "$CENTER_LR"
          --stdev_init "$STDEV"
          --reward_type "$reward"
          --stdev_learning_rate 0.1
          --num_generations 50
          --num_runs 1
        )
        python scripts/run_pgpe.py "${pgpe_args[@]}"
      done
      (( SEED++ ))
    done
  done
}

run_pgpe_sweep


echo "All policies generated."