#!/bin/bash
#SBATCH --job-name=PPO-alpha
#SBATCH --partition=CLUSTER
#SBATCH -t 7-00:00

### Example: requesting 2 nodes with 1 GPU each gives 2 GPUs in total (WORLD_SIZE == 2).
### Note: the x in --gres=gpu:x should equal the value of --ntasks-per-node.
#SBATCH --nodelist=compute-0-1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=20
#SBATCH --mem-per-cpu=1GB
#SBATCH -o ./output/%x-%j.out
#SBATCH -e ./output/%x-%j.err


# Prepare the software environment: load the Anaconda and CUDA 11.4 modules,
# then activate the "tf_tc" conda environment. The job stops at the first
# step that fails, so later commands never run under whatever python happens
# to be on PATH.
module load python/anaconda3 || {
  echo "error: could not load module python/anaconda3" >&2
  exit 1
}
module load cuda/cuda-11.4 || {
  echo "error: could not load module cuda/cuda-11.4" >&2
  exit 1
}

# NOTE(review): "source activate" is the legacy spelling of "conda activate";
# it is kept unchanged here - confirm which form this cluster's conda expects.
source activate tf_tc || {
  echo "error: could not activate conda environment tf_tc" >&2
  exit 1
}

# Parameter grid for the sweep. Each variable is a single-line,
# space-separated list; main_CV_alpha_01_decay_case.py is run once for every
# combination of the four lists.
n_views="3"
agent_random="True False"
target_case="1 2 3 4"
action_step="0.3"  # decay to 0.01

#######################################
# Run main_CV_alpha_01_decay_case.py once per grid combination, one run
# after another. A failed run is reported on stderr and the sweep moves on
# to the next combination, so one bad configuration does not cost the
# remaining runs.
# Globals:   n_views, agent_random, action_step, target_case (read)
# Arguments: none
# Outputs:   one progress line per run on stdout; failure reports on stderr
# Returns:   0 if every run succeeded, 1 if at least one run failed
#######################################
run_sweep() {
  local -a views randoms steps cases
  local view randomize step case_id
  local status=0 total=0 failed=0

  # Split the lists on whitespace only; unlike an unquoted $var expansion,
  # read -a never glob-expands the entries.
  read -r -a views <<< "$n_views"
  read -r -a randoms <<< "$agent_random"
  read -r -a steps <<< "$action_step"
  read -r -a cases <<< "$target_case"

  for view in "${views[@]}"; do
    for randomize in "${randoms[@]}"; do
      for step in "${steps[@]}"; do
        for case_id in "${cases[@]}"; do
          total=$((total + 1))
          printf 'run %d: n-views=%s agent-random=%s action-step=%s target-case=%s\n' \
            "$total" "$view" "$randomize" "$step" "$case_id"

          status=0
          python main_CV_alpha_01_decay_case.py \
            --n-views "$view" \
            --num-steps 512 \
            --agent-random "$randomize" \
            --action-step "$step" \
            --target-case "$case_id" \
            --learning-rate 8e-4 \
            --num-minibatches 4 \
            --seed 42 \
            --total-timesteps 80000 \
            --anneal-lr True \
            --max-grad-norm 0.5 || status=$?

          if (( status != 0 )); then
            failed=$((failed + 1))
            printf 'run %d failed with exit status %d\n' "$total" "$status" >&2
          fi
        done
      done
    done
  done

  if (( failed > 0 )); then
    printf '%d of %d runs failed\n' "$failed" "$total" >&2
    return 1
  fi
  return 0
}

# Keep this as the last command: its return status becomes the exit status of
# the script, so the job ends non-zero when any run failed.
run_sweep