#!/bin/bash
#
#SBATCH -N 1
#SBATCH -t 1-00:00
#SBATCH -o ./log/%j.out
#SBATCH -e ./log/%j.err
#SBATCH -a 0-9

# Launches one run of mdp_train_adv.py under a virtual X server (xvfb-run),
# using the SLURM array task id as the seed.
#
# Environment variables. They are read after ./env.sh is sourced, so env.sh
# may set them too. An empty value is treated the same as unset.
#   AGAINST         required; passed to --attacker_against
#   SHUFFLE         any non-empty value (even "0") adds --shuffle
#   EPOCH           --epoch            (default 200)
#   N_ROUNDS        --n_rounds         (default 100)
#   EPS_EPISODES    --eps_episodes     (default 0.8)
#   EPS_STEPS       --eps_steps        (default 0.4)
#   VICTIM_ITERS    --victim_iters     (default 30)
#   VICTIM_LR       --victim_lr        (default 0.00003)
#   ATTACKER_ITERS  --attacker_iters   (default 20)
#   ATTACKER_LR     --attacker_lr      (default 0.01)
#   BUDGET          --max_poison_diff  (default 5.0)

# Fail before doing any work if the environment setup file is absent, rather
# than running the job in an unconfigured environment.
if [[ ! -r ./env.sh ]]; then
    echo "error: ./env.sh is missing or unreadable (cwd: $PWD)" >&2
    exit 1
fi
# shellcheck source=/dev/null
source ./env.sh

# Without these two values the option that takes them would be passed with no
# argument and would consume the following flag instead; abort with a clear
# message up front.
: "${AGAINST:?AGAINST must be set; it is passed to --attacker_against}"
: "${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; submit as a job array or export it manually}"

# Apply defaults for anything left unset or empty.
EPOCH=${EPOCH:-200}
N_ROUNDS=${N_ROUNDS:-100}
EPS_EPISODES=${EPS_EPISODES:-0.8}
EPS_STEPS=${EPS_STEPS:-0.4}
VICTIM_ITERS=${VICTIM_ITERS:-30}
VICTIM_LR=${VICTIM_LR:-0.00003}
ATTACKER_ITERS=${ATTACKER_ITERS:-20}
ATTACKER_LR=${ATTACKER_LR:-0.01}
BUDGET=${BUDGET:-5.0}

SEED=$SLURM_ARRAY_TASK_ID

# --shuffle is a bare flag: present when SHUFFLE is non-empty, absent otherwise.
shuffle_flag=()
if [[ -n "${SHUFFLE:-}" ]]; then
    shuffle_flag=(--shuffle)
fi

# Arguments are kept in arrays so each value reaches python3 as exactly one
# word, independent of whitespace or glob characters in the variables.
setting_params=(
    --env miniworld
    --n_envs 60000
    --arch 1
    "${shuffle_flag[@]}"
    --context_len 250
    --n_actions 4
    --n_epochs 1000
    --epoch "$EPOCH"
)

adv_params=(
    --n_envs_eval 40
    --n_rounds "$N_ROUNDS"
    --eps_episodes "$EPS_EPISODES"
    --eps_steps "$EPS_STEPS"
    --victim_iters "$VICTIM_ITERS"
    --victim_lr "$VICTIM_LR"
    --attacker_iters "$ATTACKER_ITERS"
    --attacker_lr "$ATTACKER_LR"
    --max_poison_diff "$BUDGET"
)

xvfb-run -a -s "-screen 0 1024x768x24 -ac +extension GLX +render -noreset" python3 mdp_train_adv.py \
    "${setting_params[@]}" \
    "${adv_params[@]}" \
    --attacker_against "$AGAINST" \
    --seed "$SEED"
