#!/usr/bin/env bash
#
# Runs five train_PEBBLE.py configurations one after another, then a single
# train_SAC.py run, all with the same environment config, seed and CUDA device.
#
# Usage: <this script> ENV MAX_FEEDBACK CUDA_DEVICE_ID SEED
#   ENV             passed as env=ENV to both training scripts
#   MAX_FEEDBACK    passed as max_feedback=MAX_FEEDBACK to train_PEBBLE.py
#   CUDA_DEVICE_ID  passed as device=cuda:CUDA_DEVICE_ID
#   SEED            passed as seed=SEED
#
# Runs are strictly sequential. `set -e` is intentionally NOT enabled: a
# failing run must not prevent the remaining configurations from starting.
# Only POSIX sh constructs are used, so `sh <this script> ...` keeps working.

if [ "$#" -lt 4 ]; then
  printf 'Usage: %s ENV MAX_FEEDBACK CUDA_DEVICE_ID SEED\n' "${0##*/}" >&2
  exit 2
fi

MY_ENV_CONFIG_NAME=$1
MAXFEEDBACK=$2
MYCUDA_DEVID=$3
SEED=$4

#######################################
# Launch one train_PEBBLE.py run with the shared hyper-parameters, then
# sleep for 60 seconds.
# Globals:   MY_ENV_CONFIG_NAME, SEED, MYCUDA_DEVID, MAXFEEDBACK (read)
# Arguments: $1 - value for adloss=
#            $2 - value for tloss=
#            $3 - value for surf_loss=
# Returns:   exit status of the trailing sleep; the status of the python
#            run is deliberately not propagated.
#######################################
run_pebble() {
  # Every override is its own quoted word, so the command no longer relies
  # on word-splitting of a pre-built string.
  python train_PEBBLE.py \
    "env=$MY_ENV_CONFIG_NAME" \
    "seed=$SEED" \
    agent.params.actor_lr=0.0003 \
    agent.params.critic_lr=0.0003 \
    gradient_update=1 \
    activation=tanh \
    num_unsup_steps=9000 \
    num_train_steps=1000000 \
    agent.params.batch_size=512 \
    double_q_critic.params.hidden_dim=256 \
    double_q_critic.params.hidden_depth=3 \
    diag_gaussian_actor.params.hidden_dim=256 \
    diag_gaussian_actor.params.hidden_depth=3 \
    reward_update=10 \
    num_interact=1 \
    reward_batch=50 \
    feed_type=1 \
    teacher_beta=-1 \
    teacher_gamma=1 \
    teacher_eps_mistake=0 \
    teacher_eps_skip=0 \
    teacher_eps_equal=0 \
    wandb=true \
    "device=cuda:$MYCUDA_DEVID" \
    agent=sac \
    adloss.weight=3 \
    tloss.weight=0.5 \
    "adloss=$1" \
    "tloss=$2" \
    "surf_loss=$3" \
    "max_feedback=$MAXFEEDBACK"

  # Pause between consecutive runs (purpose not stated in the original;
  # presumably lets the previous run release the GPU -- confirm).
  sleep 60
}

###### CONFIGS
#          adloss tloss surf_loss
run_pebble base   base  null
run_pebble none   base  null
run_pebble base   none  null
run_pebble none   none  null
run_pebble none   none  true

# Final run: train_SAC.py with its own learning rates on the same env/seed/device.
python train_SAC.py \
  "env=$MY_ENV_CONFIG_NAME" \
  "seed=$SEED" \
  agent.params.actor_lr=0.0001 \
  agent.params.critic_lr=0.0001 \
  num_train_steps=1000000 \
  wandb=true \
  "device=cuda:$MYCUDA_DEVID"