#!/bin/bash

# export CUDA_VISIBLE_DEVICES=0

# Sweep configuration.
#   label   - forwarded to every run as alg.WANDB_LABEL.
#   seeds   - the integers 1..20, one run per seed (forwarded as SEED).
#   layouts - layout names, forwarded as alg.ENV_KWARGS.layout.
label=iclr
# Brace expansion builds the literal range without forking `seq` and without
# relying on word splitting of an unquoted command substitution.
seeds=({1..20})
layouts=(
  cramped_room
  asymm_advantages
  coord_ring
  forced_coord
  counter_circuit
)

# Launch every baseline for every (seed, layout) pair, one run after another:
# VDN, IQL, QMIX, then Q+FIX-sum, Q+FIX-mono and Q+FIX-lin.
# Reads `seeds`, `layouts` and `label`, which are set earlier in this script.
# Exit statuses of the individual runs are not checked, so a failed run does
# not stop the sweep.

# Debug overrides that QMIX and the Q+FIX variants have in common. They do not
# depend on seed or layout, so they are built once, outside the loops.
qfix_shared_args=(
  ++alg.DEBUG_NORMALIZE_QI=false
  ++alg.QFIX.DEBUG_RECOVER_FIXEE_W=false
  ++alg.QFIX.DEBUG_RECOVER_FIXEE_B=false
  ++alg.DEBUG_W_GT_LINEAR_SCHEDULE=false
  ++alg.DEBUG_W_GT_LINEAR_SCHEDULE_INIT_VALUE=0.0
  ++alg.DEBUG_W_GT_LINEAR_SCHEDULE_END_VALUE=-1.0
  ++alg.DEBUG_W_GT_LINEAR_SCHEDULE_TRANSITION_STEPS=2_500_000
  ++alg.DEBUG_W_GT_LINEAR_SCHEDULE_TRANSITION_BEGIN=2_500_000
  ++alg.DEBUG_INTERVENTION_REGULARIZATION_INIT_VALUE=1.0
  ++alg.DEBUG_INTERVENTION_REGULARIZATION_END_VALUE=0.0
  ++alg.DEBUG_INTERVENTION_REGULARIZATION_TRANSITION_STEPS=500_000
  ++alg.DEBUG_INTERVENTION_REGULARIZATION_TRANSITION_BEGIN=0
)

# QMIX is run through the QFIX script with the fixing network disabled.
# NOTE(review): DEBUG_RECOVER_FIXEE=true is presumably the switch that does
# this; confirm against qfix_cnn_overcooked.py.
qmix_args=(
  "${qfix_shared_args[@]}"
  ++alg.QFIX.DEBUG_RECOVER_FIXEE=true
  ++alg.DEBUG_INTERVENTION_REGULARIZATION=false
)

# Q+FIX proper: differs from the QMIX set only in these two values.
qfix_args=(
  "${qfix_shared_args[@]}"
  ++alg.QFIX.DEBUG_RECOVER_FIXEE=false
  ++alg.DEBUG_INTERVENTION_REGULARIZATION=true
)

for seed in "${seeds[@]}"; do
  for layout in "${layouts[@]}"; do
    # Overrides passed to every algorithm for this (seed, layout) pair.
    arguments=(
      ++WANDB_MODE=online
      ++ENTITY=XXXX-2
      ++PROJECT=qfix-jaxmarl

      ++alg.TEST_INTERVAL=0.01
      ++alg.TAU=0.5
      ++alg.MAX_GRAD_NORM=0.25

      ++alg.ENV_KWARGS.layout="$layout"
      ++SEED="$seed"
      ++alg.WANDB_LABEL="$label"
    )

    python baselines/QLearning/vdn_cnn_overcooked.py +alg=ql_cnn_overcooked "${arguments[@]}" ++alg.ALG_NAME=vdn
    python baselines/QLearning/iql_cnn_overcooked.py +alg=ql_cnn_overcooked "${arguments[@]}" ++alg.ALG_NAME=iql

    # QMIX reuses the q+fix-mono config file together with the QMIX overrides.
    python baselines/QLearning/qfix_cnn_overcooked.py +alg=q+fix-mono_cnn_overcooked.yaml "${arguments[@]}" "${qmix_args[@]}" ++alg.ALG_NAME=qmix

    # Each Q+FIX variant gets only the Q+FIX overrides, so no key is passed
    # twice and the result does not depend on which duplicate the parser keeps.
    for variant in sum mono lin; do
      python baselines/QLearning/qfix_cnn_overcooked.py "+alg=q+fix-${variant}_cnn_overcooked.yaml" "${arguments[@]}" "${qfix_args[@]}" "++alg.ALG_NAME=q+fix-${variant}"
    done

  done
done
