# Shell script for running minigrid experiments.
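# To launch via slurm, append the following to each command:
#   +launcher=slurm hydra.launcher.timeout_min=$TIME & sleep 2
# This selects the slurm launcher, sets its timeout to $TIME, runs the command
# in the background, and pauses 2 seconds before the next launch.
# (Write `& sleep 2`, not `&; sleep 2`: `&;` is a syntax error in bash/sh.)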

# ---- Sweep identity ----
# N is forwarded as the `n=` override of every run (how many jobs to run).
N='range(3)'
# TAG is the experiment identifier; it prefixes every run's group name.
TAG='NeurIPS'

# ---- Experiment presets ----
# Keep exactly one EXPTS/TIME pair active and the others commented out.
# EXPTS is a space-separated list of experiment names; TIME is the value
# used for hydra.launcher.timeout_min when launching via slurm.

# Preset: key3, key4 (active)
EXPTS='key3 key4'
TIME='600'

# Preset: key5
# EXPTS='key5'
# TIME='1440'

# Preset: maze1d
# EXPTS='maze1d'
# TIME='400'

# Preset: maze2, maze1q
# EXPTS='maze2 maze1q'
# TIME='1440'

# ---- Generator hyperparameters ----
# Keep exactly one of the two groups below active, matching the preset above.

# Key envs (active) - comment out when running maze envs.
GCOUNTS='10'
GTHRESH='-0.3'
GMAX='100'

# Maze envs - uncomment (and comment out the key group) when running maze envs.
# GCOUNTS='30'
# GTHRESH='-0.2'
# GMAX='150'

#######################################
# Launch one training sweep.
# Arguments: "$@" - overrides forwarded verbatim, in order, to
#            `python -m langexplore.train -m`.
# Returns:   exit status of the python command.
#######################################
launch_train() {
    # OMP_NUM_THREADS=1 is set for this python invocation only.
    OMP_NUM_THREADS=1 python -m langexplore.train -m "$@"
}

#######################################
# Launch every sweep (baselines, AMIGo/L-AMIGo, NovelD/L-NovelD, ablations,
# one-hot variants) once per experiment named in $EXPTS.
# Globals:   EXPTS, TAG, N, GTHRESH, GMAX, GCOUNTS (read); EXPT (written)
# Arguments: none (any passed are discarded)
# Returns:   exit status of the last launched command. A failing run does
#            not stop the remaining runs.
#######################################
run_all_experiments() {
    # Overrides shared by every run. They are held in this function's own
    # positional parameters so they can be spliced in as "$@" without relying
    # on bash-only arrays; the script's own arguments are not touched.
    set -- n="$N" batch_size=32 lr=0.0001 momentum=0 epsilon=1e-5

    # $EXPTS is deliberately unquoted: it is a space-separated list that must
    # be word-split into one experiment name per iteration.
    # In the group names below, \${...} keeps a literal ${...} in the argument
    # (the shell does not expand it), presumably for the config system to
    # interpolate.
    for EXPT in $EXPTS; do
        # Baseline
        launch_train +experiment="$EXPT" group="$TAG-$EXPT-baseline" \
            generator=false language_goals=null \
            "$@" partial_obs=true

        # Naive message reward baseline
        launch_train +experiment="$EXPT" group="$TAG-$EXPT-naive-reward" \
            generator=false language_goals=null \
            "$@" partial_obs=true naive_message_reward=0.1

        # AMIGO (null) vs L-AMIGo (online_grounding) vs L-AMIGo (no grounder) (online_naive)
        launch_train +experiment="$EXPT" group="$TAG-$EXPT-\${language_goals}" \
            language_goals=online_grounding,null,online_naive \
            generator_threshold="$GTHRESH" \
            "$@" generator_maximum="$GMAX" generator_counts="$GCOUNTS" \
            partial_obs=true intrinsic_reward_coef=0.5

        # NovelD
        launch_train +experiment="$EXPT" \
            group="$TAG-$EXPT-noveld-\${noveld_novelty_coef}" \
            generator=false language_goals=achievable \
            noveld=true noveld_novelty_coef=0.5 rnd_lr=0.0001 \
            "$@" partial_obs=true

        # L-NovelD
        launch_train +experiment="$EXPT" \
            group="$TAG-$EXPT-l-noveld-\${noveld_novelty_coef}-\${separate_message_novelty_coef}" \
            generator=false language_goals=achievable \
            noveld=true noveld_novelty_coef=0.5 rnd_lr=0.0001 \
            "$@" partial_obs=true \
            separate_message_novelty_coef=0.5 separate_message_novelty=true

        # L-NovelD ablation: combined input
        launch_train +experiment="$EXPT" \
            group="$TAG-$EXPT-l-noveld-\${noveld_novelty_coef}-combined" \
            generator=false language_goals=achievable \
            noveld=true noveld_novelty_coef=0.1 rnd_lr=0.0001 \
            "$@" partial_obs=true \
            separate_message_novelty_coef=0.5 separate_message_novelty=false \
            noveld_combined=true

        # L-NovelD ablation: message only
        launch_train +experiment="$EXPT" \
            group="$TAG-$EXPT-l-noveld-message-only-\${separate_message_novelty_coef}" \
            generator=false language_goals=achievable \
            noveld=true noveld_novelty_coef=0.0 rnd_lr=0.0001 \
            "$@" partial_obs=true \
            separate_message_novelty_coef=0.5 separate_message_novelty=true

        # ==== ONEHOT ====
        # L-AMIGo
        launch_train +experiment="$EXPT" \
            group="$TAG-$EXPT-\${language_goals}-onehot" \
            language_goals=online_grounding onehot_language_goals=true \
            generator_threshold="$GTHRESH" \
            "$@" generator_maximum="$GMAX" generator_counts="$GCOUNTS" \
            partial_obs=true intrinsic_reward_coef=0.5

        # L-NovelD
        launch_train +experiment="$EXPT" \
            group="$TAG-$EXPT-l-noveld-\${noveld_novelty_coef}-\${separate_message_novelty_coef}-onehot" \
            generator=false language_goals=achievable onehot_language_goals=true \
            noveld=true noveld_novelty_coef=0.5 rnd_lr=0.0001 \
            "$@" partial_obs=true \
            separate_message_novelty_coef=0.5 separate_message_novelty=true
    done
}

run_all_experiments
