## Config for experiments

# Environment identifier and the wrapper applied around it.
env: BabyAI-BossLevel-v0
wrapper: LanguageWrapper
# Keyword arguments for the wrapper named above.
# NOTE(review): max_len presumably caps the language sequence length and `pad`
# pads shorter sequences up to it; the dataset filenames in alg_kwargs contain
# `l36`, which looks like it should match max_len - confirm before changing.
wrapper_kwargs:
  max_len: 36
  pad: true

# NOTE(review): presumably the maximum number of environment steps per episode;
# the unit is not stated in this file - confirm against the consumer.
time_limit: 344

# Training algorithm and the keyword arguments handed to it.
alg: BehaviorCloning
alg_kwargs:
  # Pickled training and validation datasets (absolute paths on shared storage).
  dataset: /shared/ANONYMIZED/babyai_datasets/BossLevelAgressiveMask_partial_50k_l36_s92_seq2seq_dt.pkl
  validation_dataset: /shared/ANONYMIZED/babyai_datasets/BossLevelAgressiveMask_partial_50k_l36_s92_valid_seq2seq_dt.pkl
  # NOTE(review): presumably the fraction of the training dataset to use
  # (1.0 = all of it) - confirm.
  dataset_fraction: 1.0
  batch_size: 32
  # NOTE(review): presumably forwarded to the optimizer constructor; the
  # optimizer class itself is not selected in this file.
  optim_kwargs:
    lr: 0.0001
    eps: 0.00000001
  # Lowercase `false` is the canonical YAML boolean spelling; it is resolved as
  # a boolean by YAML 1.1 and 1.2 parsers alike and matches `pad: true` above.
  pretraining: false
  # NOTE(review): these look like weights on language / unsupervised loss
  # terms (0.0 would disable the latter) - confirm against the algorithm code.
  lang_coeff: 0.7
  unsup_coeff: 0.0

# Settings for the training loop: run length, logging and evaluation cadence.
train_kwargs:
  total_steps: 1000000
  log_freq: 1000
  # -1 disables evaluation episodes, because inference is very slow.
  eval_ep: -1
  eval_freq: 10000
  # NOTE(review): presumably the metric computed on validation_dataset and used
  # to compare checkpoints - confirm.
  validation_metric: accuracy

# Model class and the keyword arguments handed to it.
network: HierarchicalSeq2SeqDT
# NOTE(review): the names suggest a transformer with 4 encoder layers,
# 1 decoder layer, 2 attention heads, embedding width 128, and an MLP hidden
# size of mlp_ratio * n_embd - confirm against the network implementation.
network_kwargs:
  n_enc_layer: 4
  n_dec_layer: 1
  n_head: 2
  n_embd: 128
  mlp_ratio: 2
