defaults:
  - _self_
  - tokenizer: default
  - world_model: default
  - actor_critic: default
  - env: default
  - datasets: default

wandb:
  mode: online
  project: iris
  entity: null
  name: null
  group: null
  tags: null
  notes: null

initialization:
  path_to_checkpoint: null
  load_tokenizer: False
  load_world_model: False
  load_actor_critic: False

common:
  epochs: 600
  device: cuda:0
  do_checkpoint: True
  seed: 0
  sequence_length: ${world_model.max_blocks}
  resume: False # set by resume.sh script only.

collection:
  train:
    num_envs: 1
    stop_after_epochs: 500
    num_episodes_to_save: 10
    config:
      epsilon: 0.01
      should_sample: True
      temperature: 1.0
      num_steps: 200
      burn_in: ${training.actor_critic.burn_in}
  test:
    num_envs: 8
    num_episodes_to_save: ${collection.train.num_episodes_to_save}
    config:
      epsilon: 0.0
      should_sample: True
      temperature: 0.5
      num_episodes: 16
      burn_in: ${training.actor_critic.burn_in}

training:
  should: True
  learning_rate: 0.0001
  sampling_weights: [0.125, 0.125, 0.25, 0.5]
  tokenizer:
    batch_num_samples: 256
    grad_acc_steps: 1
    max_grad_norm: 10.0
    start_after_epochs: 5
    steps_per_epoch: 200
  world_model:
    batch_num_samples: 64
    grad_acc_steps: 1
    max_grad_norm: 10.0
    weight_decay: 0.01
    start_after_epochs: 25
    steps_per_epoch: 200
  actor_critic:
    batch_num_samples: 64
    grad_acc_steps: 1
    max_grad_norm: 10.0
    start_after_epochs: 50
    steps_per_epoch: 200
    imagine_horizon: ${common.sequence_length}
    burn_in: 20
    gamma: 0.995
    lambda_: 0.95
    entropy_weight: 0.001

evaluation:
  should: True
  every: 5
  tokenizer:
    batch_num_samples: ${training.tokenizer.batch_num_samples}
    start_after_epochs: ${training.tokenizer.start_after_epochs}
    save_reconstructions: True
  world_model:
    batch_num_samples: ${training.world_model.batch_num_samples}
    start_after_epochs: ${training.world_model.start_after_epochs}
  actor_critic:
    num_episodes_to_save: ${training.actor_critic.batch_num_samples}
    horizon: ${training.actor_critic.imagine_horizon}
    start_after_epochs: ${training.actor_critic.start_after_epochs}
