# Tokenizer section: two top-level sizes plus a nested encoder/decoder config.
tokenizer:
  vocab_size: 512
  embed_dim: 512
  enc_dec_config:
    # `_target_` names the class this mapping describes (Hydra-style
    # instantiation — presumably; confirm against the loader).
    _target_: iris.tokenizer.EncoderDecoderConfig
    resolution: 64
    in_channels: 3
    z_channels: 512
    ch: 64
    # Five entries, all 1 — presumably one channel multiplier per
    # encoder/decoder level; confirm against EncoderDecoderConfig.
    ch_mult: [1, 1, 1, 1, 1]
    num_res_blocks: 2
    attn_resolutions: [8, 16]
    out_ch: 3
    dropout: 0.0
# Transformer section; `_target_` points at iris.TransformerConfig.
gpt:
  _target_: iris.TransformerConfig
  # NOTE(review): 17 is presumably 16 image tokens + 1 action token per
  # block — confirm against the tokenizer's output size.
  tokens_per_block: 17
  max_blocks: 21
  attention: causal
  num_layers: 10
  num_heads: 4
  # NOTE(review): differs from tokenizer.embed_dim (512) — confirm the two
  # embedding widths are meant to be independent.
  embed_dim: 256
  # Three `*_pdrop` values, all 0.1 (presumably dropout probabilities for
  # embeddings, residual paths and attention — confirm).
  embed_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1
# World-model section; `_target_` points at models.diffuser.WorldModelConfig.
# NOTE(review): this target lives under `models.*` while the tokenizer and gpt
# sections target `iris.*` — confirm both packages are importable together.
world_model:
  _target_: models.diffuser.WorldModelConfig
  # Same values as tokenizer.enc_dec_config.resolution (64) and
  # tokenizer.enc_dec_config.in_channels (3).
  image_size: 64
  image_channels: 3
  # Left null here; presumably filled in at runtime from the environment's
  # action space — TODO confirm.
  num_actions: null
  num_steps_conditioning: 4
  sigma_data: 0.5
  sigma_offset_noise: 0.3
# Environment section: a `train` environment spec and a keymap name derived
# from that spec's id.
env:
  train:
    _target_: envs.make_atari
    id: BreakoutNoFrameskip-v4
    max_episode_steps: 20000
    noop_max: 30
    done_on_life_loss: true
  # `${.train.id}` is a relative interpolation (OmegaConf syntax): the leading
  # dot anchors the lookup at this `env` mapping, so it reads env.train.id.
  # With the id above this yields atari/BreakoutNoFrameskip-v4 — assuming the
  # file is loaded through OmegaConf/Hydra.
  keymap: atari/${.train.id}
