# Top-level evaluation and batching scalars.
# NOTE(review): balanced_eval presumably toggles a balanced evaluation split; confirm with the consumer.
balanced_eval: false
# Same value (128) as evaluator_config.batch_size and the batch_size entries under reps.
batch_size: 128
# Data-collection settings.
# Note that n_envs (1024) and seed (0) here differ from the top-level n_envs (512) and seed (1).
# NOTE(review): presumably n_samples transitions are gathered with a uniformly random policy; confirm.
data_collection:
  n_envs: 1024
  n_samples: 150000
  policy: random
  seed: 0
# Dataset presets. Every mapping entry repeats its own key in `type` and uses img_size 32.
datasets:
  grid_2d:
    img_size: 32
    thickness: 1.0
    type: grid_2d
  # NOTE(review): this is a bare scalar sitting next to the dataset mappings, not a dataset
  # entry; code that iterates over `datasets` expecting mappings must skip it. Confirm intent.
  img_size: 32
  multi_object:
    img_size: 32
    n_objects: 1
    type: multi_object
  multi_object_selection:
    img_size: 32
    n_objects: 3
    type: multi_object_selection
  # The three taxi variants below share grid_size 10 and n_passengers 1; only `type` differs.
  taxi:
    grid_size: 10
    img_size: 32
    n_passengers: 1
    type: taxi
  taxi_gymnax:
    grid_size: 10
    img_size: 32
    n_passengers: 1
    type: taxi_gymnax
  taxi_suff:
    grid_size: 10
    img_size: 32
    n_passengers: 1
    type: taxi_suff
# Run-level device and regularisation scalars.
device: gpu
dropout: 0
# Matches the `navix` key under `envs`; presumably selects that entry (confirm with the loader).
env: navix
# Environment presets. Every entry sets autoreset: true and n_envs: 512.
# env_name values carry a backend prefix (gymnax_, navix_, pinball_, taxi_gymnax).
envs:
  # MinAtar entries (asterix, breakout, freeway, spaceinvaders) share the same shape:
  # noise_sigma 0.01 inside env_params, plus render: true.
  asterix:
    autoreset: true
    env_name: gymnax_Asterix-MinAtar
    env_params:
      noise_sigma: 0.01
    n_envs: 512
    render: true
  breakout:
    autoreset: true
    env_name: gymnax_Breakout-MinAtar
    env_params:
      noise_sigma: 0.01
    n_envs: 512
    render: true
  # Entries with no extra parameters use an explicit empty mapping rather than a bare key.
  cartpole:
    autoreset: true
    env_name: gymnax_CartPole-v1
    env_params: {}
    n_envs: 512
  freeway:
    autoreset: true
    env_name: gymnax_Freeway-MinAtar
    env_params:
      noise_sigma: 0.01
    n_envs: 512
    render: true
  mountaincar:
    autoreset: true
    env_name: gymnax_MountainCar-v0
    env_params: {}
    n_envs: 512
  # Entry whose key matches the top-level `env: navix`.
  navix:
    action_space: full
    autoreset: true
    env_name: navix_DoorKey-Uniform-8x8-v0
    env_params:
      max_steps: 500
    img_size: 32
    n_envs: 512
    # NOTE(review): here noise_sigma sits at entry level, whereas the MinAtar entries keep
    # it inside env_params; confirm the consumer reads it from this location.
    noise_sigma: 0.01
    observation_space: rgb
  # Non-uniform DoorKey variant: longer max_steps (1000) and no noise_sigma.
  navix_doorkey:
    action_space: full
    autoreset: true
    env_name: navix_DoorKey-8x8-v0
    env_params:
      max_steps: 1000
    img_size: 32
    n_envs: 512
    observation_space: rgb
  pinball:
    autoreset: true
    env_name: pinball_pinball
    env_params:
      level: easy
    n_envs: 512
  pong:
    autoreset: true
    env_name: gymnax_Pong-misc
    env_params: {}
    n_envs: 512
  spaceinvaders:
    autoreset: true
    env_name: gymnax_SpaceInvaders-MinAtar
    env_params:
      noise_sigma: 0.01
    n_envs: 512
    render: true
  taxi_gymnax:
    action_space: full
    autoreset: true
    env_name: taxi_gymnax
    env_params:
      max_steps: 500
    n_envs: 512
    observation_space: rgb
# Training length and evaluation cadence.
# NOTE(review): units of eval_every / eval_max_length (presumably steps) are not visible here; confirm.
epochs: 100
# Explicit null: no evaluation conditioning is configured.
eval_conditioning: null
eval_every: 25000
eval_max_length: 10000
eval_n_episodes: 10
# Evaluator settings: its own batch size, learning rate and epoch count, plus the
# predictor network spec (MLP, two hidden layers of 512, silu, rms normalisation).
evaluator_config:
  batch_size: 128
  lr: 0.0001
  n_epochs: 50
  predictor:
    activation: silu
    hidden_dims:
    - 512
    - 512
    normalize: rms
    outact: none
    type: mlp
# Run identity and optimisation scalars.
# Plain scalar is safe here because `*` is not the leading character (no alias parse).
# NOTE(review): presumably a glob over state names; confirm how the consumer matches it.
exclude_states: key_*
# exp_id embeds the swept hyperparameters; they agree with `lr`, `reps.dms.params.*`
# and `seed` elsewhere in this file (l2regconst is printed as 0.00150 for 0.0015).
exp_id: dms_baselines_doorkey_uniform_8_141__lr_3.53323e-04__reps.dms.params.gumbeltemp_7.41668__reps.dms.params.elboconst_4.73743__reps.dms.params.l2regconst_0.00150__reps.dms.params.gactionconst_3.53571__reps.dms.params.gtimeconst_2.55687__seed_1
horizon: 2
# NOTE(review): presumably an exponent annealed from `start` to `end`; schedule not visible here.
importance_weight_exp:
  end: 1.0
  start: 0.6
log_every: 2500
# Equals the 3.53323e-04 encoded in exp_id.
lr: 0.000353323
n_envs: 512
# Identical to exp_id in this file.
original_exp_id: dms_baselines_doorkey_uniform_8_141__lr_3.53323e-04__reps.dms.params.gumbeltemp_7.41668__reps.dms.params.elboconst_4.73743__reps.dms.params.l2regconst_0.00150__reps.dms.params.gactionconst_3.53571__reps.dms.params.gtimeconst_2.55687__seed_1
outdir: rl_experiments/baselines/doorkey_uniform_8/dms
prioritized: false
# Matches the `dms` key under `reps`; presumably selects that representation (confirm).
rep: dms
# Representation-learner presets, one mapping per method, plus a few shared scalars
# (dropout, latent_dim, n_actions, pretrain_steps) stored as siblings of the methods.
reps:
  # acf preset. With latent_dim 7 and n_actions 10: dynamics.output_dim is 70 (= 7 * 10)
  # and inverse.input_dim is 14 (= 2 * 7).
  # NOTE(review): those products are arithmetic observations; confirm they are the intended derivation.
  acf:
    batch_size: 128
    decoder:
      activation: silu
      hidden_dims:
      - 512
      - 512
      # Quoted so it stays the string 'false' (other networks in this file use `rms` here).
      normalize: 'false'
      outact: none
      type: mlp
    # Pixel decoder: first mlp layer width (7) equals latent_dim.
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 7
      - 256
      - 256
      type: residual_decoder
    dynamics:
      activation: silu
      hidden_dims:
      - 256
      normalize: 'false'
      outact: none
      output_dim: 70
      type: mlp
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: tanh
      output_dim: 7
      type: mlp
    # Pixel encoder: last mlp layer width (7) equals latent_dim; tanh final activation
    # mirrors the vector encoder's outact.
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      final_activation: tanh
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 7
      type: residual_encoder
    energy:
      activation: silu
      hidden_dims:
      - 256
      normalize: 'false'
      outact: none
      output_dim: 10
      type: mlp
    info_nce: true
    inverse:
      activation: silu
      hidden_dims:
      - 128
      input_dim: 14
      normalize: 'false'
      outact: none
      output_dim: 10
      type: mlp
    latent_dim: 7
    n_actions: 10
    noise_std: 0.005
    # Loss-term weights; grounding, per-action forward and reconstruction are set to 0.0.
    params:
      forward_const: 1.0
      grounding_const: 0.0
      inverse_const: 1.0
      inverse_model_const: 1.0
      per_action_forward_const: 0.0
      policy_const: 1.0
      recons_const: 0.0
    per_factor: false
    pi:
      activation: silu
      hidden_dims:
      - 256
      - 256
      normalize: 'false'
      outact: none
      output_dim: 10
      type: mlp
    projector:
      activation: silu
      hidden_dims:
      - 128
      normalize: 'false'
      outact: none
      output_dim: 7
      type: mlp
    use_action_weights: true
    vars_per_factor: 1
  # autoencoder preset: encoder/decoder pairs (vector and pixel variants) around a
  # 7-dimensional latent, with a single reconstruction weight in params.
  autoencoder:
    decoder:
      activation: silu
      hidden_dims:
      - 128
      - 128
      input_dim: 7
      normalize: 'false'
      outact: none
      type: mlp
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 7
      - 256
      - 256
      type: residual_decoder
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      output_dim: 7
      type: mlp
    # Both output_dim and the last mlp layer are 7 here (equal to latent_dim).
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 7
      output_dim: 7
      type: residual_encoder
    latent_dim: 7
    n_actions: 10
    params:
      recons_const: 1.0
    vars_per_factor: 1
  # base preset: encoders only (vector and pixel), no decoder and no loss params.
  base:
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 128
      normalize: 'false'
      outact: none
      output_dim: 7
      type: mlp
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 7
      output_dim: 7
      type: residual_encoder
    latent_dim: 7
    n_actions: 10
  # dms preset: the entry whose key matches the top-level `rep: dms`.
  # Its `params` values are the ones encoded in exp_id.
  dms:
    decoder:
      activation: silu
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      type: mlp
    # Pixel decoder: input_dim 7 equals latent_dim; output_shape equals obs_dim below.
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      input_dim: 7
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 7
      - 256
      - 256
      output_shape:
      - 32
      - 32
      - 3
      type: residual_decoder
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      output_dim: 7
      type: mlp
    # Pixel encoder: input_shape equals obs_dim below. The last mlp layer is 14 (= 2 * latent_dim).
    # NOTE(review): presumably two values per latent (e.g. distribution parameters); confirm.
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      final_activation: none
      input_shape:
      - 32
      - 32
      - 3
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 14
      type: residual_encoder
    is_pixel: true
    latent_dim: 7
    n_actions: 10
    # 32 x 32 x 3, consistent with img_size: 32 and observation_space: rgb in envs.navix.
    obs_dim:
    - 32
    - 32
    - 3
    # Swept hyperparameters for this run (see exp_id).
    params:
      elbo_const: 4.73743
      g_action_const: 3.53571
      g_time_const: 2.55687
      gumbel_temp: 7.41668
      l2_reg_const: 0.0015
    # input_dim 17 = latent_dim (7) + n_actions (10).
    # NOTE(review): presumably latent concatenated with a one-hot action; confirm.
    transition:
      activation: silu
      hidden_dims:
      - 256
      input_dim: 17
      normalize: 'false'
      outact: none
      output_dim: 1
      type: mlp
    # Same value as the top-level use_ground_truth_states.
    use_ground_truth_states: false
    vars_per_factor: 1
  # dreamervae preset: hidden_dim 256, latent/state dim 7, `type: gaussian`.
  # NOTE(review): categoricals (7) and n_values (32) are set alongside type: gaussian;
  # whether they are used in gaussian mode is not visible here.
  dreamervae:
    categoricals: 7
    decoder:
      activation: silu
      hidden_dims:
      - 256
      - 256
      input_dim: 7
      normalize: rms
      outact: tanh
      type: mlp
    # input_dim and the first mlp layer are both 1024.
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      input_dim: 1024
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 1024
      - 256
      - 256
      outact: sigmoid
      type: residual_decoder
    # input_dim 263 = 256 + 7.
    # NOTE(review): presumably hidden_dim + latent/state dim; confirm.
    dynamics:
      activation: silu
      hidden_dims:
      - 256
      - 256
      input_dim: 263
      normalize: 'false'
      outact: none
      output_dim: 7
      type: mlp
    embed:
      activation: silu
      hidden_dims:
      - 256
      normalize: rms
      outact: none
      output_dim: 256
      type: mlp
    encoder:
      activation: silu
      hidden_dims:
      - 256
      input_dim: 256
      normalize: 'false'
      outact: none
      output_dim: 7
      type: mlp
    hidden_dim: 256
    latent_dim: 7
    n_actions: 10
    n_values: 32
    obs_embed:
      activation: silu
      hidden_dims:
      - 256
      - 256
      normalize: rms
      outact: none
      output_dim: 256
      type: mlp
    obs_embed_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 256
      output_dim: 256
      type: residual_encoder
    params:
      dyn_const: 1.0
      free_nats: 1.0
      recons_const: 1.0
      rep_const: 0.1
    pixels: false
    state_dim: 7
    type: gaussian
    vars_per_factor: 1
  # Shared scalar stored as a sibling of the method presets (same value as top-level dropout).
  dropout: 0
  # gcl preset: encoder/decoder pairs plus an energy network with n_actions (10) outputs.
  # Reconstruction weight is 0.0; energy weight is 10.0.
  gcl:
    batch_size: 128
    decoder:
      activation: silu
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      type: mlp
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 7
      - 256
      - 256
      type: residual_decoder
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: tanh
      output_dim: 7
      type: mlp
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 7
      type: residual_encoder
    # Unlike the other gcl networks, the energy MLP uses rms normalisation.
    energy:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: rms
      outact: none
      output_dim: 10
      type: mlp
    info_nce: true
    latent_dim: 7
    n_actions: 10
    noise_std: 0.01
    params:
      energy_const: 10.0
      recons_const: 0.0
    per_factor: false
    use_action_weights: false
    vars_per_factor: 1
  # ivae preset: encoders/decoders work at width 256 (hidden_dim); a separate
  # inference network maps to 14 outputs (= 2 * latent_dim).
  ivae:
    decoder:
      activation: silu
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      output_dim: 256
      type: mlp
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 256
      type: residual_decoder
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      output_dim: 256
      type: mlp
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      final_activation: none
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 256
      type: residual_encoder
    hidden_dim: 256
    # input_dim 273 = 256 + 7 + 10.
    # NOTE(review): presumably hidden_dim + latent_dim + n_actions; confirm.
    inference:
      activation: silu
      hidden_dims:
      - 256
      input_dim: 273
      normalize: 'false'
      outact: none
      output_dim: 14
      type: mlp
    latent_dim: 7
    n_actions: 10
    params:
      elbo_const: 1.0
    vars_per_factor: 1
  # Shared scalar stored as a sibling of the method presets; every preset also repeats latent_dim: 7.
  latent_dim: 7
  # markov preset: encoders plus `inverse` and `ratio` networks, with three loss
  # weights (inverse, ratio, smoothness) in params.
  markov:
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: tanh
      output_dim: 7
      type: mlp
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 7
      output_dim: 7
      type: residual_encoder
    # No input_dim/output_dim given here; presumably filled in by the consumer (confirm).
    inverse:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      type: mlp
    latent_dim: 7
    n_actions: 10
    params:
      inverse_const: 10.0
      ratio_const: 1.0
      smoothness_const: 1.0
    ratio:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      output_dim: 1
      type: mlp
    ratio_batch_size: 128
    smoothness_thresh: 0.01
    vars_per_factor: 1
  # multistep_acf preset: wraps a full acf configuration (acf_config) and adds an
  # offset embedding and a multistep classifier with n_actions (10) outputs.
  multistep_acf:
    # Key-for-key identical to the `reps.acf` preset in this file.
    acf_config:
      batch_size: 128
      decoder:
        activation: silu
        hidden_dims:
        - 512
        - 512
        normalize: 'false'
        outact: none
        type: mlp
      decoder_pixel:
        cnn_activation: silu
        cnn_blocks: 2
        depth: 24
        min_resolution: 4
        mlp_activation: silu
        mlp_layers:
        - 7
        - 256
        - 256
        type: residual_decoder
      dynamics:
        activation: silu
        hidden_dims:
        - 256
        normalize: 'false'
        outact: none
        output_dim: 70
        type: mlp
      encoder:
        activation: silu
        dropout: 0
        hidden_dims:
        - 512
        - 512
        normalize: 'false'
        outact: tanh
        output_dim: 7
        type: mlp
      encoder_pixel:
        cnn_activation: silu
        cnn_blocks: 2
        depth: 24
        dropout: 0
        final_activation: tanh
        min_resolution: 4
        mlp_activation: silu
        mlp_layers:
        - 256
        - 256
        - 7
        type: residual_encoder
      energy:
        activation: silu
        hidden_dims:
        - 256
        normalize: 'false'
        outact: none
        output_dim: 10
        type: mlp
      info_nce: true
      inverse:
        activation: silu
        hidden_dims:
        - 128
        input_dim: 14
        normalize: 'false'
        outact: none
        output_dim: 10
        type: mlp
      latent_dim: 7
      n_actions: 10
      noise_std: 0.005
      params:
        forward_const: 1.0
        grounding_const: 0.0
        inverse_const: 1.0
        inverse_model_const: 1.0
        per_action_forward_const: 0.0
        policy_const: 1.0
        recons_const: 0.0
      per_factor: false
      pi:
        activation: silu
        hidden_dims:
        - 256
        - 256
        normalize: 'false'
        outact: none
        output_dim: 10
        type: mlp
      projector:
        activation: silu
        hidden_dims:
        - 128
        normalize: 'false'
        outact: none
        output_dim: 7
        type: mlp
      use_action_weights: true
      vars_per_factor: 1
    # NOTE(review): presumably an embedding of the step offset (width 128, offsets up to 16); confirm.
    embed:
      dim: 128
      max_offset: 16
    latent_dim: 7
    multistep_classifier:
      activation: silu
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      output_dim: 10
      type: mlp
    n_actions: 10
    params:
      multistep_inv_const: 1.0
    vars_per_factor: 1
  # Shared scalars stored as siblings of the method presets.
  n_actions: 10
  pretrain_steps: 0
  # recurrentacf preset: acf-style networks plus a GRU memory with hidden_dim 128.
  # Encoders emit 128 features rather than latent_dim.
  recurrentacf:
    batch_size: 128
    decoder:
      activation: silu
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      type: mlp
    # First mlp layer is 135 = 128 + 7.
    # NOTE(review): presumably hidden_dim + latent_dim; confirm.
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 135
      - 256
      - 256
      type: residual_decoder
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 512
      - 512
      normalize: 'false'
      outact: none
      output_dim: 128
      type: mlp
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      final_activation: none
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 128
      type: residual_encoder
    energy:
      activation: silu
      hidden_dims:
      - 256
      normalize: 'false'
      outact: none
      output_dim: 10
      type: mlp
    hidden_dim: 128
    info_nce: true
    latent_dim: 7
    # Recurrent memory: GRU with input_dim equal to latent_dim (7).
    memory:
      hidden_dim: 128
      input_dim: 7
      type: gru
    # output_dim 1280 = 128 * 10.
    # NOTE(review): presumably hidden_dim * n_actions; confirm.
    memory_action:
      activation: silu
      hidden_dims:
      - 256
      - 256
      normalize: 'false'
      outact: none
      output_dim: 1280
      type: mlp
    n_actions: 10
    noise_std: 0.005
    # Unlike reps.acf, policy_const is 0.0 here and there is no grounding_const key.
    params:
      forward_const: 1.0
      inverse_const: 1.0
      inverse_model_const: 1.0
      per_action_forward_const: 0.0
      policy_const: 0.0
      recons_const: 0.0
    per_factor: false
    pi:
      activation: silu
      hidden_dims:
      - 256
      - 256
      normalize: 'false'
      outact: none
      output_dim: 10
      type: mlp
    posterior:
      activation: silu
      hidden_dims:
      - 256
      - 256
      normalize: 'false'
      outact: tanh
      type: mlp
    recurrent: true
    use_action_weights: false
    vars_per_factor: 1
  # spr preset: encoders plus predictor, projection and transition MLPs; no loss
  # weights (params is an explicit empty mapping).
  spr:
    encoder:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: tanh
      output_dim: 7
      type: mlp
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 7
      output_dim: 7
      type: residual_encoder
    latent_dim: 7
    n_actions: 10
    params: {}
    # NOTE(review): predictor and projection both declare output_dim: 1; confirm whether the
    # consumer overrides this or a scalar output is intended.
    predictor:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      output_dim: 1
      type: mlp
    projection:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      output_dim: 1
      type: mlp
    # NOTE(review): presumably the target-network update rate; confirm.
    tau: 0.01
    transition:
      activation: silu
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      type: mlp
    vars_per_factor: 1
  # vae preset: encoder/decoder pairs around a 7-dimensional latent, with KL and
  # reconstruction weights both 1.0.
  vae:
    beta: 1.0
    decoder:
      activation: silu
      hidden_dims:
      - 128
      - 128
      input_dim: 7
      normalize: 'false'
      outact: none
      type: mlp
    decoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 7
      - 256
      - 256
      type: residual_decoder
    # No output_dim given for the vector encoder; presumably set by the consumer (confirm).
    encoder:
      activation: silu
      dropout: 0
      hidden_dims:
      - 128
      - 128
      normalize: 'false'
      outact: none
      type: mlp
    # Last mlp layer is 14 (= 2 * latent_dim).
    # NOTE(review): presumably mean and (log-)variance per latent; confirm.
    encoder_pixel:
      cnn_activation: silu
      cnn_blocks: 2
      depth: 24
      dropout: 0
      final_activation: none
      min_resolution: 4
      mlp_activation: silu
      mlp_layers:
      - 256
      - 256
      - 14
      type: residual_encoder
    latent_dim: 7
    n_actions: 10
    params:
      kl_const: 1.0
      recons_const: 1.0
    vars_per_factor: 1
# Trailing run-level scalars.
# Matches the `seed_1` suffix of exp_id (data_collection uses its own seed, 0).
seed: 1
# Same value as datasets.grid_2d.thickness.
thickness: 1.0
# 117187 = floor(150000 * 100 / 128), i.e. data_collection.n_samples * epochs / batch_size.
# NOTE(review): presumably derived from those keys; re-derive if any of them changes.
training_steps: 117187
# Same value as reps.dms.use_ground_truth_states.
use_ground_truth_states: false
weight_decay: 0.001
