---
# Configuration parameters for the low-level goal-conditioned policy
# Policy selection, checkpoint location and basic rollout settings.
# NOTE(review): both paths are relative; the directory they are resolved
# against is decided by the consuming code, which is not visible here.
policy_class: "stable_contrastive_rl"
checkpoint_path: "checkpoints/iterated_offline_rl/crl_just_bridge_1840000"
mini_dataset_path: "mini_dataset"
# Presumably the side length, in pixels, of square input images - confirm.
image_size: 256
# Presumably the number of steps executed per rollout - confirm.
rollout_timesteps: 25
# Settings for the sample-and-rank step; switched off in this configuration.
# NOTE(review): presumably num_samples is only read when activated is true -
# confirm against the consuming code.
sample_and_rank:
  activated: false
  num_samples: 100
# Exploration settings.
# NOTE(review): the *_prob values look like probabilities in [0, 1]; whether
# each is applied per step or per trajectory is not visible here - confirm.
exploration:
  make_traj_deterministic_prob: 0.5
  sampling_temperature: 0.2
  gripper_open_prob: 0.005
  gripper_close_prob: 0.01
# Behavior toggles; both are disabled in this configuration.
open_gripper_if_nothing_grasped: false
restrict_action_space: false
# Keyword arguments for the dataset: shuffling, image augmentation,
# goal relabeling and normalization.
dataset_kwargs:
  shuffle_buffer_size: 25000
  augment: true
  augment_next_obs_goal_differently: false
  augment_kwargs:
    # NOTE(review): two-element lists are presumably (lower, upper) bounds and
    # single-element lists a maximum delta - confirm against the augmentation
    # functions that receive them.
    random_resized_crop:
      scale: [0.8, 1.0]
      ratio: [0.9, 1.1]
    random_brightness: [0.2]
    random_contrast: [0.8, 1.2]
    random_saturation: [0.8, 1.2]
    random_hue: [0.1]
    # Lists the same five names as the augmentation keys above.
    augment_order:
      - "random_resized_crop"
      - "random_brightness"
      - "random_contrast"
      - "random_saturation"
      - "random_hue"
  goal_relabeling_strategy: "geometric"
  goal_relabeling_kwargs:
    reached_proportion: 0.0
    discount: 0.98
  relabel_actions: true
  normalization_type: "normal"
  dataset_contains_commanded_goals: false
# Per-dimension action statistics; every list below has seven entries.
# NOTE(review): presumably consumed for action normalization and/or clipping -
# confirm which of these the selected normalization actually uses.
ACT_MEAN:
  [
    1.9296819e-04, 1.3667766e-04, -1.4583133e-04,
    -1.8390431e-04, -3.0808983e-04, 2.7425270e-04,
    5.9716219e-01
  ]
ACT_STD:
  [
    0.00912848, 0.0127196, 0.01229497,
    0.02606696, 0.02875283, 0.07807977,
    0.48710242
  ]
ACT_MIN:
  [
    -0.0437546, -0.052831028, -0.035931006,
    -0.14489305, -0.15591072, -0.26039174,
    -0.780331
  ]
ACT_MAX:
  [
    0.04158026, 0.05223833, 0.05382493,
    0.15559858, 0.142592, 0.25956747,
    0.79311615
  ]
# Keyword arguments for the agent: critic and policy network settings plus
# optimization hyperparameters.
agent_kwargs:
  critic_network_kwargs:
    hidden_dims: [256, 256, 256]
    use_layer_norm: true
  critic_kwargs:
    init_final: 1.0e-12
    repr_dim: 16
    twin_q: true
  policy_network_kwargs:
    hidden_dims: [256, 256, 256]
    dropout_rate: 0.1
  policy_kwargs:
    tanh_squash_distribution: false
    std_parameterization: "fixed"
    # Seven entries, the same length as each ACT_* list in this file.
    fixed_std: [1, 1, 1, 1, 1, 1, 1]
  learning_rate: 3.0e-4
  warmup_steps: 2000
  actor_decay_steps: 2000000
  use_td: true
  gcbc_coef: 0.20
  # NOTE(review): dataset_kwargs.goal_relabeling_kwargs.discount is 0.98 while
  # this value is 0.99 - confirm the difference is intentional.
  discount: 0.99
  temperature: 1.0
  target_update_rate: 0.002
  shared_encoder: false
  early_goal_concat: false
  shared_goal_encoder: true
  use_proprio: false
# Image encoder selection and its options.
# NOTE(review): "resnetv1-34-bridge", "swish" and "avg" are presumably names
# looked up by the consuming code - confirm the accepted values there.
encoder:
  type: "resnetv1-34-bridge"
  config:
    act: "swish"
    add_spatial_coordinates: false
    pooling_method: "avg"
