# Run-level settings.
# Random seed value for the run.
seed: 1
# Device selector. NOTE(review): -1 presumably means "no GPU / run on CPU";
# confirm against the code that reads this key.
cuda: -1
# Environment settings.
env:
  # Environment identifier; quoted so the value is unambiguously a string.
  env_name: 'Ant-v4'
  # NOTE(review): presumably the episode horizon in steps; confirm with the consumer.
  T: 100
  # NOTE(review): 'all' presumably selects every state dimension; confirm.
  state_indices: 'all'
  # NOTE(review): 0 looks like "disabled"; confirm how the consumer interprets it.
  delayed: 0
# SAC agent hyperparameters.
# NOTE(review): the per-key meanings below are inferred from the key names only;
# confirm against the training code.
sac:
  # Number of training epochs.
  epochs: 10000
  # Logging interval (unit is defined by the consumer, presumably env steps).
  log_step_interval: 500
  # Presumably: run an update phase every N steps, with `update_num` updates each.
  update_every: 5
  update_num: 1
  # Presumably: episodes of random exploration before the policy is used.
  random_explore_episodes: 10
  batch_size: 100
  # Written with an explicit decimal point on purpose: a bare `1e-3` is resolved
  # as a *string* by YAML 1.1 loaders (e.g. PyYAML), while `1.0e-3` is a float
  # under both YAML 1.1 and YAML 1.2.
  lr: 1.0e-3
  alpha: 0.2
  automatic_alpha_tuning: false
  buffer_size: 1000000
  num_test_episodes: 10
# Expert data settings.
expert:
  # NOTE(review): presumably the number of expert samples/trajectories used per
  # episode or iteration; confirm against the consumer.
  samples_episode: 64
# Reward model (network and optimizer) settings.
# NOTE(review): the per-key meanings below are inferred from the key names only;
# confirm against the reward-model code.
reward:
  # Network architecture switches.
  use_bn: false
  residual: false
  hid_act: relu
  hidden_sizes: [64, 64]
  # Presumably a bound applied to the reward output magnitude; confirm.
  clamp_magnitude: 10
  # Optimizer settings.
  lr: 0.0001
  # Written with an explicit decimal point on purpose: a bare `1e-3` is resolved
  # as a *string* by YAML 1.1 loaders (e.g. PyYAML), while `1.0e-3` is a float
  # under both YAML 1.1 and YAML 1.2.
  weight_decay: 1.0e-3
  gradient_step: 1
  momentum: 0.9
