# Top-level run settings.
# NOTE(review): `obj` presumably selects the training objective and `IS`
# presumably toggles importance sampling — confirm against the script that
# consumes this file.
obj: maxentirl
IS: false
seed: 23
# NOTE(review): -1 presumably means "no GPU / run on CPU" — confirm.
cuda: -1
# Environment settings for the MotionPlanning task.
# NOTE(review): `T` is presumably the episode time horizon, and
# `x_lim` / `y_lim` presumably bound the workspace that contains
# `initial_state` and `goal` — confirm against the environment code.
env:
  env_name: MotionPlanning
  T: 250
  state_indices: all
  x_lim: 12
  y_lim: 8
  initial_state: [0.0, 1.0]
  goal: [11.5, 7.5]
  goal_radius: 0.5
  # Canonical lowercase boolean, consistent with the other flags in this file.
  is_goal_circle: false
# Settings for the outer IRL loop.
# NOTE(review): `n_itrs` is presumably the number of outer iterations and
# `save_interval: 0` presumably disables periodic checkpointing — confirm
# against the training script.
irl:
  training_trajs: 1
  n_itrs: 250
  save_interval: 0
  eval_episodes: 20
  expert_episodes: 1
  resample_episodes: 1
# Settings for the SAC policy learner.
# NOTE(review): the meaning of `k`, `update_every` and `update_num` is not
# visible from this file — confirm against the SAC implementation.
sac:
  k: 1
  epochs: 5
  log_step_interval: 1250  # five times the time horizon of the MDP (env.T = 250)
  update_every: 1
  random_explore_episodes: 1
  update_num: 1
  batch_size: 100
  lr: 0.001
  alpha: 0.2
  automatic_alpha_tuning: false
  buffer_size: 1000000
  num_test_episodes: 10
  reinitialize: false

# Settings for the learned reward model and its optimizer.
# NOTE(review): `clamp_magnitude` presumably bounds the reward output —
# confirm against the reward network code.
reward:
  use_bn: false
  residual: false
  hid_act: relu
  hidden_sizes: [64, 64]
  clamp_magnitude: 10
  lr: 0.0001
  # Written as a plain decimal: an exponent form without a decimal point
  # (e.g. 1e-3) is loaded as a *string* by YAML 1.1 parsers such as PyYAML.
  weight_decay: 0.001
  gradient_step: 1
  momentum: 0.9
# Settings for the discriminator network and its optimizer.
# NOTE(review): `iter` is presumably the number of optimizer steps per
# discriminator update and `reinit` presumably controls re-initialising the
# network between updates — confirm against the training code.
disc:
  reinit: false
  model_type: resnet_disc
  num_layer_blocks: 3
  hid_dim: 128
  hid_act: tanh
  use_bn: false
  clamp_magnitude: 10.0
  batch_size: 800
  lr: 0.0003
  weight_decay: 0.0001
  momentum: 0.9
  iter: 1200
# Settings for the critic network and its optimizer.
# NOTE(review): the role of `lam` is not visible from this file — confirm
# against the training code.
critic:
  lam: 0.5
  model_type: resnet_disc
  num_layer_blocks: 3
  hid_dim: 128
  hid_act: tanh
  use_bn: false
  batch_size: 800
  lr: 0.0003
  weight_decay: 0.0001
  # NOTE(review): momentum is 0.0 here but 0.9 in the `reward` and `disc`
  # sections — confirm this difference is intentional.
  momentum: 0.0
  iter: 1200
