---
# Run-level settings.
# NOTE(review): `cuda: -1` presumably selects CPU (no GPU index) - confirm
# against the code that consumes this file.
seed: 1
cuda: -1
# Environment settings.
env:
  env_name: MotionPlanning
  # Time horizon of the MDP; sac.log_step_interval is kept at 5 * T.
  T: 250
  state_indices: all
  # NOTE(review): x_lim / y_lim look like the workspace extents; both
  # initial_state and goal fall inside [0, x_lim] x [0, y_lim] - confirm.
  x_lim: 12
  y_lim: 8
  initial_state: [0.0, 1.0]
  goal: [11.5, 7.5]
  goal_radius: 0.5
  # Canonical lowercase boolean, matching sac.automatic_alpha_tuning.
  is_goal_circle: false
# Hyperparameters grouped under `sac`.
# NOTE(review): presumably Soft Actor-Critic - confirm against the trainer.
sac:
  epochs: 200
  # Five times env.T (the time horizon of the MDP): 5 * 250 = 1250.
  # Revisit this value whenever env.T changes.
  log_step_interval: 1250
  update_every: 50
  update_num: 1
  random_explore_episodes: 10
  batch_size: 100
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-3` to the string "1e-3" rather than a float.
  lr: 1.0e-3
  alpha: 0.2
  automatic_alpha_tuning: false
  buffer_size: 1000000
  num_test_episodes: 10
# Settings grouped under `expert`.
# NOTE(review): the three values below are presumably episode counts for
# training, evaluating and sampling from the expert - confirm with the consumer.
expert:
  training_episode: 50
  eval_episode: 1
  sample_episode: 10
