# Hydra defaults list: the config-group entries composed into this config.
# `_self_` is listed first, so the values in this file are merged first and
# the group configs selected below are allowed to override them.
defaults:
  - _self_
  - logger: cli
  - env: pointmass_obstacle
  - evaluator: default
  
# Empty mapping by default.
# NOTE(review): presumably extra options for the selected env; confirm
# against the code that reads it.
env_settings: {}

# `???` is OmegaConf's mandatory-value marker: no default is given here, so
# each key must be filled in (by a composed config, an override, or code)
# before it is read. `obs_shape`, `action_dim` and `action_is_discrete` are
# interpolated into the `policy` and `storage` nodes below;
# `total_num_updates` is not referenced anywhere else in this file.
obs_shape: ???
action_dim: ???
total_num_updates: ???
action_is_discrete: ???

# Core run settings. `num_steps`, `num_envs` and `gamma` are shared with the
# `policy_updater` / `storage` nodes below through `${...}` interpolation.
num_steps: 128
num_envs: 32
device: "cpu"
only_eval: false
seed: 31
num_eval_episodes: 100
# Written with an explicit mantissa point and exponent sign so that every
# YAML parser reads a float; a bare `1e7` is a string under stock YAML 1.1
# loaders and only a float because OmegaConf patches its loader.
num_env_steps: 1.0e+7
# NOTE(review): same value as `policy.recurrent_hidden_size`, but no
# interpolation in this file reads this key (`storage` reads the policy one).
recurrent_hidden_state_size: 128
gamma: 0.99

# Intervals
# NOTE(review): the unit of these intervals (updates vs. env steps) is not
# visible in this file; confirm against the training loop.
log_interval: 1
# NOTE(review): -1 presumably disables periodic evaluation; confirm.
eval_interval: -1
# NOTE(review): far larger than `num_env_steps`, so presumably "never save
# periodically"; confirm.
save_interval: 100000000000

# Saving / Loading
# `null` means no checkpoint is specified by default.
# NOTE(review): `load_policy` and `resume_training` presumably only matter
# when `load_checkpoint` is set; confirm against the code that reads them.
load_checkpoint: null
load_policy: true
resume_training: false

# Policy node. `_target_` is the key Hydra's `instantiate` uses to locate the
# class to construct; the remaining keys are passed as keyword arguments.
policy:
  _target_: imitation_learning.policy_opt.policy.Policy
  hidden_size: 128
  # Also read by `storage.recurrent_hidden_state_size` via interpolation.
  recurrent_hidden_size: 128
  is_recurrent: false
  # Resolved from the mandatory top-level values of the same names.
  obs_shape: ${obs_shape}
  action_dim: ${action_dim}
  action_is_discrete: ${action_is_discrete}
  std_init: 0.0

# Policy-update node (PPO). `_target_` names the class for Hydra's
# `instantiate`; the remaining keys are passed as keyword arguments.
policy_updater:
  _target_: imitation_learning.policy_opt.ppo.PPO
  # Disable recursive instantiation: nested `_target_` nodes (here
  # `optimizer_params`) are handed over as config instead of being built.
  _recursive_: false

  use_clipped_value_loss: true
  clip_param: 0.2
  value_loss_coef: 0.5
  entropy_coef: 0.0001
  max_grad_norm: 0.5
  num_epochs: 2
  num_mini_batch: 4
  # Kept in sync with the top-level rollout dimensions.
  num_envs: ${num_envs}
  num_steps: ${num_steps}

  # Returns calculation
  gae_lambda: 0.95
  use_gae: true
  gamma: ${gamma}

  # Left un-instantiated because of `_recursive_: false` above.
  # NOTE(review): presumably PPO builds the optimizer itself once the model
  # parameters exist; confirm in the PPO class.
  optimizer_params:
    _target_: torch.optim.Adam
    # Explicit mantissa point so every YAML parser reads a float; a bare
    # `3e-4` is a string under stock YAML 1.1 loaders.
    lr: 3.0e-4

# Rollout storage node. `_target_` names the class for Hydra's `instantiate`;
# the remaining keys are passed as keyword arguments. All sizes are
# interpolated so they stay in sync with the top-level and policy settings.
storage:
  _target_: imitation_learning.policy_opt.storage.RolloutStorage
  num_steps: ${num_steps}
  # The storage class calls this `num_processes`; it is fed from `num_envs`.
  num_processes: ${num_envs}
  # Taken from the policy node, not from the top-level key of similar name.
  recurrent_hidden_state_size: ${policy.recurrent_hidden_size}
  obs_shape: ${obs_shape}
  action_dim: ${action_dim}
  action_is_discrete: ${action_is_discrete}
  fetch_final_obs: true

# Hydra runtime settings. `hydra.run.dir` is the output directory for a
# single run; `./` points it at the current directory instead of Hydra's
# default per-run output folder.
hydra:
  run:
    dir: ./
