# Note:  Hyperparameters have been changed slightly from
# the paper to allow for things to easily run on 1 GPU

# Task config this experiment config builds on.
BASE_TASK_CONFIG_PATH: "configs/tasks/pointnav_gibson.yaml"
TRAINER_NAME: "ddppo"
ENV_NAME: "NavRLEnv"
# Both GPU IDs are 0, consistent with the single-GPU note in the file header.
SIMULATOR_GPU_ID: 0
TORCH_GPU_ID: 0
# To generate videos, replace the empty list below with this value
# (do not keep both lines; duplicate keys are invalid YAML):
# VIDEO_OPTION: ["disk", "tensorboard"]
VIDEO_OPTION: []
VIDEO_DIR: "video_dir"
# Evaluate on all episodes
TEST_EPISODE_COUNT: -1
EVAL_CKPT_PATH_DIR: "data/new_checkpoints"
# This was 6 for mp3d and 8 for gibson in the paper
NUM_PROCESSES: 6
# Note: To train an RGB-only model,
# you may need to use 8 processes with 4 mini batches.
# If so, the number of updates should be cut in half.
SENSORS: ["DEPTH_SENSOR"]
# NOTE(review): -1 presumably disables the update-count limit so that
# TOTAL_NUM_STEPS governs the length of training -- confirm against the trainer.
NUM_UPDATES: -1
# 75 million steps. Written with an explicit decimal point and a signed
# exponent because YAML 1.1 loaders such as PyYAML resolve a bare `75e6`
# as a string rather than a float.
TOTAL_NUM_STEPS: 7.5e+7
LOG_INTERVAL: 25

# Settings that apply when evaluating checkpoints.
EVAL:
  # Split to evaluate on (presumably a dataset split name -- confirm).
  SPLIT: "val"
  # NOTE(review): presumably selects whether the config stored inside the
  # checkpoint is used instead of this file during evaluation -- confirm.
  USE_CKPT_CONFIG: False

# Policy selected by name.
POLICY_NAME: "ResNetPolicy"

# Reinforcement-learning hyperparameters; everything below is nested under RL.PPO.
RL:
  PPO:
    # ppo params
    decay_factor: 0.33
    clip_param: 0.2
    ppo_epoch: 4
    num_mini_batch: 2
    value_loss_coef: 0.5
    entropy_coef: 0.01
    lr: 2.5e-4
    # Written as `1.0e-5` (not `1e-5`): without a decimal point, YAML 1.1
    # loaders such as PyYAML resolve the scalar as a string, not a float.
    eps: 1.0e-5
    max_grad_norm: 0.5
    num_steps: 128
    hidden_size: 512
    use_gae: True
    gamma: 0.99
    tau: 0.95
    # Both the clip parameter and the learning rate use linear decay.
    use_linear_clip_decay: True
    use_linear_lr_decay: True
    reward_window_size: 250

    use_normalized_advantage: False
