VERBOSE: False
BASE_TASK_CONFIG_PATH: "configs/tasks/imagenav_gibson.yaml"
TRAINER_NAME: "ddppo"
ENV_NAME: "NavRLEnv"
SIMULATOR_GPU_ID: 0
TORCH_GPU_ID: 0
VIDEO_OPTION: []
TENSORBOARD_DIR: "tb"
VIDEO_DIR: "video_dir"
TEST_EPISODE_COUNT: -1
EVAL_CKPT_PATH_DIR: "data/new_checkpoints"
NUM_ENVIRONMENTS: 4
SENSORS: ["RGB_SENSOR", "DEPTH_SENSOR"]
CHECKPOINT_FOLDER: "data/new_checkpoints"
NUM_UPDATES: -1
TOTAL_NUM_STEPS: 1e9
LOG_INTERVAL: 100
NUM_CHECKPOINTS: 100
# Force PyTorch to be single threaded as
# this improves performance considerably
FORCE_TORCH_SINGLE_THREADED: True

RL:
  SUCCESS_REWARD: 2.5
  SLACK_REWARD: -1e-4

  POLICY:
    name: "PointNavResNetPolicy"

  PPO:
    # ppo params
    clip_param: 0.2
    ppo_epoch: 2
    num_mini_batch: 2
    value_loss_coef: 0.5
    entropy_coef: 0.01
    lr: 2.5e-4
    eps: 1e-5
    max_grad_norm: 0.2
    num_steps: 64
    use_gae: True
    gamma: 0.99
    tau: 0.95
    use_linear_clip_decay: False
    use_linear_lr_decay: False
    reward_window_size: 50

    use_normalized_advantage: False

    hidden_size: 512

    # Use double buffered sampling, typically helps
    # when environment time is similar or large than
    # policy inference time during rollout generation
    use_double_buffered_sampler: False
  DDPPO:
    sync_frac: 0.6
    # The PyTorch distributed backend to use
    distrib_backend: NCCL
    # Visual encoder backbone
    pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth
    # Initialize with pretrained weights
    pretrained: False
    # Initialize just the visual encoder backbone with pretrained weights
    pretrained_encoder: False
    # Whether or not the visual encoder backbone will be trained.
    train_encoder: True
    # Whether or not to reset the critic linear layer
    reset_critic: True

    # Model parameters
    backbone: resnet50
    rnn_type: LSTM
    num_recurrent_layers: 2
