# Run identity: verbose flag, trainer and environment names, and the
# sensors to enable.
VERBOSE: false
TRAINER_NAME: 'mddppo'
ENV_NAME: 'SimpleRLEnv'
SENSORS:
  - 'RGB_SENSOR'

# Output locations. Every path is built from the `hydra:sweep.dir`
# interpolation, so all artifacts land under that one directory.
# EVAL_CKPT_PATH_DIR and CHECKPOINT_FOLDER point at the same directory,
# as do TENSORBOARD_DIR and LOG_DIR.
VIDEO_OPTION: []
VIDEO_DIR: '${hydra:sweep.dir}/video'
TENSORBOARD_DIR: '${hydra:sweep.dir}/logs'
EVAL_CKPT_PATH_DIR: '${hydra:sweep.dir}/checkpoints'
CHECKPOINT_FOLDER: '${hydra:sweep.dir}/checkpoints'
LOG_DIR: '${hydra:sweep.dir}/logs'
LOG_FILE: '${hydra:sweep.dir}/train.log'

# Run sizing and reporting cadence.
NUM_ENVIRONMENTS: 10
LOG_INTERVAL: 100
NUM_CHECKPOINTS: 100
# NOTE(review): NUM_UPDATES is -1 while TOTAL_NUM_STEPS is set; presumably
# -1 means "derive the run length from TOTAL_NUM_STEPS" — confirm against
# the trainer.
NUM_UPDATES: -1
# Written with a mantissa dot and a signed exponent on purpose: a bare
# `500e6` is read as the *string* "500e6" by YAML 1.1 loaders such as
# PyYAML. This form resolves to the same float (5e8) everywhere.
TOTAL_NUM_STEPS: 500.0e+6

# NOTE(review): presumably restricts PyTorch to a single thread per
# process — confirm against the trainer that consumes this flag.
FORCE_TORCH_SINGLE_THREADED: true

# Evaluation settings.
EVAL:
  # Dataset split to evaluate on.
  SPLIT: 'val'
  USE_CKPT_CONFIG: true
  # NOTE(review): the unit of EVAL_FREQ (checkpoints? updates?) is not
  # visible in this file — confirm against the evaluator.
  EVAL_FREQ: 5

# Reinforcement-learning settings; the POLICY, PPO and DDPPO sub-sections
# are nested under this key.
RL:
  # Names of the measures selected for reward and for success.
  REWARD_MEASURE: 'simple_reward'
  SUCCESS_MEASURE: 'success'

  # Policy network settings.
  POLICY:
    name: 'EAIPolicy'
    hidden_size: 512
    # Image preprocessing: resize the short edge to 480 (640x480), then
    # crop (pad) to a square 640x640 image.
    input_resize_size: 480
    input_crop_size: 640
    # Recurrent part of the policy.
    rnn_type: 'LSTM'
    num_recurrent_layers: 2
    # Augmentations are enabled, and also enabled at test time.
    use_augmentations: true
    use_augmentations_test_time: true
    # freeze_batchnorm is on while freeze_backbone is off.
    freeze_batchnorm: true
    freeze_backbone: false
    global_pool: false
    use_cls: false
    num_downsample_layers: 2


  # PPO hyper-parameters.
  PPO:
    clip_param: 0.2
    ppo_epoch: 2
    num_mini_batch: 2
    value_loss_coef: 0.5
    entropy_coef: 0.01
    # Optimizer settings. Every exponent-form value carries a mantissa dot
    # and a signed exponent: a bare `1e-6` / `1e-5` is read as a *string*
    # by YAML 1.1 loaders such as PyYAML, whereas `lr` and `encoder_lr`
    # already resolved as floats. The numeric values are unchanged.
    lr: 2.5e-4
    encoder_lr: 1.5e-6
    wd: 1.0e-6
    eps: 1.0e-5
    max_grad_norm: 0.2
    num_steps: 64
    use_gae: true
    # Both linear decay schedules (learning rate and clip) are disabled.
    use_linear_lr_decay: false
    use_linear_clip_decay: false
    gamma: 0.99
    tau: 0.95
    reward_window_size: 50
    use_normalized_advantage: false
    # Same value as RL.POLICY.hidden_size.
    # NOTE(review): confirm which of the two the model actually reads.
    hidden_size: 512
    use_double_buffered_sampler: false

  # Distributed (DD-PPO) settings.
  DDPPO:
    sync_frac: 0.6
    distrib_backend: 'NCCL'

    # Model parameters. rnn_type and num_recurrent_layers carry the same
    # values as the matching keys under RL.POLICY.
    backbone: 'resnet50'
    rnn_type: 'LSTM'
    num_recurrent_layers: 2
