# Scene
# Train on a single scene (true) or on a scene dataset (false).
single_scene: false

# Original PointGoal dataset
dataset_type: 'PointNav-v1'
# PointGoal dataset split; one of: train, val, val_mini.
split: 'val_mini'
# Where the PointGoal episode split data is stored.
dataset_path: '/dataset/pointnav_gibson_v1/{split}/{split}.json.gz'
pointgoal_dataset_path: '/dataset/pointnav_gibson_v1'
content_scenes:
  - '*'
scenes_dir: '/dataset'
# -----------------------------------------------
# Where the BC training data lives
behavior_dataset_path: '/dataset/behavior_dataset_gibson_4_scene_2000'

# Experiments
algorithm_name: 'ppo'
experiment_name: 'pointgoal_baseline_multienv_1_scene_Rancocas_2000_observation_rnn_5m_4_actions'

# Dummy initial setup
# NOTE(review): YAML keeps the leading '~' as a literal character; presumably
# the consumer expands it to the home directory — confirm.
scene_id: '~/habitat-sim/data/scene_datasets/habitat-test-scenes/van-gogh-room.glb'
agent_initial_position: [3, 0, 1]
agent_initial_rotation: [0, 70, 0]
goal_position: [4, 0, -1]

# Robot
# Unit: meters.
forward_resolution: 0.25
# Unit: degrees.
rotate_resolution: 10
action_number: 4

# Observation space (env)
state_sensor: true
color_sensor: true
depth_sensor: false
semantic_sensor: false
image_width: 224
image_height: 224
# When true, depth is scaled to [0, 1] according to the min and max depth.
normalize_depth: true
min_depth: 0
max_depth: 10.0
dictionary_observation_space: true

# Task
dark_mode: false
flashlight_z: 0.2
measurements:
  - 'steps'
  - 'collisions'
  - 'distance_to_goal'
  - 'success'
  - 'done'
  - 'spl'
  - 'softspl'
  - 'point_goal_reward'
  - 'return'
blind_agent: false

# State
# One of: polar, cartesian.
state_coord_system: 'polar'
state_dimension: 2
cos_augmented_state: false
state_relative_to_origin: true
# Set to true when only coordinates are used as input to the agent.
state_only: false

# Goal
goal_conditioned: true
# Other option noted by the author: imagegoal.
goal_format: 'pointgoal'
goal_relative_to_origin: false
# When false, the absolute goal location relative to the start location is
# used.
goal_gps_compass: true
# One of: polar, cartesian.
goal_coord_system: 'polar'
goal_dimension: 2
cos_augmented_goal: false
# Other option noted by the author: value_function.
goal_input_location: 'baseline'

# Reward
# L2 distance, in meters.
success_distance: 0.2
success_reward: 2.5
slack_reward: -1.0e-4
goal_reward: true
intrinsic_reward_coef: 0
prev_state_novelty_coef: 0.5
depth_reward: false
stop_depend_success: true

# Conditions that terminate an episode
max_steps_per_episode: 500
max_collisions_per_episode: 200

# Dummy entries, kept for consistency with the imitation learning config
goal_form: 'rel_goal'
gpu_id: 0

# Neural network
# Options noted by the author: ResNet, CNN.
visual_encoder: 'ResNet'
pretrained_visual_encoder: false
pretrained_whole_model: false
# Location of the saved weights: either the whole model or the visual encoder
# only.
pretrained_model_path: 'data/ddppo-models/gibson-2plus-resnet50.pth'
# Controls whether the visual encoder backbone is trained.
train_encoder: true
rnn_policy: true
# Options noted by the author: gru, lstm.
rnn_type: 'gru'
attention: false
# Other option noted by the author: fc.
attention_type: 'caption'

# Sampling
use_double_buffered_sampler: false
# Rollout buffer length.
num_steps: 128

# Vector env, used for both training and evaluation (train: 4, eval: 14).
# Other values noted by the author: 6, 8, 14.
num_environments: 4

# Training
torch_gpu_id: 0
simulator_gpu_id: 0
seed: 1

# Training duration: one of num_updates and total_num_steps must be -1.
# num_updates counts how many times agent.update is called, i.e. how many
# times the training process loops.
# Backprop passes inside each agent update = ppo_epoch * num_mini_batch.
num_updates: -1
# total_num_steps counts the steps collected in all environments
# (num_environments).
total_num_steps: 5000000
# Checkpointing: only one of num_checkpoints and checkpoint_interval can be
# used.
num_checkpoints: 10
checkpoint_interval: -1

# Observation transformations
enabled_transforms: []

# Training stats and logging
verbose: true
# Stats are logged every log_interval training updates.
log_interval: 25
# Stats are computed every reward_window_size episodes.
reward_window_size: 50
tensorboard_dir: 'tensorboard'

# PPO
clip_param: 0.2
kl_coef: 0
# Number of training epochs inside each agent update.
# Backprop passes inside each agent update = ppo_epoch * num_mini_batch.
ppo_epoch: 4
# Batches per epoch. Each batch includes the steps from
# num_environments / num_mini_batch rollouts, each rollout includes num_steps
# steps, and one batch is used for each parameter update.
num_mini_batch: 1
value_loss_coef: 0.5
entropy_coef: 0.01
lr: 2.5e-4
# Adam eps.
eps: 1.0e-5
max_grad_norm: 0.5
# Visual encoder output size = state encoder output size = RNN hidden size.
hidden_size: 512
use_gae: true
gamma: 0.99
tau: 0.95
use_linear_clip_decay: true
use_linear_lr_decay: true
use_normalized_advantage: false

# Evaluation
# Folder to evaluate.
eval_checkpoint_folder: 'checkpoints'
# A single checkpoint is evaluated when eval_checkpoint_file is an existing
# file in eval_checkpoint_folder; otherwise all checkpoints in
# eval_checkpoint_folder are evaluated. When a list is given, the checkpoint
# index must be increasing.
eval_checkpoint_file:
  - 'ckpt.9.pth'
# Use the checkpoint config file.
eval_use_ckpt_config: false
# Evaluation results are saved here.
eval_dir: 'evaluation'
# Number of evaluation episodes in a single scene; -1 tests on all episodes
# in the dataset.
test_episode_count: -1
eval_splits:
  - 'same_scene_val'
  - 'across_scene_val'
  - 'same_start_goal_val'
# NOTE(review): this ends in "observation_mlp_5m" while experiment_name ends
# in "observation_rnn_5m_4_actions" — confirm the mismatch is intended.
eval_experiment_folder: 'pointgoal_baseline_multienv_1_scene_Rancocas_2000_observation_mlp_5m'

# Dummy entry
eval_video_option: []

# Distributed
# Whether to use distributed computation (DDPPO).
force_distributed: true
# When running a slurm job, append the slurm job ID to the resume state
# filename. Useful when a different job that shares the same checkpoint dir
# should not resume.
preemption_append_slurm_job_id: false
# Save resume states only when running with slurm; handy when debug jobs
# should not resume.
preemption_save_state_batch_only: false
# Number of gradient updates between resume state saves.
preemption_save_resume_state_interval: 100
# The PyTorch distributed backend to use.
distrib_backend: 'GLOO'
# For DDPPO.
sync_frac: 0.6
# Default port. Other values noted by the author: 8738, 8865.
default_port: 8860














