# Hydra defaults list: composes the `config_path` option of the `paths` config
# group (its values are referenced below as ${paths.*}) and overrides Hydra's
# launcher with `joblib`.
defaults:
  - paths: config_path
  - override hydra/launcher: joblib


# Top-level environment / task flags.
frame_stack: 3
multi_target: true
sawyer_wall_env: false


# env
env: ???  # mandatory value: no default is given in this file
action_repeat: 1
seed: 1
# eval
eval_frequency: 10000
train_episode_video_freq: 200
num_eval_episodes: 10
# misc
log_frequency_step: 10000
log_save_tb: true
save_video: true
save_model: false
save_buffer: false
save_pixels: false
save_frequency: 500000
buffer_save_frequency: ${save_frequency}  # tracks save_frequency
device: "cuda"


logging_frequency: 1000
goal_env: true

# Episode-termination handling.
done_on_success: true
consider_done_true_in_critic: false

normalize_rl_obs: true

# Training schedule and buffer sizes.
# Entries set to `???` are mandatory and have no default in this file.
num_train_steps: 3000000
num_random_steps: ???
num_seed_steps: ???
replay_buffer_capacity: 3000000
randomwalk_buffer_capacity: 50000
max_episode_timesteps: ???

# Trailing comments keep the alternative values noted in the original file.
fc_layer_norm_for_obs: false  # alt: true

# Gradient / Q-value clipping.
grad_norm_clipping: 0  # alt: 15.0
grad_value_clipping: 0  # alt: -1.0
q_clip: false


# Random-walk settings.
use_residual_randomwalk: true
use_uncertainty_for_randomwalk: 'd2c'
randomwalk_num_candidate: 10
randomwalk_random_noise: ???  # mandatory value: no default in this file
randomwalk_method: 'randgoal'  # alt: 'rand_action'

# D2C settings; passed to the agent node through the ${use_d2c},
# ${d2c_reward_type} and ${d2c_kwargs} interpolations.
use_d2c: true
d2c_reward_type: positive
d2c_kwargs:
  # arbitrary goal-conditioned classifier, or a fixed goal as in example-based RL
  goal_condition: true
  goal_candidate_type: uniform  # alt: buffer
  n_goal_candidates: 50
  n_noise_augment_per_goal: 10
  noise_scale: ???  # mandatory value: no default in this file
  mode: mi
  reduction: mean
  aux_weight: 1
  normalize: true
  batch_size: 512
  train_every_k: 3000  # unit: step
  num_update: 16
  num_init_update: 100
  temperature: 0.1
  lr: 1e-3  # important
  use_randomwalk_buffer: false

# Instantiation node for the class named by `_target_`; the agent node
# references it as ${d2c_cfg}.
d2c_cfg:
  _target_: d2c.d2c.Network
  feature_dim: ???  # mandatory value: no default in this file
  hidden_dim: 256
  hidden_depth: 2
  heads: 2
  net_type: default  # alt: layer_norm




use_hgg: true

# Optimizer settings; the agent node references them as ${adam_eps} and ${optim}.
adam_eps: 1e-8
optim: adam

# Reward / cost selectors.
sparse_reward_type: negative
rl_reward_type: d2c
hgg_cost_type: d2c


# HGG settings; several entries are interpolated from other keys in this file.
hgg_kwargs:
  hgg_sampler_update_frequency: 20  # unit: episode
  trajectory_pool_kwargs:
    pool_length: 2000  # number of trajectories in pool
  match_sampler_kwargs:
    # mandatory value; the original note suggests
    # ${hgg_kwargs.hgg_sampler_update_frequency} as a candidate
    num_episodes: ???
    add_noise_to_goal: true
    cost_type: ${hgg_cost_type}
    max_episode_timesteps: ${max_episode_timesteps}
    split_type: last
    split_ratio: 0.5  # ratio compared to max timesteps
    gamma: ${agent.discount}
    hgg_c: 3.0
    hgg_L: 50
    device: ${device}
    hgg_gcc_path: ${paths.default_hgg_gcc_path}
    goal_condition: ${d2c_kwargs.goal_condition}
    




# Instantiation node for the agent class named by `_target_`. Most entries are
# interpolations of top-level keys so that each value is defined only once.
agent:
  _target_: d2c_sac.D2CAgent
  obs_shape: ???  # to be specified later
  action_shape: ???  # to be specified later
  action_range: ???  # to be specified later
  device: ${device}
  encoder_cfg: ${encoder}
  # encoder_target_cfg: ${encoder}
  critic_cfg: ${critic}
  critic_target_cfg: ${critic}
  # NOTE(review): `none` parses as the string "none", not YAML null; confirm the
  # consumer overwrites it or expects this string.
  goal_dim: none
  actor_cfg: ${actor}
  discount: 0.99
  init_temperature: 0.3
  alpha_auto: true
  lr: 1e-4
  adam_eps: ${adam_eps}
  optim: ${optim}
  actor_update_frequency: 2
  critic_target_tau: 0.01
  critic_target_update_frequency: 2
  # encoder_target_tau: 0.05
  # encoder_update_frequency: 2
  batch_size: 512
  num_seed_steps: ${num_seed_steps}
  env_name: ${env}
  consider_done_true_in_critic: ${consider_done_true_in_critic}

  normalize_rl_obs: ${normalize_rl_obs}
  randomwalk_method: ${randomwalk_method}
  rl_reward_type: ${rl_reward_type}

  use_d2c: ${use_d2c}
  d2c_cfg: ${d2c_cfg}
  d2c_kwargs: ${d2c_kwargs}
  d2c_reward_type: ${d2c_reward_type}

  grad_norm_clipping: ${grad_norm_clipping}
  grad_value_clipping: ${grad_value_clipping}
  q_clip: ${q_clip}
  sparse_reward_type: ${sparse_reward_type}
  # NOTE(review): same as goal_dim above, this is the string "none", not null.
  d2c_feature_dim: none
  




# Instantiation node for the critic; the agent node references it as ${critic}
# for both critic_cfg and critic_target_cfg.
critic:
  _target_: d2c_core.StateCritic
  repr_dim: ???  # mandatory value: no default in this file
  feature_dim: 50
  action_shape: ${agent.action_shape}
  hidden_dim: 512
  hidden_depth: 3
  fc_layer_norm_for_obs: ${fc_layer_norm_for_obs}

# Instantiation node for the actor; the agent node references it as ${actor}.
actor:
  _target_: d2c_core.StateActor
  repr_dim: ???  # mandatory value: no default in this file
  feature_dim: 50
  action_shape: ${agent.action_shape}
  hidden_depth: 3
  hidden_dim: 512
  log_std_bounds: [-5, 1]  # alt: [-10, 2]
  fc_layer_norm_for_obs: ${fc_layer_norm_for_obs}

# Instantiation node for the encoder; the agent node references it as ${encoder}.
encoder:
  _target_: d2c_core.IdentityEncoder
  obs_shape: ${agent.obs_shape}
  



# Experiment tag and paths taken from the `paths` config group.
experiment: bench
save_path_prefix: ${paths.default_save_path_prefix}
env_path: ${paths.default_env_path}


# Hydra output directories, built from ${save_path_prefix}, ${env} and the
# launch timestamp; sweep jobs additionally use ${seed} as their subdirectory.
hydra:
  # name: ${env}
  run:  # single process
    dir: ${save_path_prefix}/${env}/${now:%Y.%m.%d}/${now:%H%M%S}_test

  sweep:  # multi process
    dir: ${save_path_prefix}/${env}/${now:%Y.%m.%d}/${now:%H%M%S}_test
    subdir: ${seed}
  
