# Hydra defaults list.
defaults:
  # Selects the `config_path` option of the `paths` config group; the `paths.*`
  # keys it provides are read through `${paths...}` interpolations in this file.
  - paths: config_path
  # Replaces Hydra's launcher with the `joblib` option.
  - override hydra/launcher: joblib


# --- Environment ---
# `???` is OmegaConf's mandatory-value marker: `env` must be supplied by an
# override or by another config.
env: ???
action_repeat: 1
seed: 1

# --- Evaluation ---
eval_frequency: 10000
train_episode_video_freq: 100
num_eval_episodes: 10

# --- Logging and saving ---
log_frequency_step: 10000
log_save_tb: true
save_video: true
save_model: false
save_buffer: false
save_pixels: false
save_frequency: 500000
# Follows `save_frequency` through interpolation.
buffer_save_frequency: ${save_frequency}
device: cuda

# --- Episode / environment flags ---
reset_at_goal: false
logging_frequency: 1000
goal_env: true
# Also passed to the agent section as `agent.state_env`.
state_env: true

# Values noted by the author: inf, 4.
aim_n_sampled_goal: 4
done_on_success: true
# Also passed to the agent section as `agent.consider_done_true_in_critic`.
consider_done_true_in_critic: true



# --- Observation normalisation (each switch is also passed to the agent section) ---
normalize_nml_obs: false
normalize_f_obs: true  # alternative noted: false
normalize_rl_obs: true  # alternative noted: false

# --- Step budgets and buffer sizes ---
num_train_steps: 3000000
# Mandatory. Author's notes: 4000;
# Ant: 1000->9000 (100 episodes), Fetch: 500, Sawyer Door: 1000.
num_random_steps: ???
# Mandatory. Author's notes: 4000;
# Ant: 1000->9000 (100 episodes), Fetch: 500, Sawyer Door: 1000.
num_seed_steps: ???
replay_buffer_capacity: 3000000
aim_disc_replay_buffer_capacity: 50000  # alternative noted: 20000
# Tied to `aim_disc_replay_buffer_capacity` through interpolation.
randomwalk_buffer_capacity: ${aim_disc_replay_buffer_capacity}
# Mandatory. Author's notes: 300;
# Ant: 300, AntMaze: 400, Fetch: 50, Sawyer Door: 100.
max_episode_timesteps: ???
use_aim: true
use_TD3: false  # alternative noted: true (td3_dependency)


aim_num_precollect_init_state: 100

# Referenced by `inv_weight_curriculum_kwargs` and `outpace_kwargs`.
curriculum_buffer: default  # alternative noted: aim

# --- Random-walk settings ---
use_residual_randomwalk: true
# Choices listed by the author: 'nml', 'f', 'none'.
# NOTE: 'nml' can be slow to evaluate uncertainty if randomwalk_num_candidate
# is large (10 corresponds to 0.3~0.4s).
use_uncertainty_for_randomwalk: nml
randomwalk_num_candidate: 10  # alternative noted: 5
# Mandatory. Values noted by the author: 2.5, 5.
randomwalk_random_noise: ???
# Choices listed by the author: 'rand_action', 'randgoal',
# 'expl_policy' (not implemented yet).
randomwalk_method: randgoal


# --- Feature toggles ---
use_meta_nml: true
use_hgg: true
use_aim_disc_ensemble: true  # alternative noted: false

# --- Optimiser settings, shared via `${adam_eps}` / `${optim}` interpolation ---
# Written with an explicit decimal point: YAML 1.1 loaders (e.g. plain PyYAML)
# resolve a bare `1e-8` as a string, whereas `1.0e-8` is a float for every loader.
adam_eps: 1.0e-8
optim: adam

# --- For ablation study ---
rl_reward_type: aim  # aim or sparse
hgg_cost_type: meta_nml_aim_f  # alternatives noted: meta_nml, aim_f

# Author's timing note: pool_length: 500 with split_ratio_for_meta_nml: 0.05
# gives an update time of ~60s when the resource is enough.
hgg_kwargs:
  hgg_sampler_update_frequency: 20  # unit: episode; alternative noted: 50
  trajectory_pool_kwargs:
    pool_length: 100  # number of trajectories in pool; alternative noted: 200
  match_sampler_kwargs:
    # Number of cycles per epoch in original HGG; alternative noted: 50.
    num_episodes: ${hgg_kwargs.hgg_sampler_update_frequency}
    add_noise_to_goal: true  # alternative noted: false
    cost_type: ${hgg_cost_type}
    max_episode_timesteps: ${max_episode_timesteps}
    split_type_for_meta_nml: last
    # Ratio compared to max timesteps; alternatives noted: 0.3, 0.1.
    split_ratio_for_meta_nml: 0.5
    normalize_aim_output: true  # alternative noted: false
    gamma: ${agent.discount}  # 0.99
    hgg_c: 3.0  # not used currently
    hgg_L: 50  # alternative noted: 0.5
    device: ${device}
    hgg_gcc_path: ${paths.default_hgg_gcc_path}
    



  


# Meta-NML training options; passed to the agent as `agent.meta_nml_kwargs`.
meta_nml_kwargs:
  equal_pos_neg_test: true
  meta_nml_negatives_only: false
  # Unit: step; roughly corresponds to 1 epoch.
  meta_nml_train_every_k: 3000
  meta_nml_train_on_positives: true
  meta_nml_use_preprocessor: false
  # NOTE(review): YAML reads `None` as the string "None", not as null —
  # confirm the consumer expects the string.
  meta_nml_custom_embedding_key: None
  meta_task_batch_size: 1
  meta_nml_shuffle_states: false
  # meta_nml_reset_frequency: 20  # not implemented yet
  num_initial_meta_epochs: 3  # alternative noted: 10
  num_meta_epochs: 1
  nml_grad_steps: 1  # k-shot adaptation
  test_strategy: sample
  accumulation_steps: 16
  meta_train_sample_size: 512  # alternative noted: 256
  meta_test_sample_size: 2048
  meta_test_batch_size: 2048
  mixup_alpha: 0
  meta_nml_temperature: 0.1  # for weighted sampling
  

# Instantiation spec for the Meta-NML model; passed to the agent as
# `agent.meta_nml_cfg`.
meta_nml:
  _target_: maml.meta_nml.MetaNML
  hidden_sizes: [2048, 2048]
  input_dim: ???  # mandatory; example noted: 2
  points_per_task: 64
  # Mandatory. Author's note: equal_pos_neg_test and not meta_nml_negatives_only.
  equal_pos_neg_test: ???
  dist_weight_thresh: 1
  query_point_weight: 1
  do_metalearning: true
  train_vae: false
  # NOTE(review): YAML reads `None` as the string "None", not as null —
  # confirm the consumer expects the string.
  num_finetuning_layers: None
  device: ${device}
  num_workers: 1  # alternative noted: 8



  

# Inverse-weight curriculum options; passed to the agent as
# `agent.inv_weight_curriculum_kwargs`.
inv_weight_curriculum_kwargs:
  inv_weight_curriculum_temperature: 1  # alternative noted: 10
  inv_weight_curriculum_batch_size: 256  # alternative noted: 128
  inv_weight_curriculum_type: softmin  # alternative noted: topk
  # Choices listed by the author: false, first, last.
  inv_weight_curriculum_add_noise: false
  inv_weight_curriculum_logit_type: disc  # disc, reward
  # 0.5 for normal (not implemented yet), 3 for uniform.
  inv_weight_curriculum_noise_scale: 3
  inv_weight_curriculum_mix_ratio: 0.8
  curriculum_buffer: ${curriculum_buffer}
  use_Vf_to_inv_curriculum: softmax  # alternative noted: 'add'
  Vf_coef_inv_curriculum: 0.1
  inv_weight_curriculum_n_sample_filtering: 128
  inv_weight_curriculum_temperature_Vf: 1
  use_ensemble: false  # alternative noted: true
  use_rpf: false  # alternative noted: true
  n_ensemble: 5
  # Choices listed by the author: aim_f_only, aim_f_first, aim_f_last.
  curriculum_sampling_order: aim_f_last
  inv_weight_curriculum_aim_topk: 0.1

# OUTPACE curriculum options.
outpace_kwargs:
  outpace_curriculum_temperature: 0.1
  # Follows the top-level `curriculum_buffer` through interpolation.
  curriculum_buffer: ${curriculum_buffer}

# Instantiation spec for the AIM discriminator ensemble; passed to the agent as
# `agent.aim_discriminator_cfg`.
aim_discriminator_cfg:
  _target_: outpacesac.DiscriminatorEnsemble
  n_ensemble: 5
  x_dim: ???  # mandatory; author's note: goal dim*2
  reward_type: aim
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. plain PyYAML)
  # resolve a bare `1e-4` as a string, whereas `1.0e-4` is a float for every
  # loader. Alternative noted: 1e-3.
  lr: 1.0e-4
  lipschitz_constant: 0.1
  # Choices listed by the author: None, tanh.
  # NOTE(review): YAML reads `None` as the string "None", not as null —
  # confirm the consumer expects the string.
  output_activation: None
  device: ${device}
  env_name: ${env}
  tanh_constant: 1
  lambda_coef: 25  # alternative noted: 10
  adam_eps: ${adam_eps}
  optim: ${optim}

# AIM reward/discriminator options; passed to the agent as `agent.aim_kwargs`.
aim_kwargs:
  # From git — 100: Reach, 1000: others (Slide, Push, Pick&Place).
  aim_disc_update_frequency: 1000
  # From git — 5: Reach, 10: others (Slide, Push, Pick&Place).
  aim_discriminator_steps: 10
  aim_rew_std: 1.0
  aim_rew_mean: 0.0
  aim_reward_normalize: true
  aim_reward_norm_offset: 0.1
  aim_input_type: default  # default, ag, ag_hand





# Instantiation spec for the agent. Most entries are interpolations that pull in
# top-level settings or the sub-configs defined elsewhere in this file.
agent:
  _target_: outpacesac.OUTPACEAgent
  # Mandatory values, to be specified later.
  obs_shape: ???
  action_shape: ???
  action_range: ???
  device: ${device}

  # Encoder and critic sub-configs (online / target / exploration variants).
  encoder_cfg: ${encoder}
  encoder_target_cfg: ${encoder}
  critic_cfg: ${critic}
  critic_target_cfg: ${critic}
  expl_critic_cfg: ${critic}
  expl_critic_target_cfg: ${critic}
  use_TD3: ${use_TD3}
  criticTD3_cfg: ${criticTD3}
  criticTD3_target_cfg: ${criticTD3}
  expl_criticTD3_cfg: ${criticTD3}
  expl_criticTD3_target_cfg: ${criticTD3}
  # vf_cfg: ${vf}
  # vf_target_cfg: ${vf}
  goal_dim: ???  # mandatory

  # Actor sub-configs.
  actor_cfg: ${actor}
  expl_actor_cfg: ${actor}
  actorTD3_cfg: ${actorTD3}
  actorTD3_target_cfg: ${actorTD3}
  expl_actorTD3_cfg: ${actorTD3}
  expl_actorTD3_target_cfg: ${actorTD3}

  # Learning hyper-parameters.
  discount: 0.99
  init_temperature: 0.3  # alternative noted: 0.1
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. plain PyYAML)
  # resolve a bare `1e-4` as a string, whereas `1.0e-4` is a float for every loader.
  lr: 1.0e-4
  adam_eps: ${adam_eps}
  optim: ${optim}
  actor_update_frequency: 2
  # Author's note (td3_dependency): 0.005 for TD3, 0.01 for SAC.
  critic_target_tau: 0.01
  # Author's note (td3_dependency): 10 for TD3, 2 for SAC.
  critic_target_update_frequency: 2
  encoder_target_tau: 0.05
  encoder_update_frequency: 2
  batch_size: 512

  # Values forwarded from the top-level settings.
  num_seed_steps: ${num_seed_steps}
  state_env: ${state_env}
  env_name: ${env}
  consider_done_true_in_critic: ${consider_done_true_in_critic}
  use_aim: ${use_aim}
  aim_discriminator_cfg: ${aim_discriminator_cfg}
  aim_kwargs: ${aim_kwargs}
  inv_weight_curriculum_kwargs: ${inv_weight_curriculum_kwargs}
  use_meta_nml: ${use_meta_nml}
  meta_nml_cfg: ${meta_nml}
  meta_nml_kwargs: ${meta_nml_kwargs}
  normalize_nml_obs: ${normalize_nml_obs}
  normalize_f_obs: ${normalize_f_obs}
  normalize_rl_obs: ${normalize_rl_obs}
  randomwalk_method: ${randomwalk_method}
  use_aim_disc_ensemble: ${use_aim_disc_ensemble}
  rl_reward_type: ${rl_reward_type}

# Instantiation spec referenced by the agent's `*criticTD3*_cfg` entries.
criticTD3:
  _target_: outpace_core.StateCriticTD3
  feature_dim: ???  # mandatory
  # Follows `agent.action_shape` through interpolation.
  action_shape: ${agent.action_shape}
  hidden_dim: 512
  hidden_depth: 3
# Instantiation spec referenced by the agent's `*actorTD3*_cfg` entries.
actorTD3:
  _target_: outpace_core.StateActorTD3
  feature_dim: ???  # mandatory
  # Follows `agent.action_shape` through interpolation.
  action_shape: ${agent.action_shape}
  hidden_depth: 3
  hidden_dim: 512
  
# Instantiation spec referenced by the agent's `*critic_cfg` / `*critic_target_cfg` entries.
critic:
  _target_: outpace_core.StateCritic
  feature_dim: ???  # mandatory
  # Follows `agent.action_shape` through interpolation.
  action_shape: ${agent.action_shape}
  hidden_dim: 512
  hidden_depth: 3



# Instantiation spec referenced by the agent's `actor_cfg` / `expl_actor_cfg` entries.
actor:
  _target_: outpace_core.StateActor
  feature_dim: ???  # mandatory
  # Follows `agent.action_shape` through interpolation.
  action_shape: ${agent.action_shape}
  hidden_depth: 3
  hidden_dim: 512
  log_std_bounds: [-10, 2]

# Instantiation spec referenced by the agent's `encoder_cfg` / `encoder_target_cfg` entries.
encoder:
  _target_: outpace_core.IdentityEncoder
  # Follows `agent.obs_shape` through interpolation.
  obs_shape: ${agent.obs_shape}
  # project_for_state_input: true


visualize_debug: false

# --- Experiment naming and path aliases ---
experiment: bench
# Both aliases read their value from the `paths` config group.
save_path_prefix: ${paths.default_save_path_prefix}
env_path: ${paths.default_env_path}


# Hydra output-directory layout:
# <save_path_prefix>/<env>/<date>/<time>_test, using the `now` resolver.
hydra:
  # name: ${env}
  # Single process.
  run:
    dir: ${save_path_prefix}/${env}/${now:%Y.%m.%d}/${now:%H%M%S}_test

  # Multi process: each job writes into a subdirectory named after its seed.
  # NOTE(review): sweep jobs that share the same seed resolve to the same
  # subdir — confirm sweeps only vary `seed`.
  sweep:
    dir: ${save_path_prefix}/${env}/${now:%Y.%m.%d}/${now:%H%M%S}_test
    subdir: ${seed}
  
