# @package agent
# NOTE: keep the package directive above as the very first line. Hydra only
# reads `# @...` directives from the leading comment header of a config file;
# a directive placed after `defaults` is not picked up.
defaults:
  - networks@policy_config: policy_config.yaml
  - networks@value_config: value_config.yaml

# Class path and instantiation options (Hydra `_target_` / `_recursive_` keys).
_target_: agent.gc_diayn.GC_DIAYN_Agent
_recursive_: false
name: gc_diayn

# `${...}` entries are interpolations resolved from the same-named root keys;
# `???` marks a mandatory value that has to be filled in before it is read.
reward_free: ${reward_free}
obs_type: ???  # to be specified later
obs_shape: ???  # to be specified later
action_shape: ???  # to be specified later
cuda_id: ${cuda_id}
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. plain PyYAML)
# read a bare `1e-4` as a string, whereas `1.0e-4` is a float everywhere.
lr: 1.0e-4
critic_target_tau: 0.01
update_every_steps: 2
use_tb: ${use_tb}
use_wandb: ${use_wandb}
num_expl_steps: ???  # to be specified later
hidden_dim: 1024
pred_hidden_dim: 1024
feature_dim: 50  # feature dim doesn't matter when not dealing with images
stddev_schedule: 0.2
stddev_clip: 0.3


# Batch, n-step, domain and skill settings. `???` marks a mandatory value.
batch_size: 1024
init_critic: true
update_encoder: ${update_encoder}
trans: false  # whether the discriminator takes in two states or one state

# training_params (further down) lists an nstep value for every domain.
nstep: ??? # 1 or 3 depending on the domain (3 originally)
domain: ??? # to be specified later
env_config: ???

skill_dim: ???
diayn_scale: 1.0
update_skill_every_step: ??? # this is to be consistent with the previous setting
update_skill_inter_episode: true

# Parted
parted: true
actor_type: mono
anti: false
anti_coef: 0.1

# SAC
sac: ???
# Per-domain temperatures are listed under training_params, e.g. 0.1 for
# moma2d, 0.3 for particle, 0.02 for igibson (switched to ddpg eventually).
init_temperature: ???
update_alpha: false
log_std_bounds: [-10, 2]

# MCP
use_gate: false

# Ind
ind_type: dense # alternative: sparse; this is just for moma2d with ind actor

critic_type: ???

# Newly added options
mask_out_transitions: true
step_count_threshold: ???

# Controls whether we start updating skills after a certain number of steps
update_skill_threshold: 0
use_spectral_norm: ???

add_task_reward: false  # this will append an additional task reward besides the diayn reward

Q_range: null # 100
return_dist: true  # whether we want to use the dist reward (otherwise we would use the diayn reward)

# Per-domain values for the keys left as `???` at the top level of this file:
# sac, init_temperature, update_skill_every_step, use_spectral_norm,
# critic_type, step_count_threshold, skill_dim and nstep.
# NOTE(review): presumably the entry matching `domain` is selected by the
# training code - confirm against the caller.
training_params:
  particle:
    sac: true
    init_temperature: 0.3
    update_skill_every_step: 50
    use_spectral_norm: false
    critic_type: mask_unwt
    step_count_threshold: 10
    skill_dim: 5
    nstep: 1
  igibson:
    sac: false
    init_temperature: 0.02
    update_skill_every_step: 200
    use_spectral_norm: true
    critic_type: mask_unwt
    step_count_threshold: 40
    skill_dim: 4
    nstep: 3
  moma2d:
    sac: true
    init_temperature: 0.1
    update_skill_every_step: 50
    use_spectral_norm: false
    critic_type: mask_unwt
    step_count_threshold: 10
    skill_dim: 5
    nstep: 1
  quadruped:
    sac: false
    init_temperature: 0.1
    update_skill_every_step: 500
    use_spectral_norm: false
    critic_type: mask_unwt
    step_count_threshold: 0
    skill_dim: 8
    nstep: 3
  walker:
    sac: true
    init_temperature: 0.1
    update_skill_every_step: 500
    use_spectral_norm: false
    critic_type: mask_unwt
    step_count_threshold: 0
    skill_dim: 8
    nstep: 1
  wipe:
    sac: false
    init_temperature: 0.1
    update_skill_every_step: 200
    use_spectral_norm: true
    critic_type: mask_unwt
    step_count_threshold: 40
    skill_dim: 4
    nstep: 3
