### Ant Config for TraIRL

# Base environment ID and the number of environment instances to create.
base_env_id: Ant-v5
n_envs: 5
# Wrapper applied on top of the base environment; its per-environment
# arguments are listed under wrapper_kwargs.
env_wrapper: DisabledAnt
# Options for the base environment (presumably forwarded to its constructor).
# Episodes are not cut short when the ant is unhealthy, and contact forces are
# left out of the observation.
env_kwargs:
  terminate_when_unhealthy: false
  include_cfrc_ext_in_observation: false
# Arguments for env_wrapper, keyed by source environment name. Every
# joints_status list holds eight 0/1 flags, and the two lists are exact
# complements of each other.
# NOTE(review): presumably 1 keeps a joint active and 0 disables it — confirm
# against the DisabledAnt wrapper.
wrapper_kwargs:
  Ant_front_left_back_left:
    joints_status: [0, 0, 1, 1, 0, 0, 1, 1]
  Ant_front_right_back_right:
    joints_status: [1, 1, 0, 0, 1, 1, 0, 0]

# Name of the environment initialisation function, followed by its arguments
# keyed by source environment name.
# The joints_status values below are identical to those under wrapper_kwargs;
# keep the two mappings in sync when editing either one.
env_init_func: init_ant_env_trairl
env_init_func_kwargs:
  Ant_front_left_back_left:
    joints_status: [0, 0, 1, 1, 0, 0, 1, 1]
  Ant_front_right_back_right:
    joints_status: [1, 1, 0, 0, 1, 1, 0, 0]

# Names of the source environments. The same names are used as keys in
# wrapper_kwargs, env_init_func_kwargs, expert_files and decoder_out_dim.
source_env_name:
  - Ant_front_left_back_left
  - Ant_front_right_back_right
# Dimension of the learned abstraction; reward_in_dim and disc_in_dim are set
# to the same value (presumably the encoder output size — TODO confirm).
abstraction_dim: 16
current_obs_only: true
device: cuda

# Single-encoder mode: state_dim is one integer here rather than a
# per-environment mapping. The value equals decoder_out_dim for both source
# environments.
use_single_encoder: true
state_dim: 27
# NOTE(review): the stock Ant has 8 actuated joints, while each joints_status
# list contains four 1s — presumably why action_dim is 4; confirm against the
# wrapper.
action_dim: 4

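# Alternative (disabled): with use_single_encoder set to False, state_dim is
# given as a mapping keyed by source environment name instead of one integer.
# NOTE: the keys below are HalfCheetah placeholders carried over from another
# config and have no values; replace them with this file's Ant environment
# names and fill in the dimensions before enabling.
# use_single_encoder: False
# state_dim:
  # HalfCheetah_back:
  # HalfCheetah_front: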

# Expert trajectory file for each source environment, keyed by environment
# name. The paths are relative, so they presumably resolve against the working
# directory the training script is launched from — confirm in the loader.
expert_files:
  Ant_front_left_back_left: ./expert_trajectories/Ant_front_left_back_left.pt
  Ant_front_right_back_right: ./expert_trajectories/Ant_front_right_back_right.pt

# Learner buffer config
# NOTE(review): units are not visible in this file — presumably the buffer
# capacity and the number of samples added per update; confirm in the trainer.
learner_buffer_size: 10000000
learner_buffer_update_size: 1000

# Encoder config
# Three hidden sizes of 32 (presumably the widths of successive MLP layers)
# and the encoder learning rate.
encoder_hidden_dims: [32, 32, 32]
encoder_lr: 3.0e-4

# Decoder config
# decoder_out_dim is keyed by source environment name; both entries equal
# state_dim (27).
decoder_hidden_dims: [64, 64, 64]
decoder_out_dim:
  Ant_front_left_back_left: 27
  Ant_front_right_back_right: 27
decoder_lr: 3.0e-4

# VAE config
# Weights for the loss terms named by each key (presumably multiplied into the
# total VAE objective — confirm in the training code).
cycle_consistency_weight: 0.5
vae_recon_weight: 1.0
vae_kld_weight: 0.1

# Reward network config
# reward_in_dim equals abstraction_dim (16).
# NOTE(review): with reward_use_encoder enabled the reward net presumably
# consumes encoder outputs, so these two values should stay equal — confirm.
reward_in_dim: 16
reward_hidden_dims: [16, 16]
reward_use_encoder: true
reward_net_lr: 3.0e-4
reward_batch_size: 256
reward_update_steps: 10
reward_weight_decay: 0.001

# Discriminator (disc_*) config
# disc_in_dim equals abstraction_dim (16); the remaining keys mirror the
# reward network settings, plus a gradient-penalty weight.
disc_in_dim: 16
disc_hidden_dims: [16, 16]
disc_net_lr: 3.0e-4
disc_batch_size: 256
disc_update_steps: 10
disc_gradient_penalty_weight: 10.0
disc_weight_decay: 0.001

# Policy config
# NOTE(review): MlpPolicy and OrnsteinUhlenbeckActionNoise match class names
# from Stable-Baselines3 — presumably resolved there; confirm in the trainer.
policy_type: MlpPolicy
# Extra policy arguments; only the optimizer's weight decay is set here.
policy_kwargs:
  optimizer_kwargs:
    weight_decay: 0.001
policy_hidden_dims: [300, 400]
# Exploration noise added to actions: noise class name and its std.
action_noise:
  type: OrnsteinUhlenbeckActionNoise
  std: 0.25
policy_update_steps: 5000
policy_lr: 5.0e-4
policy_batch_size: 256
# presumably the soft target-update coefficient — TODO confirm
policy_tau: 0.1
# gamma is presumably the discount factor; seed is the random seed for the run.
gamma: 0.99
seed: 1234


