### HalfCheetah Config for TraIRL

# Environment construction: base environment id, number of environments,
# and the wrapper applied to them.
base_env_id: HalfCheetah-v5
n_envs: 5
env_wrapper: DisabledHalfCheetah
# Wrapper arguments, one entry per name listed in source_env_name.
# Each joints_status list has six entries, three of them set to 1.
# NOTE(review): presumably 1 = joint enabled and 0 = joint disabled, in
# actuator order - confirm against the DisabledHalfCheetah wrapper.
wrapper_kwargs:
  HalfCheetah_back:
    joints_status: [1, 1, 1, 0, 0, 0]
  HalfCheetah_front:
    joints_status: [0, 0, 0, 1, 1, 1]
# No extra keyword arguments for the base environment.
env_kwargs: {}

# Environment init function and its per-environment arguments.
# The joints_status values below are identical to those under wrapper_kwargs.
# NOTE(review): looks like the two mappings must be kept in sync - confirm
# which of them the init function actually reads.
env_init_func: init_halfcheetah_env_trairl
env_init_func_kwargs:
  HalfCheetah_back:
    joints_status: [1, 1, 1, 0, 0, 0]
  HalfCheetah_front:
    joints_status: [0, 0, 0, 1, 1, 1]

# Source environment names. The same names are used as keys in
# wrapper_kwargs, env_init_func_kwargs, expert_files and decoder_out_dim.
source_env_name:
  - HalfCheetah_back
  - HalfCheetah_front
# reward_in_dim and disc_in_dim in this file are set to the same value.
abstraction_dim: 10
current_obs_only: true
device: cuda

# Encoder mode and input sizes.
# NOTE(review): presumably true means one encoder is shared by all source
# environments, so state_dim is a single scalar - confirm in the consumer.
use_single_encoder: true
state_dim: 17
# Equals the number of 1-entries in each joints_status list (three).
action_dim: 3

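# Alternative (disabled): use_single_encoder set to False, with one state_dim
# entry per source environment. The per-environment values are not filled in.
# use_single_encoder: False
# state_dim:
#   HalfCheetah_back:
#   HalfCheetah_front: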

# Expert trajectory files, one per source environment.
# NOTE(review): the paths are relative; which directory they resolve against
# depends on the loader - confirm.
expert_files:
  HalfCheetah_back: ./expert_trajectories/HalfCheetah_back.pt
  HalfCheetah_front: ./expert_trajectories/HalfCheetah_front.pt

# Learner buffer config
# NOTE(review): the unit of both sizes (transitions vs. trajectories) is not
# visible in this file - confirm against the buffer implementation.
learner_buffer_size: 10000000
learner_buffer_update_size: 1000

# Encoder config: four hidden sizes of 32 and the encoder learning rate.
encoder_hidden_dims: [32, 32, 32, 32]
# Keep the decimal point and signed exponent: YAML 1.1 loaders such as PyYAML
# resolve "3.0e-4" as a float but would load a bare "3e-4" as a string.
encoder_lr: 3.0e-4

# Decoder config: four hidden sizes of 32, one output size per source
# environment, and the decoder learning rate.
decoder_hidden_dims: [32, 32, 32, 32]
# Both output sizes are 17, the same value as state_dim.
decoder_out_dim:
  HalfCheetah_back: 17
  HalfCheetah_front: 17
decoder_lr: 3.0e-4

# VAE config
# No value is set for cycle_consistency_weight; it loads as null.
# NOTE(review): confirm that the consumer accepts null here, or whether a
# numeric weight was intended.
cycle_consistency_weight: null
vae_recon_weight: 1.0
vae_kld_weight: 0.1

# Reward network config
# reward_in_dim is set to the same value as abstraction_dim (10).
reward_in_dim: 10
reward_hidden_dims: [16, 16]
# NOTE(review): presumably the reward network is fed encoder outputs when
# this is true - confirm in the consumer.
reward_use_encoder: true
reward_net_lr: 3.0e-4
reward_batch_size: 256
reward_update_steps: 10
reward_norm_weight: 0.01

# Discriminator config
# disc_in_dim is set to the same value as abstraction_dim (10).
disc_in_dim: 10
disc_hidden_dims: [16, 16]
disc_net_lr: 3.0e-4
disc_batch_size: 256
disc_update_steps: 10
disc_gradient_penalty_weight: 10.0
disc_norm_weight: 0.01

# Policy config
# NOTE(review): MlpPolicy and OrnsteinUhlenbeckActionNoise look like
# Stable-Baselines3 names - confirm which library consumes these keys.
policy_type: MlpPolicy
# Extra policy arguments; only the optimizer weight decay is set.
policy_kwargs:
  optimizer_kwargs:
    weight_decay: 0.001
policy_hidden_dims: [256, 256]
# Action noise: noise class name and its standard deviation.
action_noise:
  type: OrnsteinUhlenbeckActionNoise
  std: 0.25
policy_update_steps: 5000
policy_lr: 5.0e-4
policy_batch_size: 256
policy_tau: 0.1
gamma: 0.99
seed: 1234


