# Base environment ids for the source and target domains of this run.
# NOTE(review): target_base_env_id has the same value as target_env_id further down
# in this file — presumably they must agree; confirm against the consumer.
source_base_env_id: HalfCheetah-v5
target_base_env_id: Ant-v5

# Environment details.
# NOTE(review): the state dims presumably equal the observation sizes of the source
# and target environments as configured in this file (the target env disables
# include_cfrc_ext_in_observation) — confirm against the actual observation spaces.
source_state_dim: 17
target_state_dim: 27
# Same value (10) as reward_in_dim and disc_in_dim in this file.
# NOTE(review): presumably those must be changed together with this key — confirm.
abstraction_dim: 10
# Compute device name; `cuda` presumably requires a GPU on the host — confirm fallback.
device: cuda

# Expert trajectory file(s), keyed by environment id.
# NOTE(review): the key presumably has to match the target env id used for this
# run, and the relative path is resolved from the working directory — confirm.
target_expert_file:
  Ant-v5: ./expert_trajectories/Ant.pt

# Learner buffer config.
# learner_buffer_size is 10,000,000.
# NOTE(review): units (presumably stored samples) and the exact meaning of
# learner_buffer_update_size are not visible in this file — confirm in the consumer.
learner_buffer_size: 10000000
learner_buffer_update_size: 1000

# Target environment.
# NOTE(review): target_env_id has the same value as target_base_env_id at the top
# of this file — presumably they must agree; confirm against the consumer.
target_env_id: Ant-v5
target_env_n_envs: 4
target_env_action_dim: 8
# Keyword arguments for the target environment.
# Booleans use canonical lowercase `true`/`false` so every YAML loader reads them
# as booleans.
target_env_kwargs:
  terminate_when_unhealthy: false
  include_cfrc_ext_in_observation: false
target_env_wrapper: CustomReward
target_env_wrapper_kwargs: {}
target_env_init_func: init_ant_env
# Holds the same two settings as target_env_kwargs above.
# NOTE(review): presumably both mappings must be kept in sync — confirm which one
# the environment factory actually reads.
target_env_init_func_kwargs:
  terminate_when_unhealthy: false
  include_cfrc_ext_in_observation: false

# Model config.
# Pretrained source-environment checkpoints. All four paths point into the same
# run directory and saved step (2025_05_07_15_47_38 / 1605000).
# NOTE(review): encoder/decoder files are named `*_back_*` while disc/reward files
# are named `*_front_*`; what back/front denote is not visible here — confirm
# before substituting checkpoints from a different run.
source_env_encoder_path: ./runs/trairl/HalfCheetah-v5/2025_05_07_15_47_38/saved_model/1605000/HalfCheetah_back_encoder.pth
source_env_decoder_path: ./runs/trairl/HalfCheetah-v5/2025_05_07_15_47_38/saved_model/1605000/HalfCheetah_back_decoder.pth
source_env_disc_path: ./runs/trairl/HalfCheetah-v5/2025_05_07_15_47_38/saved_model/1605000/HalfCheetah_front_disc_net.pth
source_env_reward_path: ./runs/trairl/HalfCheetah-v5/2025_05_07_15_47_38/saved_model/1605000/HalfCheetah_front_reward_net.pth
# VAE / consistency training settings.
# NOTE(review): the *_weight keys are presumably coefficients on individual loss
# terms and vae_update_steps a per-iteration step count — confirm in the trainer.
vae_update_steps: 500
vae_recon_weight: 0.5
vae_kld_weight: 0.1
cycle_consistency_weight: 1.0

# Encoder config.
# Hidden-layer width lists for the source and target encoders (four entries of 32 each).
# NOTE(review): the source dims presumably have to match the architecture stored in
# the checkpoint at source_env_encoder_path — confirm before changing them.
source_encoder_hidden_dims: [32, 32, 32, 32]
target_encoder_hidden_dims: [32, 32, 32, 32]
# Only the target encoder has a learning rate in this file.
# Keep the `3.0e-4` spelling (decimal point + signed exponent): some YAML 1.1
# loaders treat exponent forms without a dot, such as `3e-4`, as strings.
target_encoder_lr: 3.0e-4

# Decoder config.
# Hidden-layer width lists for the source and target decoders (four entries of 32 each).
# NOTE(review): the source dims presumably have to match the architecture stored in
# the checkpoint at source_env_decoder_path — confirm before changing them.
source_decoder_hidden_dims: [32, 32, 32, 32]
target_decoder_hidden_dims: [32, 32, 32, 32]
# Only the target decoder has a learning rate in this file.
# Keep the `3.0e-4` spelling (decimal point + signed exponent): some YAML 1.1
# loaders treat exponent forms without a dot, such as `3e-4`, as strings.
target_decoder_lr: 3.0e-4

# Reward network config.
# reward_in_dim has the same value (10) as abstraction_dim defined earlier in this file.
# NOTE(review): presumably the reward net consumes the abstracted state, so the two
# must change together — confirm against the model code.
reward_in_dim: 10
reward_hidden_dims: [16, 16]
# Canonical lowercase boolean so every YAML loader reads it as a boolean.
# NOTE(review): the name suggests the reward is computed from the current
# observation alone — confirm in the reward network.
current_obs_only: true

# Discriminator config.
# disc_in_dim has the same value (10) as abstraction_dim defined earlier in this file.
# NOTE(review): presumably the discriminator consumes the abstracted state, so the
# two must change together — confirm against the model code.
disc_in_dim: 10
disc_hidden_dims: [16, 16]
# NOTE(review): presumably the coefficient on a gradient-penalty loss term — confirm.
disc_gradient_penalty_weight: 10.0

# Policy trained in the target environment.
# NOTE(review): `MlpPolicy` and `OrnsteinUhlenbeckActionNoise` look like
# Stable-Baselines3 names — presumably resolved by the training script; confirm.
policy_type: MlpPolicy
policy_kwargs:
  optimizer_kwargs:
    weight_decay: 0.001
# NOTE(review): presumably the soft target-update coefficient — confirm.
policy_tau: 0.1
policy_hidden_dims: [400, 400]
# Exploration noise: a noise type name plus its standard deviation.
action_noise:
  type: OrnsteinUhlenbeckActionNoise
  std: 0.5
policy_update_steps: 8000
# Keep the `1.0e-3` spelling (decimal point + signed exponent): some YAML 1.1
# loaders treat exponent forms without a dot, such as `1e-3`, as strings.
policy_lr: 1.0e-3
policy_batch_size: 256
policy_gamma: 0.99
# NOTE(review): listed among the policy keys; whether this seeds only the policy or
# the whole run is not visible here — confirm.
seed: 1234
policy_buffer_size: 500000

