# Base environment IDs (source side and target side)
source_base_env_id: 'Ant-v5'
target_base_env_id: 'HalfCheetah-v5'

# State/latent dimensions and compute device
# NOTE(review): stock Gymnasium Ant-v5 observations are larger than 27 unless
# contact forces are excluded from the observation; 27 presumably reflects how
# the source env is constructed — confirm against the env setup code.
source_state_dim: 27
target_state_dim: 17
# Same value as reward_in_dim and disc_in_dim further down in this file;
# presumably they must stay in sync — TODO confirm.
abstraction_dim: 16
device: 'cuda'

# Expert trajectory file per environment ID. The only entry uses the same ID
# as target_base_env_id / target_env_id in this file.
target_expert_file:
  HalfCheetah-v5: './expert_trajectories/HalfCheetah.pt'

# Learner Buffer config
# Plain integers (no digit separators, which not every YAML loader accepts).
# Units are not visible in this file; presumably counts of stored samples —
# TODO confirm against the consumer.
learner_buffer_size: 10000000
learner_buffer_update_size: 1000

# Target environment setup
target_env_id: 'HalfCheetah-v5'
target_env_n_envs: 4
target_env_action_dim: 6
# Each *_kwargs entry is an explicit empty mapping ({}), not null, so no
# extra keyword arguments are supplied from this file.
target_env_kwargs: {}
# Wrapper and init function are given as bare names; presumably resolved by
# the consuming code — confirm where they are looked up.
target_env_wrapper: 'CustomReward'
target_env_wrapper_kwargs: {}
target_env_init_func: 'init_halfcheetah_env_transfer'
target_env_init_func_kwargs: {}

# Saved source-side model files (.pth). All four paths share one run
# directory (2025_05_09_07_08_54) and the same 5005000 subfolder.
source_env_encoder_path: './runs/trairl/Ant-v5/2025_05_09_07_08_54/saved_model/5005000/Ant_front_right_back_right_encoder.pth'
source_env_decoder_path: './runs/trairl/Ant-v5/2025_05_09_07_08_54/saved_model/5005000/Ant_front_right_back_right_decoder.pth'
source_env_disc_path: './runs/trairl/Ant-v5/2025_05_09_07_08_54/saved_model/5005000/Ant_front_right_back_right_disc_net.pth'
source_env_reward_path: './runs/trairl/Ant-v5/2025_05_09_07_08_54/saved_model/5005000/Ant_front_right_back_right_reward_net.pth'

# Update step count and loss-term weights (how they are combined is defined
# by the training code, not visible here).
vae_update_steps: 25
disc_loss_weight: 1.0
vae_recon_weight: 1.0
vae_kld_weight: 0.1
cycle_consistency_weight: 0.5

# Encoder config
# Hidden layer sizes are listed for both sides; a learning rate is listed
# only for the target side.
source_encoder_hidden_dims: [32, 32, 32]
target_encoder_hidden_dims: [16, 16]
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML resolve a dot-less form like `3e-4` as a string, not a float.
target_encoder_lr: 3.0e-4

# Decoder config
# Hidden layer sizes are listed for both sides; a learning rate is listed
# only for the target side.
source_decoder_hidden_dims: [64, 64, 64]
target_decoder_hidden_dims: [32, 32, 32]
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML resolve a dot-less form like `3e-4` as a string, not a float.
target_decoder_lr: 3.0e-4

# Reward config
# reward_in_dim has the same value as abstraction_dim (16) in this file;
# presumably the two must stay in sync — TODO confirm.
reward_in_dim: 16
reward_hidden_dims: [16, 16]
# Canonical lowercase boolean. The capitalised spelling `True` is resolved as
# a boolean only by YAML 1.1 / 1.2-core resolvers; `true` is a boolean under
# every schema that has booleans.
current_obs_only: true

# Disc config
# "Disc" presumably means discriminator (cf. the *_disc_net.pth path above) —
# confirm. disc_in_dim has the same value as abstraction_dim (16) in this file.
disc_in_dim: 16
disc_hidden_dims: [16, 16]
disc_gradient_penalty_weight: 10.0

# Policy config for the target env
# NOTE(review): policy_type / policy_kwargs / action_noise look like
# Stable-Baselines3-style arguments — confirm against the consuming code.
policy_type: 'MlpPolicy'
policy_kwargs:
  optimizer_kwargs:
    weight_decay: 0.001
policy_tau: 0.1
policy_hidden_dims: [128, 128]
# Nested mapping: `type` names the noise class, `std` is its numeric setting.
# (The key line must not carry trailing whitespace.)
action_noise:
  type: 'OrnsteinUhlenbeckActionNoise'
  std: 0.5
policy_update_steps: 8000
# Decimal point plus signed exponent keeps this a float under YAML 1.1
# loaders such as PyYAML (a dot-less `1e-3` would load as a string).
policy_lr: 1.0e-3
policy_batch_size: 256
policy_gamma: 0.99
# seed lives in the policy stanza; whether it seeds only the policy or the
# whole run is not visible in this file.
seed: 1234
policy_buffer_size: 500000
policy_learning_starts: 50000

