# Top-level run settings.
# NOTE(review): `obj` presumably selects the training objective/algorithm
# (here AIRL) — confirm against the config loader.
obj: airl
# NOTE(review): the meaning of `IS` is not visible in this file — confirm.
IS: false
seed: -1  # use -1 for a random seed
# NOTE(review): -1 presumably means "no CUDA device / run on CPU" — confirm.
cuda: -1

# Environment settings.
env:
  env_name: Humanoid-v3
  # NOTE(review): presumably the episode horizon in steps — confirm.
  T: 1000
  # NOTE(review): presumably selects which state dimensions are used,
  # with `all` meaning no subsetting — confirm.
  state_indices: all

# AIRL-specific parameters
airl:
  total_timesteps: 4200000  # sized for 500 evaluations (500 × 8400)
  expert_episodes: 1
  # NOTE(review): same value as `eval.episodes` further down; confirm which
  # of the two the consumer actually reads.
  eval_episodes: 20
  eval_freq: 8400          # evaluate every 8400 steps, 500 evaluations in total
  demo_batch_size: 256     # demonstration batch size, kept small to suit the single expert trajectory
  gen_replay_buffer_capacity: 4096
  n_disc_updates_per_round: 8
  n_envs: 4
  reward_hid_sizes: [256, 256]

# PPO learner parameters
# NOTE(review): key names match common PPO constructor arguments (e.g.
# Stable-Baselines3); presumably forwarded to the learner as-is — confirm.
ppo:
  batch_size: 256
  learning_rate: 0.0003
  gamma: 0.99
  clip_range: 0.2
  ent_coef: 0.01
  vf_coef: 0.5
  n_epochs: 10

# Evaluation parameters
eval:
  # NOTE(review): same value as `airl.eval_episodes`; confirm which of the
  # two the consumer actually reads.
  episodes: 20
  deterministic: true
