# Top-level run settings.
# NOTE(review): meanings below are inferred from key names — confirm against
# the code that loads this file.
task: 'ICRL-WallGrid-Discrete'
group: 'ICRL'
device: 'cuda'  # presumably the compute device identifier — TODO confirm
verbose: 2
# Environment settings: environment config file, train/eval environment ids,
# output directory, discount factors, normalization switches, and the info
# fields that get recorded.
env:
  # NOTE(review): 'mujuco' (sic) must match the on-disk directory name; verify
  # before "correcting" the spelling. Relative paths are presumably resolved
  # from the launch directory — TODO confirm.
  config_path: '../mujuco_environment/custom_envs/envs/configs/WGW-setting4.yaml'
  train_env_id: 'WGW-v0'
  eval_env_id: 'WGW-v0'
  save_dir: '../save_model'
  cost_info_str: 'cost'
  use_cost: true
  reward_gamma: 0.99
  # NOTE(review): this key was previously annotated "no cost", yet use_cost is
  # enabled above — confirm whether cost_gamma is actually consumed.
  cost_gamma: 0.99
  dont_normalize_obs: true
  dont_normalize_reward: true
  dont_normalize_cost: true
  record_info_names: ['x_position', 'y_position']
  record_info_input_dims: [1, 0]  # height, width
  # Alternative range kept from an earlier revision: [[0, 7], [0, 7]]
  visualize_info_ranges: [[0, 6], [0, 6]]

# Run-level settings: iteration and evaluation counts, checkpoint cadence,
# expert/sample rollout counts, and sample-storage options.
running:
  n_iters: 20
  n_eval_episodes: 10
  save_every: 1
  expert_rollouts: 20
  sample_rollouts: 20
  store_sample_num: null
  # NOTE(review): expert data is read from "WallGridWorld-setting1" while
  # env.config_path points at "WGW-setting4" — confirm the mismatch is
  # intentional.
  expert_path: '../data/expert_data/WallGridWorld-setting1/'
  use_buffer: true
  store_by_game: true
  store_sample_rollouts: 1000

# Settings for the "iteration" section. Key names suggest a penalty schedule
# (penalty_*), clamps (nu_*), and a stopping rule — confirm the exact semantics
# against the consumer.
iteration:
  warmup_timesteps: 0
  stopping_threshold: 0.001
  max_iter: 40  # previously 20
  penalty_initial_value: 0.1
  penalty_learning_rate: 0.1
  # Separate key from env.reward_gamma / env.cost_gamma (both 0.99).
  gamma: 0.7
  reset_policy: false
  reset_every: null
  nu_max_clamp: 1
  nu_min_clamp: null
  zeta_max: 0.1
  epsilon: 0

# "CN" section (presumably the constraint network — TODO confirm).
# Only cn_normalize, backward_iters, and recon_obs are active. The commented-out
# keys are kept for reference; presumably the consumer falls back to its own
# defaults for them — verify before relying on that.
CN:
  # cn_learning_rate: 0.003
  # cn_reg_coeff: 0.1
  # cn_layers: [64, 64, 64]
  # cn_batch_size: null
  # cn_obs_select_name: null  # null means all
  # cn_acs_select_name: [-1]  # [-1] means none
  # no_importance_sampling: False
  # per_step_importance_sampling: False
  # clip_obs: 20
  cn_normalize: false
  # cn_target_kl_old_new: 10
  # cn_target_kl_new_old: 10
  # train_gail_lambda: False
  # cn_eps: 0.00001
  backward_iters: 20
  # anneal_clr_by_factor: 1.0
  recon_obs: true
