#task: 'Binary-WallGrid'
#group: 'Binary'
#device: 'cpu'
#verbose: 2
#env:
#  config_path: '../mujuco_environment/custom_envs/envs/configs/WGW-setting1-prob-99e-2.yaml'
#  train_env_id : 'WGW-v0'
#  eval_env_id: 'WGW-v0'
#  save_dir: '../save_model'
#  cost_info_str: 'cost'
#  use_cost: True
#  reward_gamma: 0.99
#  cost_gamma: 0.99  # no cost
#  dont_normalize_obs: True
#  dont_normalize_reward: True
#  dont_normalize_cost: True  # cost
#  record_info_names: ['x_position', 'y_position']
#  record_info_input_dims: [ 1, 0 ]  # height, width
#  visualize_info_ranges: [ [ 0, 6 ], [ 0, 6 ]  ]  #  [ [ 0, 7 ], [ 0, 7 ]  ]
#
#running:
#  n_iters: 10
#  n_eval_episodes: 10
#  save_every: 1
#  expert_rollouts: 20
#  sample_rollouts: 20
#  store_sample_num: null
#  expert_path: '../data/expert_data/WallGridWorld-setting1/'
#  use_buffer: True
#  store_by_game: True
#  store_sample_rollouts: 1000
#
#iteration:
#  warmup_timesteps: 0
#  forward_timesteps: 10
#  stopping_threshold: 0.001
#  max_iter: 10
#  penalty_initial_value: 0.1
#  penalty_learning_rate: 0.1
#  gamma: 0.7
#  reset_policy: False
#  reset_every: null
#  nu_max_clamp: 1
#  nu_min_clamp: null
#
#CN:
#  cn_learning_rate: 0.0003
#  cn_reg_coeff: 0.1
#  cn_layers: [64, 64, 64]
#  cn_batch_size: 64
#  cn_obs_select_name: null  # null means all
#  cn_acs_select_name: [-1]  # [-1] means none
#  no_importance_sampling: True
#  per_step_importance_sampling: False
#  clip_obs: 20
#  cn_normalize: False
#  cn_target_kl_old_new: 10
#  cn_target_kl_new_old: 10
#  train_gail_lambda: True
#  cn_eps: 0.00001
#  backward_iters: 20
#  anneal_clr_by_factor: 1.0
#  recon_obs: True


# Active configuration: run identification and top-level runtime settings.
# NOTE(review): the commented-out blocks above and below this one appear to be
# the same configuration pointed at other settings (setting1 / setting4) —
# confirm before removing them.
task: 'Binary-WallGrid'
group: 'Binary'
device: 'cpu'
verbose: 2
# Environment section. Training and evaluation use the same env id, and
# config_path selects the 'setting2' environment configuration file.
env:
  config_path: '../mujuco_environment/custom_envs/envs/configs/WGW-setting2-prob-99e-2.yaml'
  train_env_id: 'WGW-v0'
  eval_env_id: 'WGW-v0'
  save_dir: '../save_model'
  cost_info_str: 'cost'
  use_cost: true
  # Discount factors for reward and cost are set to the same value.
  reward_gamma: 0.99
  # NOTE(review): the previous inline comment on cost_gamma read "no cost",
  # although use_cost is enabled above — confirm which is intended.
  cost_gamma: 0.99
  # All three dont_normalize_* flags are set.
  dont_normalize_obs: true
  dont_normalize_reward: true
  dont_normalize_cost: true
  record_info_names:
    - 'x_position'
    - 'y_position'
  record_info_input_dims: [1, 0]  # height, width
  # One [min, max] pair per recorded info name (presumably — verify against
  # the consumer). Alternative kept from the original: [[0, 7], [0, 7]]
  visualize_info_ranges:
    - [0, 6]
    - [0, 6]

# Run-loop section: iteration/evaluation counts, rollout counts, the expert
# data location, and sample-storage options.
running:
  n_iters: 10
  n_eval_episodes: 10
  save_every: 1
  # Expert and sampled rollouts use the same count.
  expert_rollouts: 20
  sample_rollouts: 20
  store_sample_num: null
  # Points at the 'setting2' expert data directory.
  # NOTE(review): presumably must match the setting named in env.config_path —
  # confirm when switching settings.
  expert_path: '../data/expert_data/WallGridWorld-setting2/'
  use_buffer: true
  store_by_game: true
  store_sample_rollouts: 1000

# Iteration section: timestep budgets, stopping criteria, penalty settings,
# policy-reset options, and nu clamp values.
iteration:
  warmup_timesteps: 0
  forward_timesteps: 10
  stopping_threshold: 0.001
  max_iter: 10
  # Penalty initial value and learning rate are both 0.1.
  penalty_initial_value: 0.1
  penalty_learning_rate: 0.1
  gamma: 0.7
  # Policy reset is off and no reset interval is given.
  reset_policy: false
  reset_every: null
  # Only the max clamp is given a value; the min clamp is null.
  nu_max_clamp: 1
  nu_min_clamp: null

# CN section: learning rate, regularization, layer sizes, batch size, input
# selection, importance-sampling flags, KL targets, and backward iterations.
# NOTE(review): the commented-out setting1 block at the top of this file uses
# cn_learning_rate 0.0003 rather than 0.003 — confirm the difference is
# intentional.
CN:
  cn_learning_rate: 0.003
  cn_reg_coeff: 0.1
  cn_layers: [64, 64, 64]
  cn_batch_size: 64
  # Input selection.
  cn_obs_select_name: null  # null means all
  cn_acs_select_name: [-1]  # [-1] means none
  # Importance-sampling flags.
  no_importance_sampling: true
  per_step_importance_sampling: false
  clip_obs: 20
  cn_normalize: false
  # Both KL targets share the same value.
  cn_target_kl_old_new: 10
  cn_target_kl_new_old: 10
  train_gail_lambda: true
  cn_eps: 0.00001
  backward_iters: 20
  anneal_clr_by_factor: 1.0
  recon_obs: true


#task: 'Binary-WallGrid'
#group: 'Binary'
#device: 'cpu'
#verbose: 2
#env:
#  config_path: '../mujuco_environment/custom_envs/envs/configs/WGW-setting4-prob-99e-2.yaml'
#  train_env_id : 'WGW-v0'
#  eval_env_id: 'WGW-v0'
#  save_dir: '../save_model'
#  cost_info_str: 'cost'
#  use_cost: True
#  reward_gamma: 0.99
#  cost_gamma: 0.99  # no cost
#  dont_normalize_obs: True
#  dont_normalize_reward: True
#  dont_normalize_cost: True  # cost
#  record_info_names: ['x_position', 'y_position']
#  record_info_input_dims: [ 1, 0 ]  # height, width
#  visualize_info_ranges: [ [ 0, 6 ], [ 0, 6 ]  ]  #  [ [ 0, 7 ], [ 0, 7 ]  ]
#
#running:
#  n_iters: 10
#  n_eval_episodes: 10
#  save_every: 1
#  expert_rollouts: 20
#  sample_rollouts: 20
#  store_sample_num: null
#  expert_path: '../data/expert_data/WallGridWorld-setting4/'
#  use_buffer: True
#  store_by_game: True
#  store_sample_rollouts: 1000
#
#iteration:
#  warmup_timesteps: 0
#  forward_timesteps: 10
#  stopping_threshold: 0.001
#  max_iter: 10
#  penalty_initial_value: 0.1
#  penalty_learning_rate: 0.1
#  gamma: 0.7
#  reset_policy: False
#  reset_every: null
#  nu_max_clamp: 1
#  nu_min_clamp: null
#
#CN:
#  cn_learning_rate: 0.003
#  cn_reg_coeff: 0.1
#  cn_layers: [64, 64, 64]
#  cn_batch_size: 64
#  cn_obs_select_name: null  # null means all
#  cn_acs_select_name: [-1]  # [-1] means none
#  no_importance_sampling: True
#  per_step_importance_sampling: False
#  clip_obs: 20
#  cn_normalize: False
#  cn_target_kl_old_new: 10
#  cn_target_kl_new_old: 10
#  train_gail_lambda: True
#  cn_eps: 0.00001
#  backward_iters: 20
#  anneal_clr_by_factor: 1.0
#  recon_obs: True