# Top-level run settings for this experiment configuration.
# NOTE(review): the meaning of every key is defined by the code that loads
# this file, which is not visible here; remarks below are inferred from key
# names and should be confirmed against the loader.
task: 'ICRL-WallGrid'
group: 'GICRL'
device: 'cuda'  # presumably the compute device string; verify a GPU is available
verbose: 2  # presumably a logging verbosity level; confirm the valid range
# Environment settings: environment config file, environment ids, output
# directory, discount factors, normalization switches and recorded info fields.
# NOTE(review): key semantics are defined by the loader (not visible here);
# remarks on individual keys are inferred from their names - confirm.
env:
  # Relative path. The directory spelling 'mujuco' is kept exactly as written:
  # it is a runtime path and must match the directory name on disk.
  config_path: '../mujuco_environment/custom_envs/envs/configs/WGW-setting2-prob-99e-2.yaml'
  # The same environment id is used for training and evaluation.
  train_env_id: 'WGW-v0'
  eval_env_id: 'WGW-v0'
  save_dir: '../save_model'
  cost_info_str: 'cost'
  use_cost: True
  reward_gamma: 0.99
  # NOTE(review): the original comment here read "no cost", yet use_cost is
  # True above - confirm which of the two is intended.
  cost_gamma: 0.99
  dont_normalize_obs: True
  dont_normalize_reward: True
  dont_normalize_cost: True
  record_info_names: ['x_position', 'y_position', 'obs']
  record_info_input_dims: [1, 0]  # height, width
  visualize_info_ranges: [[0, 6], [0, 6]]  # alternative noted by author: [[0, 7], [0, 7]]
  # Disabled keys (presumably observation noise - confirm before enabling):
  # noise_mean: 0
  # noise_std: 0.1

# Run-level settings: iteration count, evaluation, checkpointing, rollout
# counts and expert-data location.
# NOTE(review): key semantics are defined by the loader (not visible here);
# the remarks below are inferred from key names - confirm.
running:
  n_iters: 10
  n_eval_episodes: 10
  save_every: 1  # presumably counted in iterations - confirm the unit
  expert_rollouts: 20
  sample_rollouts: 20
  store_sample_num: null  # explicitly unset; how null is handled is up to the loader
  # Relative path with a trailing slash; kept exactly as the loader receives it.
  expert_path: '../data/expert_data/WallGridWorld-setting2/'
  use_buffer: True
  store_by_game: True
  store_sample_rollouts: 1000

# Settings for the "iteration" section (timesteps, stopping criterion,
# penalty term, discount, policy reset and nu clamping).
# NOTE(review): key semantics are defined by the loader (not visible here);
# the remarks below are inferred from key names - confirm.
iteration:
  warmup_timesteps: 0
  forward_timesteps: 10
  stopping_threshold: 0.001
  max_iter: 10  # NOTE(review): 5 appears to be an earlier/alternative value
  penalty_initial_value: 0.1
  penalty_learning_rate: 0.1  # NOTE(review): 0.05 appears to be an earlier/alternative value
  # Differs from reward_gamma / cost_gamma (0.99) in the env section;
  # presumably a separate discount used by this stage - confirm.
  gamma: 0.7
  reset_policy: False
  reset_every: null  # explicitly unset
  nu_max_clamp: 1
  nu_min_clamp: null  # explicitly unset; presumably means no lower clamp - confirm

# Settings for the "CN" section (presumably the constraint network - confirm):
# optimizer, architecture, input selection, importance sampling and
# update-loop controls.
# NOTE(review): key semantics are defined by the loader (not visible here).
CN:
  cn_learning_rate: 0.003
  cn_reg_coeff: 0.1
  cn_layers: [64, 64, 64]
  cn_batch_size: null  # explicitly unset; presumably means full batch - confirm
  cn_obs_select_name: null  # null means all
  cn_acs_select_name: [-1]  # [-1] means none
  no_importance_sampling: False
  per_step_importance_sampling: False
  clip_obs: 20
  cn_normalize: False
  cn_target_kl_old_new: 10
  cn_target_kl_new_old: 10
  train_gail_lambda: False
  # Keep this in plain decimal form: YAML 1.1 loaders may read an exponent
  # literal without a dot (e.g. 1e-5) as a string instead of a float.
  cn_eps: 0.00001
  backward_iters: 20
  anneal_clr_by_factor: 1.0
  # The Distributional section defines its own, separate recon_obs key.
  recon_obs: True

# Settings for the "Distributional" section (method, quantile counts,
# target-update rate, learning rate and network layers).
# NOTE(review): key semantics are defined by the loader (not visible here);
# the remarks below are inferred from key names - confirm.
Distributional:
  method: QRDQN  # plain (unquoted) string; presumably selects a quantile-regression DQN - confirm
  N_quantiles: 32
  cost_quantile: 8  # presumably an index/count within N_quantiles - confirm
  tau_update: 0.01
  LR_QN: 0.0003
  qnet_layers: [64, 64]
  type: Expectation  # plain (unquoted) string
  prob_yita: null  # explicitly unset
  # The CN section defines its own, separate recon_obs key.
  recon_obs: True
  weight: 0

# Settings for the "gflownet" section: layer sizes, learning rate and how
# generated data is weighted and produced.
# NOTE(review): key semantics are defined by the loader (not visible here);
# the remarks below are inferred from key names - confirm.
gflownet:
  gflownet_sizes: [128, 128]
  gflownet_lr: 0.0003
  generated_weight: 0.001
  generated_mode: traj  # plain (unquoted) string; presumably trajectory-level generation - confirm