# Top-level run settings.
exp_name: ''
project_name: p-goal-prox

cuda_deterministic: false
device: 'cuda:0'

gamma: 0.99
seed: 0
prefix: debug
# Whether to use wandb.
wand: true
# Checkpoint path. The original file keeps `pretrain: null` as a
# commented-out alternative.
pretrain: '~/projects/iq_learn/saved_model/soft_q/softq_iq_MiniGrid-FourRooms-v0_3225'
video_save_dir: '~/projects/iq_learn/video_logs/'
# Seeding is not needed for IL (use 1000 for RL).
num_seed_steps: 0
only_expert_states: false

# Values under the `exp` key.
# Note: `exp.gamma` is a separate key from the top-level `gamma` in this file.
exp:
  gamma_scale: false
  gamma: 0

# Values under the top-level `train` key (`reward_gen.train` is a different
# mapping with its own `batch`).
train:
  batch: 32
  use_target: false
  soft_update: false

# Values under the `expert` key.
# NOTE(review): presumably `demos` is the number of expert demonstrations and
# `subsample_freq` the stride used when subsampling them — confirm against the
# code that reads this config.
expert:
  demos: 1
  subsample_freq: 1

# Values under the `eval` key.
# `policy` and `threshold` are intentionally unset; they are spelled as an
# explicit `null` rather than a bare empty value so the intent is visible.
eval:
  policy: null
  threshold: null
  use_baselines: false
  eps: 10
  transfer: false
  expert_env: ''
  video_eval_interval: 10000

# Values under the `env` key. The defaults list in this file also selects an
# `env` config group option, so these values are merged with that group file.
env:
  replay_mem: 50000
  initial_mem: 1280
  eps_steps: 1000
  eps_window: 100
  # Scientific notation is written with a mantissa dot and an explicit
  # exponent sign: a bare `5e5` is resolved as a *string* by YAML 1.1 loaders
  # such as plain PyYAML, whereas this form is a float under every loader.
  learn_steps: 5.0e+5
  eval_interval: 5.0e+3

  # use pixels
  from_pixels: false

# Values under the `method` key.
# NOTE(review): `type: iq` mirrors the `method: iq` entry of the defaults list
# in this file — confirm which of the two the consuming code actually reads.
method:
  type: iq

# Extra args
# Log every this many steps.
log_interval: 100
log_dir: '~/projects/iq_learn/logs/'
# Save networks every this many epochs.
save_interval: 5
hydra_base_dir: '~/projects/iq_learn/hydra/'
eval_only: false
if_debug: false

# Do offline learning.
offline: false
# Number of actor updates per env step.
num_actor_updates: 1

# Hydra defaults list: for each config group (`method`, `agent`, `env`) it
# selects the named option (`iq`, `softq`, `cartpole`) to compose into this
# config.
# NOTE(review): this file also defines `env` and `method` keys directly. Which
# side wins on conflicting keys depends on the Hydra version and on whether
# `_self_` is listed here — confirm against the Hydra version in use.
defaults:
  - method: iq
  - agent: softq
  - env: cartpole

# Hydra runtime settings: `hydra.run.dir` is the run output directory.
# NOTE(review): `~` is an ordinary character to the YAML loader; confirm that
# whatever consumes this path expands it to the home directory.
hydra:
  run:
    dir: '~/projects/iq_learn/outputs/'

# Values under the `reward_gen` key: a `model` mapping and a `train` mapping.
reward_gen:
  model:
    # Written with a mantissa dot so that YAML 1.1 loaders (e.g. plain PyYAML)
    # resolve it as a float; a bare `1e-3` is loaded as a string there.
    lr: 1.0e-3
    type: 'basic'  # 'basic' or 'iqgen' or 'dist' or 'reg'
    load_path: null
    reward_rescale: null  # [-1, 1], [0, 1]
    basic:
      input_config: 'sas'  # 'sa', 'ss', 'sas'
    dist:
      type: 'proximity'  # 'iq_critic' or 'proximity'

      # for proximity
      num_ensembles: 5
      panel_var: true
      mse_before_mean: false
      var_for_both_train_and_eval: true
    reg:
      # "none" or "dist_constraint" or "dim_reduction"
      type: 'dist_constraint'
      # Same float spelling as `lr` above, for the same reason.
      coef: 1.0e-3

  train:
    batch: 32
    learn_steps: 50000
    eval_interval: 100
    log_interval: 100
    save_interval: 100  # Save networks every this many epochs
    video_eval_interval: 1000  # gen_videos
    add_online_data: true
    online_data_size: 500000  # original dataset size = 13556
    online_rand_threshold: 1.0
    
# Values under the `expert_gen` key.
# NOTE(review): presumably the number of episodes to roll out when generating
# expert data — confirm against the code that reads this config.
expert_gen:
  num_episodes: 1000
