# Top-level run settings.
exp_name: ''
project_name: p-goal-prox

cuda_deterministic: false
# NOTE(review): pinned to device index 6; presumably overridden per
# machine (e.g. on the command line) — confirm.
device: cuda:6

gamma: 0.99
seed: 0
pretrain: null

# Seeding is not needed for IL; use 1000 for RL.
num_seed_steps: 0
only_expert_states: false

# Training options: batch size plus two switches, both disabled by default.
train:
  batch: 32
  use_target: false
  soft_update: false

# Expert data settings.
# presumably `demos` is the number of expert demonstrations used and
# `subsample_freq` the stride used when subsampling them — TODO confirm
# against the code that loads the expert data.
expert:
  demos: 1
  subsample_freq: 1

# Evaluation settings.
# `policy` and `threshold` carry no default value; they are written as an
# explicit `null` (instead of a bare `key:`) so the intent is unambiguous.
# presumably both are supplied through overrides when evaluating — confirm.
eval:
  policy: null
  threshold: null
  use_baselines: false
  eps: 10
  transfer: false
  expert_env: ''

# Base environment settings; the `env` entry in the `defaults` list of this
# file selects an environment-specific config for the same key.
env:
  replay_mem: 50000
  initial_mem: 1280
  eps_steps: 1000
  eps_window: 100
  # Written with a mantissa dot and a signed exponent: a bare `5e5` is
  # resolved as a *string* by YAML 1.1 loaders such as PyYAML, whereas this
  # form is a float (same numeric value) under YAML 1.1, YAML 1.2 and
  # OmegaConf alike.
  learn_steps: 5.0e+5
  eval_interval: 5.0e+3

  # use pixels
  from_pixels: false

# Method selection. The `defaults` list in this file also contains a
# `method: iq` entry for the same key.
# NOTE(review): how the two are merged depends on the Hydra version — confirm.
method:
  type: iq

# Extra args

# Emit a log entry once every `log_interval` steps.
log_interval: 100
log_dir: '~/projects/iq_learn/logs/'
# Save the networks once every `save_interval` epochs.
save_interval: 5
hydra_base_dir: '~/projects/iq_learn/hydra/'
eval_only: false
if_debug: false

# Offline learning switch (disabled by default).
offline: false
# How many actor updates are performed for each env step.
num_actor_updates: 1

# Hydra defaults list: selects one option for each of the `method`, `agent`
# and `env` config groups.
# NOTE(review): there is no `_self_` entry, so whether the keys in this file
# override or are overridden by the selected group configs depends on the
# Hydra version in use — confirm.
defaults:
  - method: iq
  - agent: softq
  - env: cartpole

# Hydra runtime settings.
# NOTE(review): the run directory is a fixed string with no per-run
# interpolation, and it starts with `~`; Hydra may not expand `~`, which
# would produce a literal `~` directory — verify on a real run.
hydra:
  run:
    dir: '~/projects/iq_learn/outputs/'
