# Run identification.
exp_name: ""
# Resolved by interpolation from `env.name`. This file's own `env` section
# does not define `name` — presumably the selected env config supplies it.
project_name: ${env.name}

cuda_deterministic: false
# `???` marks a mandatory value that has to be filled in later.
device: ???

# Top-level scalar hyperparameters. Their consumers are not visible in this
# file, so the notes below are assumptions to confirm against the training code.
# NOTE(review): `lamda` / `gamma` look like a GAE lambda and a discount
# factor — confirm.
lamda: 0.98
gamma: 0.99
# NOTE(review): presumably the random seed — confirm.
seed: 0
# Unset by default.
pretrain: null
# NOTE(review): presumably a weight on the IRL loss term — confirm.
irl_coef: 1000
# NOTE(review): presumably a PPO-style clipping range — confirm.
clip_param: 0.2

# No seeding is needed for IL; use 1000 for RL.
num_seed_steps: 0
only_expert_states: false

# Training settings.
train:
  batch: 32
  use_target: false

# Expert data settings.
expert:
  # NOTE(review): presumably the number of expert demonstrations to load — confirm.
  demos: 100
  # NOTE(review): presumably keeps every k-th expert step (1 = no subsampling) — confirm.
  subsample_freq: 1

# Evaluation settings.
eval:
  # Both values are unset by default. They are written as an explicit `null`
  # (instead of a bare, empty value) so the missing value is clearly
  # intentional; the parsed result is the same.
  policy: null
  threshold: null
  use_baselines: false
  # NOTE(review): presumably the number of evaluation episodes — confirm.
  eps: 10
  transfer: false
  expert_env: ''

# Environment settings.
# NOTE(review): the defaults list in this file also selects `env: cartpole`,
# so these keys are presumably merged with that group config — confirm which
# side takes precedence.
env:
  replay_mem: 50000
  initial_mem: 1280
  eps_steps: 1000
  eps_window: 100
  # Exponent notation is kept verbatim: how `5e5` / `5e3` are typed depends on
  # the YAML loader in use.
  learn_steps: 5e5
  eval_interval: 5e3

  # Whether to use pixels.
  from_pixels: false

# Method selection.
# NOTE(review): the defaults list in this file also selects `method: iq`, so
# this section is presumably merged with that group config — confirm.
method:
  type: iq

# Extra arguments: logging, checkpointing and run mode.
# Log every this many steps.
log_interval: 100
log_dir: logs/
# Save networks every this many epochs.
save_interval: 5
hydra_base_dir: ""
eval_only: false

# Set to true to do offline learning.
offline: false
# How many actor updates to run per environment step.
num_actor_updates: 1

# Hydra defaults list: picks one option from each config group
# (method, antagonist, protagonist, env) to compose with this file.
# NOTE(review): there is no `_self_` entry. How this file's own values are
# ordered relative to the group configs then depends on the Hydra version —
# confirm against the version this project targets.
defaults:
  - method: iq
  - antagonist: sac
  - protagonist: sac_ppo
  - env: cartpole