# @package _global_

# Top-level scalars; the `@package _global_` header places them at the root
# of the composed config.
# NOTE(review): the code that reads these is not visible from this file.
# `left`/`right` look like the lower/upper ends of an interval and
# `penalty`/`value_ratio` like loss weights — confirm against the trainer.
left: -1
right: 1
penalty: 0.5
value_ratio: 0.5
# Agent selection and per-environment agent overrides.
# These values are nested under `agent:` on purpose: dotted paths such as
# `agent.actor_lr` are only expanded in command-line overrides / dotlists.
# Inside a YAML file they become literal top-level keys named
# "agent.actor_lr" and never reach `cfg.agent`.
agent:
  name: sac
  actor_lr: 0.00003
  init_temp: 0.005

# Environment and training-schedule settings for this task.
env:
  name: Walker2d-v2
  demo: Walker2d-v2_d4rl.pkl
  # Disabled alternative schedule (active values are set further down):
  # learn_steps: 1e5
  # eval_interval: 1e3

  replay_mem: 1e6
  # initial_mem: 10000

  eps_steps: 100000
  eps_window: 10
  learn_steps: 5e5
  eval_interval: 5e3

# Expert demonstration settings.
expert:
  demos: 1
  subsample_freq: 1

# Evaluation settings.
eval:
  # Left unset; written as an explicit null rather than a bare empty value.
  policy: null
  threshold: 5000

# Logging cadence: emit a log entry every `log_interval` steps.
log_interval: 500  # Log every this many steps
# NOTE(review): presumably the number of actor updates performed per
# training step — confirm against the training loop.
num_actor_updates: 1

# Optimisation options read by the training script (keys listed
# alphabetically; mapping order carries no meaning).
train:
  # NOTE(review): presumably the minibatch size per update — confirm.
  batch: 256
  # NOTE(review): the two flags below presumably enable a target network
  # and soft (Polyak-style) updates of it — confirm in the agent code.
  soft_update: true
  use_target: true

# Critic network selection. `_target_` holds the dotted import path of the
# class to use (agent.sac_models.SingleQCritic).
# NOTE(review): presumably resolved through Hydra's instantiate mechanism —
# confirm where `q_net` is consumed.
q_net:
  _target_: agent.sac_models.SingleQCritic