# @package _global_

# Settings grouped under `env`: environment id, demonstration files, and
# step/interval counters.
env:
  name: dst_d-v0
  # Demonstration files, one entry per line; the file-name prefixes are
  # long / mid / short.
  demo:
    - "long_dst_d-v0_10.pkl"
    - "mid_dst_d-v0_10.pkl"
    - "short_dst_d-v0_10.pkl"
  # NOTE(review): exponent literals without a decimal point (1e6, 1e3) are
  # typed differently across YAML parsers (float vs. string) — confirm how
  # the consumer reads them before rewriting as plain integers.
  learn_steps: 1e6
  eval_interval: 1e3
  eps_steps: 50

  replay_mem: 1e6
  eps_window: 10
  is_mogym: true
  render: false

# Settings grouped under `expert`.
expert:
  # presumably the number of expert demonstrations to use — the file names
  # listed under `env.demo` also end in `_10`; confirm against the loader.
  demos: 10
  # presumably keep every Nth demonstration step (1 = no subsampling) —
  # TODO confirm.
  subsample_freq: 1
# Settings grouped under `eval`. Both values are null in this file;
# presumably they are supplied by an override at launch — confirm.
eval:
  policy: null
  threshold: null

# Settings grouped under `agent`.
agent:
  name: sac
  # NOTE(review): `preference` and `preferences` are two distinct keys, both
  # null here — confirm both are read by the consumer and neither is a typo.
  preference: null
  preferences: null
  reward_dim: 2

# Number of steps between log entries.
log_interval: 500
# presumably the number of actor updates per training step — TODO confirm.
num_actor_updates: 1

# Settings grouped under `train`.
train:
  # presumably enables a target network, updated softly (Polyak-style) when
  # `soft_update` is true — confirm against the agent implementation.
  use_target: true
  soft_update: true
  # presumably the mini-batch size per update — TODO confirm.
  batch: 256

# Q-network entry. `_target_` holds a dotted path; presumably the config
# framework instantiates that class — confirm.
q_net:
  _target_: agent.sac_models.DoubleQCritic