# Environment-related settings.
# NOTE(review): `env` is a plain string here, yet `${env.term_func}` and
# `${env.reward_func}` below index into it as a mapping -- presumably `env` is
# replaced by a config-group node at composition time; confirm.
env: "walker2d"                   # Environment name
classdef: null                    # The path to the environment class (unset by default)
d4rl_config: ???                  # Must be supplied (OmegaConf `???` marker); e.g., "medium-expert-v2"
term_func: ${env.term_func}       # Termination function of the environment
reward_func: ${env.reward_func}   # Reward function, read from the env config
# Observation pre-/post-processing hooks. `obs_preproc` is the identity
# function; `${env.obs_preproc}` could be used instead.
obs_preproc: rlkit.envs.env_processor.DefaultEnvProc.obs_preproc
obs_postproc: rlkit.envs.env_processor.DefaultEnvProc.obs_postproc

# Buffer capacity, written as a plain integer: `1e6` loads as a float under
# OmegaConf and as the string "1e6" under YAML 1.1 loaders such as PyYAML.
replay_buffer_size: 1000000
collector_type: "step"      # 'step' -- one sample at a time, 'path' -- a path at a time
model_dir: null             # Root directory to search for state_dicts (unset by default)

# Both default to null (written explicitly rather than as bare empty values).
# NOTE(review): `indep_var` presumably names the independent variable of an
# experiment sweep and `exp_name` the experiment label -- confirm with the consumer.
indep_var: null
exp_name: null

# Hyperparameters handed to the trainer.
# Floats in scientific notation carry an explicit ".0" mantissa so they load as
# floats under YAML 1.1 loaders (e.g. PyYAML) as well as under OmegaConf; a bare
# `3e-4` is read as a string by YAML 1.1 loaders.
trainer_cfg:
  num_qfs: 2
  policy_lr: 3.0e-4
  qf_lr: 3.0e-4
  soft_target_tau: 5.0e-3
  target_update_period: 1
  use_automatic_entropy_tuning: true
  reward_scale: 1           # in SAC paper, it's tuned to 5, but rlkit uses 1
  discount: 0.99
  max_grad_norm: null       # unset by default; NOTE(review): presumably disables gradient clipping -- confirm
  track_grad_norm: false
  eta: -1                   # NOTE(review): meaning of the -1 sentinel is not visible here -- confirm
  max_q_backup: false
