# Critic settings. `layers` holds two entries of 256 (presumably the
# hidden-layer widths; confirm against the network builder).
critic:
  layers: [256, 256]
  activation_fn: 'relu'
  final_fn: 'linear'

# Actor settings. `layers` holds two entries of 256 (presumably the
# hidden-layer widths; confirm against the network builder). Unlike
# `critic`, this section sets no `final_fn`.
actor:
  layers: [256, 256]
  activation_fn: 'relu'

# Five thousand, written without `_` digit separators: `5_000` is an integer
# only under YAML 1.1 resolvers; YAML 1.2 core-schema parsers load it as a
# string.
learning_starts: 5000

# Learning rates are written with an explicit decimal point. Without one
# (e.g. `3e-4`), YAML 1.1 resolvers such as PyYAML's load the value as a
# string rather than a float.
policy_lr: 3.0e-4
q_lr: 1.0e-3
batch_size: 256
max_grad_norm: 0.5  # NOTE: isn't included in default implementations

# NOTE(review): presumably the discount factor (`gamma`) and the
# target-network soft-update coefficient (`tau`); confirm against the
# training loop.
gamma: 0.99
tau: 0.005
# NOTE(review): presumably the policy is updated once every `policy_freq`
# update steps; confirm against the training loop.
policy_freq: 2
target_freq: 1 # target networks update frequency

# Entropy regularization coefficient.
alpha: 0.2
# NOTE(review): with autotuning enabled, `alpha` above is presumably only an
# initial or fallback value; confirm against the consumer.
alpha_autotune: true

# Buffer settings.
buffer:
  # One million, written without `_` digit separators: `1_000_000` is an
  # integer only under YAML 1.1 resolvers; YAML 1.2 core-schema parsers
  # load it as a string.
  size: 1000000
  optimize_memory: false

# NOTE(review): presumably the interval between metric/log emissions; the
# unit (steps vs. episodes) is not visible in this file, so confirm against
# the consumer.
emit_freq: 100