# Critic network settings.
critic:
  # presumably hidden-layer widths — confirm against the model builder
  layers: [256, 256]
  activation_fn: relu
  # presumably the function applied to the network output — TODO confirm
  final_fn: linear

# Actor network settings.
actor:
  # presumably hidden-layer widths — confirm against the model builder
  layers: [256, 256]
  activation_fn: relu
  # presumably the function applied to the network output — TODO confirm
  final_fn: tanh

# presumably the number of initial steps before updates begin — TODO confirm
# Written as plain digits (25 thousand): `_` digit grouping is a YAML 1.1
# feature and loads as a string under YAML 1.2 core-schema parsers.
learning_starts: 25000

# Written with a decimal point: PyYAML (YAML 1.1) only resolves exponent
# notation as a float when the mantissa contains a `.`; a bare `3e-4` would
# be loaded as the string '3e-4'.
lr: 3.0e-4
batch_size: 256
max_grad_norm: 0.5  # NOTE: this setting isn't included in default implementations

# presumably the reward discount factor — confirm against the training code
gamma: 0.99
# presumably the soft-update coefficient for target networks — TODO confirm
tau: 0.005
# presumably the scale of the noise added to actions while exploring — TODO confirm
exploration_noise: 0.1
# presumably the number of critic updates per actor update — TODO confirm
policy_freq: 2

buffer:
  # Written as plain digits (one million): `_` digit grouping is a YAML 1.1
  # feature and loads as a string under YAML 1.2 core-schema parsers.
  size: 1000000
  # should stay `false` so that a checkpoint contains only processed observations
  optimize_memory: false

# presumably how often something is emitted (e.g. logs/metrics); unit (steps
# vs. updates) is not visible here — TODO confirm against the consumer
emit_freq: 100