# config.yml
# Top-level training hyperparameters.
# NOTE(review): the per-key notes below are inferred from key names only;
# confirm them against the code that loads this file.

# Written without digit separators: `1_000_000` is an integer only under
# YAML 1.1 resolvers; YAML 1.2 core-schema loaders read it as a string.
total_timesteps: 1000000
# Presumably the hidden-layer widths of the network -- TODO confirm.
net_arch:
  - 32
  - 32
# Kept as a quoted string; presumably mapped to a class by the loading code.
activation_fn: 'nn.ReLU'
# The mantissa needs a decimal point: YAML 1.1 loaders such as PyYAML resolve
# a bare `5e-5` to the string '5e-5' instead of a float.
learning_rate: 5.0e-5
train_every: 50
loops_per_train: 20
buffer_size: 10000
train_start: 1000
update_target_every: 1
batch_size: 300
exploration_fraction: 0.1
exploration_final_eps: 0.01

# Extra settings grouped for the estimator.
# NOTE(review): presumably passed through as keyword arguments -- confirm
# against the code that loads this file.
estimator_kwargs:
  power: 0.0

# Extra settings grouped for the optimizer.
# NOTE(review): presumably passed through as keyword arguments -- confirm
# against the code that loads this file.
optimizer_kwargs:
  pseudo_population: 10000
  # NOTE(review): "sd" presumably means standard deviation -- confirm.
  state_sd: 0.001
  obs_sd: 0.1
  alpha: 0.9

# Extra settings grouped for the callback.
# NOTE(review): presumably passed through as keyword arguments -- confirm
# against the code that loads this file.
callback_kwargs:
  ensemble_size: 3000
  n_test: 200
  # NOTE(review): units not shown here; 0.1 may be a fraction -- confirm.
  burn_in: 0.1
  # NOTE(review): presumably an interval measured in timesteps -- confirm.
  eval_freq: 20000