# config.yml
# Top-level training hyperparameters.
# NOTE(review): the per-key meanings below are inferred from the key names
# only; confirm against the script that loads this file.

# Written without digit separators: `3_000_000` resolves to an integer only
# under YAML 1.1 loaders; strict YAML 1.2 core-schema loaders read it as a
# string.
total_timesteps: 3000000

# Presumably the hidden-layer widths of the network (two layers of 32).
net_arch:
  - 32
  - 32
# Kept as a quoted string; presumably resolved to a class by the consumer.
activation_fn: 'nn.ReLU'

# Written with an explicit mantissa fraction: a bare `1e-4` has no decimal
# point, so YAML 1.1 loaders (e.g. PyYAML) resolve it as the string '1e-4'
# rather than a float. `1.0e-4` is a float under both YAML 1.1 and 1.2.
learning_rate: 1.0e-4
decay_lr_ratio: 1.0

# Training schedule.
train_every: 10
loops_per_train: 5
true_value_path: 'er001_discrete'

# Replay memory: the capacity and the initial fill size are the same value.
replay_memory_size: 1000
replay_memory_init_size: 1000
update_target_estimator_every: 1
batch_size: 30
ensemble_size: 3000
n_test: 200

# Exploration and sampling settings.
exploration_rate: 0.01
exploration_decay_steps: 5000
n_cycles: 300
sampling_threshold: 0.01

# Keyword arguments for the optimizer.
# NOTE(review): presumably forwarded verbatim to the optimizer constructor;
# confirm against the training script. Keys are grouped by kind; their order
# carries no meaning in a YAML mapping.
optimizer_kwargs:
  # Scale parameters (the `_sd` suffix presumably means standard deviation).
  prior_sd: 0.5
  obs_sd: 0.1
  sparse_sd: 0.05
  # Mixing ratio paired with `sparse_sd` (semantics to be confirmed).
  sparse_ratio: 0.5
  # Population-size setting (semantics to be confirmed).
  pseudo_population: 600
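# Alternative optimizer_kwargs (disabled). To use it, uncomment these lines
# and comment out the optimizer_kwargs mapping above: a YAML mapping must not
# repeat a key, and most loaders silently keep only the last occurrence.
# optimizer_kwargs:
#   momentum: 0.0
#   temperature: 0.1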