# config.yml
# Written without digit separators: `1_000_000` is an integer only under
# YAML 1.1 resolvers; YAML 1.2 core-schema loaders return it as a string.
total_timesteps: 1000000
# NOTE(review): presumably the hidden-layer widths of the network — confirm
# against the code that consumes `net_arch`.
net_arch:
- 32
- 32
# Deliberately a quoted string; the consumer must resolve it to the actual
# activation class (NOTE(review): verify how the loader maps this name).
activation_fn: 'nn.ReLU'
# Keep the explicit `.0`: YAML 1.1 resolvers (e.g. PyYAML) only recognise a
# float when the mantissa contains a decimal point, so a bare `1e-5` is loaded
# as the string '1e-5' instead of a number.
learning_rate: 1.0e-5
decay_lr_ratio: 1.0
train_every: 100
loops_per_train: 5
true_value_path: 'er001_discrete'
# NOTE(review): the replay memory capacity equals its initial fill size —
# confirm this is intentional.
replay_memory_size: 1000
replay_memory_init_size: 1000
update_target_estimator_every: 1
batch_size: 300
ensemble_size: 3000
n_test: 200
exploration_rate: 0.01
exploration_decay_steps: 5000
n_cycles: 100
sampling_threshold: 0.01

# Nested mapping; NOTE(review): presumably forwarded as keyword arguments to
# the optimizer constructor — confirm against the training code.
optimizer_kwargs:
  momentum: 0.0
  temperature: 1.0
# Disabled settings, kept for reference. NOTE(review): they sit at column 0,
# so uncommenting them as-is makes them top-level keys, not members of
# `optimizer_kwargs` — indent them if they are meant to be optimizer options.
# prior_sd: 0.5
# sparse_sd: 0.05
# sparse_ratio: 0.5
