# Top-level training hyperparameters.
# Written as a plain decimal integer: the `1_000_000` spelling is an int only
# under YAML 1.1 loaders; the YAML 1.2 core schema reads it as a string.
total_timesteps: 1000000
# NOTE(review): presumably the hidden-layer widths of the network — confirm
# against the code that consumes this file.
net_arch:
  - 128
  - 128
# Quoted so it always loads as a string.
# NOTE(review): presumably resolved to an activation class by the loading
# code — confirm how the name is looked up.
activation_fn: 'nn.Tanh'
# Has both a decimal point and a signed exponent, so YAML 1.1 and YAML 1.2
# loaders alike read it as the float 0.0002.
learning_rate: 0.2e-3
n_steps: 50
gamma: 0.9
gae_lambda: 1.0

# Settings for the estimator.
# NOTE(review): the name suggests these are forwarded as keyword arguments —
# confirm against the loading code.
estimator_kwargs:
  use_rms_prop: false
  is_importance_weight: true
  decay_burnin: 0.2
  max_grad_norm: 0.5
  loops_per_train: 10
  buffer_size: 10
  ent_coef: 0.0
  prior_sd: 0.01
  sparse_sd: 0.001
  sparse_ratio: 0.5
  obs_sd: 0.1
  # Scheduler step counts carry a decimal point and a signed exponent on
  # purpose: the bare `1e3` / `5e3` spelling is a float only under YAML 1.2;
  # YAML 1.1 loaders such as PyYAML read it as the string "1e3".
  actor_scheduler:
    power: 0.5
    steps: 1.0e+3
  critic_scheduler:
    power: 0.0
    steps: 5.0e+3

# Settings for the optimizer.
# NOTE(review): the name suggests these are forwarded as keyword arguments —
# confirm against the loading code.
optimizer_kwargs:
  # Plain decimal integer; loads as an int under both YAML 1.1 and YAML 1.2.
  pseudo_population: 10000
  # Both values keep an explicit decimal point so they load as floats.
  alpha: 1.0
  power: 0.0

# Settings for the training callback.
# NOTE(review): presumably evaluation-related, judging by n_test / eval_freq —
# confirm against the loading code.
callback_kwargs:
  ensemble_size: 1000
  n_test: 200
  # NOTE(review): same value as estimator_kwargs.decay_burnin (0.2); confirm
  # whether the two are meant to stay in sync.
  burn_in: 0.2
  # NOTE(review): the unit (timesteps vs. updates) is not visible here —
  # confirm against the callback implementation.
  eval_freq: 10000

# Settings for the environment.
# NOTE(review): the name suggests these are forwarded as keyword arguments —
# confirm against the loading code.
env_kwargs:
  # Booleans use the canonical lowercase spelling (yamllint `truthy`); the
  # parsed values are unchanged.
  is_legal_action: true
  noise_sd: 0.1
  random_init: false