# Baseline experiment settings.
# NOTE(review): the code that consumes these keys is not visible from this
# file; the grouping comments below are inferred from key names only — confirm.
defaults:
  seed: 42
  param_range: 2.5
  num_envs: 8
  num_policies: 100000
  num_expected_eps: 10
  # Active list of stat names. The commented-out `stats` lines below are
  # alternative lists kept for reference; only one may be active at a time.
  stats: ["length", "standard", "speed", "left", "height"]
  # stats: ["r_forward_standard","r_backward_standard","r_forward_position_sq","r_backward_position_sq","r_stand_eps","r_stand_gauss","r_jump_13","r_jump_15","r_jump_vel", "standard", "length"]
  # stats: ["speed", "rot_ccw", "rot_cw", "radial_speed"]
  chunk_size: 1000
  num_jobs: 8

  # Keys sharing the `_rej` suffix (presumably a rejection step — TODO confirm).
  num_states_rej: 3000
  k_neighbors_rej: 15

  # Presumably autoencoder training settings (`epochs_aut`, `latent_dim`);
  # the `autoencoder` section states its shapes depend on the latent dim.
  latent_dim: 2
  epochs_aut: 75
  batch_size: 64
  validation_split: 0.2
  n_states_per_net: 1000

  # Presumably search/optimizer settings (population size, learning rates,
  # generations) — TODO confirm which optimizer reads these.
  num_points: 40
  num_generations: 50
  center_learning_rate: 0.15
  popsize: 20
  stdev_learning_rate: 0.15
  stdev_init: 1.0
  update_type: "natural"
  target_space: "latent"
  eval_freq_steps: 2500
  num_runs: 3

# NOTE(review): despite the name, the values look like fractions
# (0.1, 0.01) rather than percent figures — confirm the unit the consumer expects.
keep_percentages: [0.1, 0.01]

# Policy network settings.
policy:
  # Named layer-shape presets. Each entry is a three-item list.
  # NOTE(review): entries look like [in_size, out_size, use_bias] — confirm
  # against the loader. `medium` goes 2 -> 1 while the other presets go
  # 6 -> 2, so the presets are not interchangeable for a given env.
  layer_shapes:
    small:
      - [6, 4, true]
      - [4, 2, true]
    medium:
      - [2, 32, true]
      - [32, 32, true]
      - [32, 1, true]
    medium_large:
      - [6, 64, true]
      - [64, 64, true]
      - [64, 2, true]
    large:
      - [6, 400, true]
      - [400, 300, true]
      - [300, 2, true]

  activation_func: elu
  last_activation_func: tanh
  # Written in plain decimal on purpose: YAML 1.1 loaders (e.g. PyYAML) resolve
  # a bare `1e-4` as the string "1e-4" instead of a float because it has no
  # decimal point. `0.0001` is a float under every loader and matches the
  # notation used by `autoencoder.learning_rate`.
  learning_rate: 0.0001
  loss_func: mse

# Autoencoder architecture and training settings.
autoencoder:
  # Input and output shapes depend on the policy and the latent dim.
  # NOTE(review): the -1 entries below are presumably placeholders that get
  # replaced with those sizes at load time — confirm.
  encoder_layers_shapes:  # entry layout presumably [in_size, out_size, use_bias] — confirm
    - [-1, 25, true]
    - [25, 10, true]
    - [10, -1, true]
  decoder_layers_shapes:
    - [-1, 10, true]
    - [10, 25, true]
    - [25, -1, true]

  loss_func: mse
  activation_func: elu
  learning_rate: 0.0001

  training_args:
    lr_scheduler:
      type: plateau
      factor: 0.5
      patience: 15
    # Explicit null: no early-stopping configuration is provided.
    early_stopping: null

# Environment to run; the commented-out lines are alternatives.
# NOTE(review): presumably the input/output sizes in `policy.layer_shapes`
# must match the chosen environment — confirm when switching.
env: MountainCarContinuous-v0
# env: Hopper-v5
# env: Reacher-v5