# Top-level run settings.
# Lines of the form `#  key: value` are alternative values kept for quick
# switching: delete the leading `#` to activate one, and comment out the
# currently active line for the same key (duplicate keys are invalid YAML).
general:
  name: SGT
  # Scenario selection; exactly one `scenario` line may be active.
  # NOTE(review): the active value `Panda` does not follow the naming pattern
  # of the alternatives listed below (e.g. `panda_easy`) — confirm it is a
  # scenario name the consumer accepts.
#  scenario: 'point_robot_easy'
#  scenario: 'point_robot_easy2'
#  scenario: 'point_robot_easy2_transposed'
#  scenario: 'point_robot_box_0.2'
#  scenario: 'point_robot_box_0.8'
#  scenario: 'point_robot_box_1.6'
#  scenario: 'point_robot_corridor'
#  scenario: 'point_robot_hard_corridors'
#  scenario: panda_no_obs
#  scenario: panda_no_obs_fixed_start
#  scenario: panda_easy
#  scenario: panda_easy_fixed_start
#  scenario: panda_hard
#  scenario: panda_poles
#  scenario: disks_0.
#  scenario: disks_0.1
#  scenario: disks_1.
  scenario: Panda
  gpu_usage: 0.05
  # Five entries; presumably one per level (model.levels is 5) — confirm.
  training_cycles_per_level: [1077, 1077, 1077, 1077, 1077]
  train_episodes_per_cycle: 30
#  train_episodes_per_cycle: 120
#  train_episodes_per_cycle: 300
  # NOTE(review): both values below are far larger than any entry of
  # training_cycles_per_level, so saving/printing presumably never triggers
  # during a run — confirm this is intended.
  save_every_cycles: 1000000
  cycles_per_trajectory_print: 1000000
  test_frequency: 3
#  test_frequency: 1
  # Explicit null: no value is set for this key.
  limit_workers: null
  eps: 20.0

# Cost settings.
cost:
  collision_cost: 100.0
  is_constant_collision_cost: false
  free_cost: 1.0
  is_constant_free_cost: false
  # Cost type; the values appearing in this file are 'linear', 'huber'
  # and 'square'. Keep exactly one `type` line active.
  type: 'linear'
#  type: 'huber'
#  type: 'square'
  # Presumably only relevant when type is 'huber' — confirm against consumer.
  huber_loss_delta: 1.0

# Model / training-loop settings.
# Commented `#  key: value` lines are alternative values for the key that
# follows or precedes them; keep exactly one line per key active.
model:
  # Presumably must match the length of general.training_cycles_per_level
  # (which has 5 entries) — confirm.
  levels: 5
  starting_level: 1
  init_from_lower_level: true
#  init_from_lower_level: false
  batch_size: 20000
  reset_best_every: 0
#  reset_best_every: 50
#  decrease_learn_rate_if_static_success: 40
  decrease_learn_rate_if_static_success: 1000
  restore_on_decrease: true
#  restore_on_decrease: false
  stop_training_after_learn_rate_decrease: 1
  # Gain mode; the values appearing in this file are 'full-traj' and
  # 'future-only'.
#  gain: 'full-traj'
  gain: 'future-only'
  repeat_train_trajectories: 10
#  repeat_train_trajectories: 40
  consecutive_optimization_steps: 1

# Policy network and optimizer settings.
# Commented `#  key: value` lines are alternative values; keep exactly one
# line per key active.
policy:
  learning_rate: 0.005
  learning_rate_decrease_rate: 1.0
#  learning_rate_decrease_rate: 0.8
  learning_rate_minimum: 0.0005
  gradient_limit: 0.0
#  gradient_limit: 100.0
  # Explicit null: no value is set for this key.
  gradient_limit_quantile: null
#  gradient_limit_quantile: 0.5
  gradient_history_limit: 0
#  gradient_history_limit: 100
#  include_middle_state_as_input: true
  include_middle_state_as_input: false
#  layers: [5, 5]
  layers: [64, 64]
  # Activation name; the values appearing in this file are 'elu', 'relu'
  # and 'tanh'.
#  activation: 'elu'
#  activation: 'relu'
  activation: 'tanh'
  base_std: 0.05
  decrease_std_every: 100
  std_decrease_rate: 1.0
  distance_adaptive_std: false
#  distance_adaptive_std: true
#  learn_std: false
  learn_std: true
  max_entropy_coefficient: 1.0
#  bias_activation_is_tanh: true
  bias_activation_is_tanh: false
  bias_around_midpoint: true
#  bias_around_midpoint: false
#  ppo_epsilon: 0.1
  ppo_epsilon: 0.2
#  ppo_epsilon: 0.5

# Curriculum settings. Currently switched off via `use: false`; whether the
# remaining keys are still read when disabled depends on the consumer.
curriculum:
#  use: true
  use: false
  times_std_start_coefficient: 2.0
  raise_times: 1.1
  raise_when_train_above: 0.95

# Gradient checker settings. Currently switched off via `enable: false`.
gradient_checker:
#  enable: true
  enable: false
  gradient_points_to_sample: 100
