# Environment configuration, split into robot, task and simulation groups.
GymParams:
  # Empty mapping: no robot parameters are set in this file.
  RobotParams: {}

  TaskParams:
    # NOTE(review): presumably reward-shaping weights; a crash_penalty of 0.0
    # would make that term a no-op - confirm against the task code.
    action_penalty: 0.01
    crash_penalty: 0.0

    # Four-element initial state; the third entry is pi at double precision.
    # NOTE(review): the meaning and units of each component are not visible
    # here - confirm against the environment.
    ini_states: [0.0, 0.0, 3.141592653589793, 0.0]

    # Two-element target, all zeros.
    control_goal: [0.0, 0.0]

    max_episode_steps: 200
    evaluation_period: 10000
    num_episodes_to_run: 1
    task_reset_mode: random
    change_dynamics: false

  # Empty mapping: no simulation parameters are set in this file.
  SimulationParams: {}
# Settings for the agent named "SAC".
SACParams:
  agent_name: SAC
  mode: train

  # --- Optimisation -------------------------------------------------------
  # NOTE(review): soft_alpha is presumably the target-network soft-update
  # coefficient, distinct from entropy_alpha below - confirm with the agent.
  soft_alpha: 0.005
  learning_rate_actor: 0.0003
  learning_rate_critic: 0.0003
  batch_size: 128
  target_network_frequency: 1
  noise_clip: 0.5
  entropy_alpha: 0.2
  gamma_discount: 0.99

  # Empty string: no model path is given here.
  model_path: ''

  # --- Training schedule --------------------------------------------------
  # replay_buffer_size is set to the same value as total_training_steps.
  total_training_steps: 200000
  replay_buffer_size: 200000
  learning_starts: 2000
  policy_update_frequency: 1

  # --- Network architecture -----------------------------------------------
  # Actor and critic share the same layout: two hidden layers of 256 units
  # with relu activations.
  use_layer_norm: true
  actor_hidden_units: [256, 256]
  critic_hidden_units: [256, 256]
  actor_activation: relu
  critic_activation: relu

  # NOTE(review): the accepted values for the two *_lam_mode keys are not
  # visible in this file - confirm what "opt" selects.
  autosafe_lam_mode: opt
  sac_lam_mode: opt
# Settings for the agent named "PPO".
PPOParams:
  agent_name: PPO
  mode: train

  # --- Optimisation -------------------------------------------------------
  learning_rate_actor: 0.0003
  learning_rate_critic: 0.0003
  max_grad_norm_actor: 0.5
  max_grad_norm_critic: 0.5

  # buffer_size is an exact multiple of batch_size (2048 = 16 * 128).
  # NOTE(review): presumably this yields 16 minibatches per pass - confirm.
  buffer_size: 2048
  batch_size: 128

  entropy_coef: 0.01
  clip_range: 0.2
  gamma_discount: 0.99
  gae_lambda: 0.97
  ppo_epochs: 5

  # Empty string: no model path is given here.
  model_path: ''

  total_training_steps: 500000

  # --- Network architecture -----------------------------------------------
  # Layer norm is disabled here, unlike the SAC section of this file where
  # use_layer_norm is true.
  use_layer_norm: false
  target_kl: 0.01

  # Actor and critic share the same layout: two hidden layers of 256 units
  # with relu activations.
  actor_hidden_units: [256, 256]
  critic_hidden_units: [256, 256]
  actor_activation: relu
  critic_activation: relu
# Job-level settings: hardware, naming, seeding and output.
JobParams:
  gpu: true

  # job_name and env_name carry the same string value.
  job_name: glucose
  run_mode: train
  seed: 1
  capture_video: false

  # Empty string: no output path is given here.
  # NOTE(review): how an empty path is resolved is not visible here - confirm.
  output_path: ''

  experiment_name: sac_autosafe
  env_name: glucose
