# Environment configuration, split into three sub-sections:
# RobotParams, TaskParams and SimulationParams.
# NOTE(review): the code that consumes these keys is not visible from this
# file; meanings and units noted below are inferred from key names and must
# be confirmed against the loader.
GymParams:
  # Physical constants and state limits of the simulated robot.
  # NOTE(review): JobParams.env_name is 'cartpole', so these are presumably
  # cart-pole dynamics parameters in SI units (gravity is 9.8) - TODO confirm.
  RobotParams:
    x_threshold: 0.5
    theta_dot_threshold: 15.0
    gravity: 9.8
    mass_cart: 0.94
    mass_pole: 0.23
    # Written as an integer (20), unlike the float-valued neighbours; YAML
    # loads it as an int. Confirm the consumer casts it if a float is needed.
    action_mag: 20
    length: 0.64
    friction_cart: 10.0
    friction_pole: 0.0011
    # Presumably toggles whether the two friction_* values above are applied
    # - TODO confirm.
    with_friction: true
  # Task definition: penalties, initial state, goal, and episode settings.
  TaskParams:
    action_penalty: 0.01
    crash_penalty: 0.0
    # Four-element initial state; the third entry is pi.
    # Presumably ordered [x, x_dot, theta, theta_dot] - TODO confirm.
    # NOTE(review): task_type is 'balance' and control_goal_theta is 0.0, yet
    # the third initial value is pi. If index 2 is the pole angle, this start
    # is far from the goal; confirm it is intended and whether
    # task_reset_mode / random_reset_train override this list.
    ini_states:
    - 0.0
    - 0.0
    - 3.141592653589793
    - 0.0
    control_goal_x: 0.1
    control_goal_theta: 0.0
    max_episode_steps: 500
    evaluation_period: 10000
    num_episodes_to_run: 1
    task_reset_mode: random
    change_dynamics: false
    context_horizon: 10
    task_type: balance
    reward_type: exp
  # Simulator settings: reset behaviour, stepping, integrator and rendering.
  SimulationParams:
    # Reset randomisation is enabled for training and disabled for evaluation.
    random_reset_train: true
    random_reset_eval: false
    # NOTE(review): if each action is held for num_action_repeat simulator
    # steps of sim_time_step each, one control step spans 20 * 0.001 = 0.02
    # (presumably seconds) - TODO confirm.
    num_action_repeat: 20
    sim_time_step: 0.001
    # render_mode presumably only takes effect when enable_rendering is true
    # - TODO confirm.
    enable_rendering: false
    kinematics_integrator: euler
    render_mode: human
# Hyperparameters for the agent whose agent_name is SAC.
# NOTE(review): the consuming code is not visible here; the interpretations
# below are inferred from key names and should be confirmed.
SACParams:
  agent_name: SAC
  # JobParams.run_mode is also 'train'; confirm which of the two keys the
  # code actually reads, and keep them in sync.
  mode: train
  # NOTE(review): presumably the target-network soft-update coefficient
  # (often called tau), distinct from entropy_alpha below - TODO confirm.
  soft_alpha: 0.005
  learning_rate_actor: 0.0003
  learning_rate_critic: 0.0003
  batch_size: 128
  target_network_frequency: 1
  # NOTE(review): noise clipping is usually a TD3 setting; confirm the SAC
  # implementation reads this key.
  noise_clip: 0.5
  entropy_alpha: 0.1
  gamma_discount: 0.99
  # Empty string; presumably means no saved model is loaded - TODO confirm.
  model_path: ''
  # Same value as replay_buffer_size (500000).
  total_training_steps: 500000
  replay_buffer_size: 500000
  # Same value as batch_size (128).
  learning_starts: 128
  policy_update_frequency: 1
  use_layer_norm: false
  # Actor and critic both list two hidden sizes of 256 and use 'relu'.
  actor_hidden_units:
  - 256
  - 256
  critic_hidden_units:
  - 256
  - 256
  actor_activation: relu
  critic_activation: relu
  # Both *_lam_mode keys are set to 'opt'; the accepted values and their
  # meaning are not visible in this file - TODO document from the consumer.
  autosafe_lam_mode: opt
  sac_lam_mode: opt
# Hyperparameters for the agent whose agent_name is PPO.
# NOTE(review): the consuming code is not visible here; the interpretations
# below are inferred from key names and should be confirmed.
PPOParams:
  agent_name: PPO
  # JobParams.run_mode is also 'train'; confirm which of the two keys the
  # code actually reads, and keep them in sync.
  mode: train
  # Actor and critic share the same learning rate and gradient-norm limit.
  learning_rate_actor: 0.0003
  learning_rate_critic: 0.0003
  max_grad_norm_actor: 0.5
  max_grad_norm_critic: 0.5
  # NOTE(review): if buffer_size is the rollout length and batch_size the
  # minibatch size, each epoch has 2048 / 128 = 16 minibatches - TODO confirm.
  buffer_size: 2048
  batch_size: 128
  entropy_coef: 0.01
  clip_range: 0.2
  gamma_discount: 0.99
  gae_lambda: 0.97
  ppo_epochs: 5
  # Empty string; presumably means no saved model is loaded - TODO confirm.
  model_path: ''
  total_training_steps: 500000
  use_layer_norm: false
  # NOTE(review): presumably a KL threshold for stopping updates early
  # - TODO confirm how the trainer uses it.
  target_kl: 0.01
  # Actor and critic both list two hidden sizes of 256 and use 'relu'.
  actor_hidden_units:
  - 256
  - 256
  critic_hidden_units:
  - 256
  - 256
  actor_activation: relu
  critic_activation: relu
# Job-level settings: device, run mode, seed, output and naming.
# NOTE(review): the consuming code is not visible here; confirm how empty
# strings and the duplicated mode keys are handled.
JobParams:
  gpu: false
  # Empty string; whether the loader substitutes a default is not visible.
  job_name: ''
  # SACParams.mode and PPOParams.mode are also 'train'; confirm which key
  # takes precedence.
  run_mode: train
  seed: 1
  capture_video: false
  # Empty string; whether the loader substitutes a default is not visible.
  output_path: ''
  # NOTE(review): the name suggests a SAC run, but the key that selects
  # between SACParams and PPOParams is not visible in this file - confirm.
  experiment_name: 'sac_base'
  env_name: 'cartpole'
