2dQuadrotorCoupledDynamics:
  # Experiment budget.
  # Number of training steps.
  steps: 200000
  # Number of models per configuration.
  train_iters: 10
  # Number of deployments/evaluations per trained model.
  n_eval_ep: 10
  # Maximum number of steps per episode.
  max_episode_steps: 250

  # Paths to precomputed data files (relative paths).
  # NOTE(review): presumably resolved relative to the project root and loaded
  # by the environment setup code -- confirm against the loader.
  # NOTE(review): A/b presumably hold a halfspace description (A x <= b) and
  # S_RCI a robust control invariant set -- inferred from names only, verify.
  A_pkl: 'matlab/A_longquadrotor.pkl'
  b_pkl: 'matlab/b_longquadrotor.pkl'
  vertices_csv: 'matlab/vertices_LongQuadrotor.csv'
  S_ctrl_csv: 'matlab/S_ctrl_LongQuadrotor.csv'
  S_RCI_csv: 'matlab/S_RCI_LongQuadrotor.csv'

  # Run flags. Booleans are written in canonical lowercase (`true`/`false`),
  # consistent with the boolean values in the `algorithms` section, so that
  # every YAML schema reads them as booleans rather than strings.
  randomize_env: true
  # Punishment value; when it is applied is decided by the consuming code.
  punishment: -0.1
  find_seeds: false
  safe_center_obs: false
  log_polytope_space: false

  # Disabled physical parameters, kept for reference:
  # gravity: 9.81
  # K: 0.63571428571428578724322733251028694212436676025390625 # 0.89/1.4

  # Time step. NOTE(review): presumably seconds per step -- confirm.
  dt: 0.05
  # Bound that noise_vector and noise_set get multiplied by.
  noise_bound: 0.08
  # Volume of the action space in the equilibrium state, computed with
  # zonotopes for the state [0, 1, 0, 0, 0, 0].
  action_space_area_eq: 1.2562494271831113

  # NOTE(review): both entries of each row are identical; presumably the rows
  # are the lower and upper input bounds -- row semantics are not visible
  # here, confirm against the environment code.
  u_space:
    - [6.834830643179076, 6.834830643179076]
    - [8.596630030978226, 8.596630030978226]

  # State layout used by the vectors below: x = [x, z, dx, dz, theta, dtheta]
  x_goal: [0, 1, 0, 0, 0, 0]
  x_halfspace: [0, 1, 0, 0, 0, 0]

  # Lower state limits: [-1.7, 0.3, -0.8, -1, -pi/12, -pi/2]
  x_lim_low:
    - -1.7
    - 0.3
    - -0.8
    - -1
    - -0.261799387799149407829446545292739756405353546142578125
    - -1.5707963267948965579989817342720925807952880859375
  # Upper state limits: [1.7, 2.0, 0.8, 1.0, pi/12, pi/2]
  x_lim_high:
    - 1.7
    - 2.0
    - 0.8
    - 1.0
    - 0.261799387799149407829446545292739756405353546142578125
    - 1.5707963267948965579989817342720925807952880859375

  # Essentially [x, z]: indicates the number of dimensions of the noise
  # vector. Gets multiplied by noise_bound.
  noise_vector: [1, 1]
  # Indicates the dimensions of the noise set. Gets multiplied by noise_bound.
  noise_set:
    - [1, 1]
    - [-1, 1]
    - [1, -1]
    - [-1, -1]

  # G: input zonotope generator matrix. It has to be of shape (n, m), where n
  # is the number of inputs and m the number of generators.
  # Active choice: hexagon.
  G:
    - [1, 1, 1]
    - [1, -1, 0]
  # Disabled alternatives, kept for reference:
  # G: [[0.0125, 0.0015],
  #     [0.0125, -0.0015]]
  # G: [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
  #     [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]
  # G: [[1, 0],
  #     [0, 1]] # eye
  # G: [[1, 0],
  #     [1, 1]] # parallelogram
  # G: [[0.125, 0.015],
  #     [0.125, -0.015]] # times 10

  # Per-algorithm hyperparameters, keyed by algorithm name.
  # NOTE(review): key names resemble Stable-Baselines3 arguments; presumably
  # they are forwarded to the model constructor -- verify against the loader.
  algorithms:
    DQN:
      # Optimisation
      learning_rate: 0.0001
      batch_size: 64
      gamma: 0.99999
      max_grad_norm: 100
      # Update schedule
      learning_starts: 100
      train_freq: 2
      gradient_steps: 4
      target_update_interval: 1000
      # Network
      network_size: 64
      activation_fn: 'tanh'
      # Exploration schedule
      # NOTE(review): if exploration_fraction is a fraction of the total
      # training steps (200000), the schedule ends after about 6 steps --
      # confirm this is intended.
      exploration_initial_eps: 0.137
      exploration_final_eps: 0.004
      exploration_fraction: 0.00003

    TD3:
      # Optimisation
      learning_rate: 0.002
      batch_size: 512
      gamma: 0.98
      # Buffer and update schedule
      buffer_size: 100000
      train_freq: 5
      gradient_steps: 10
      # Action noise
      noise_type: 'normal'
      noise_std: 0.12
      # Network
      network_size: 64
      activation_fn: 'relu'

    PPO:
      # Batching
      # TODO(review): the original note on batch_size and n_steps only says
      # "increase" -- decide on target values or drop the note.
      n_steps: 1024
      batch_size: 128
      n_epochs: 8
      # Optimisation
      learning_rate: 0.004342
      gamma: 0.99
      gae_lambda: 0.95
      clip_range: 0.2
      ent_coef: 0.050771
      normalize_advantage: true
      # Network
      network_size: 256
      activation_fn: 'relu'
      log_std_init: -1.2509

    SAC:
      # Optimisation
      learning_rate: 0.0003
      batch_size: 512
      gamma: 0.98
      tau: 0.01
      ent_coef: 0.1
      # Buffer and update schedule
      buffer_size: 500000
      learning_starts: 1000
      train_freq: 32
      gradient_steps: 32
      # Exploration and network
      use_sde: true
      log_std_init: -3.67
      network_size: 64

    A2C:
      # Optimisation
      learning_rate: 0.00004
      use_rms_prop: true
      max_grad_norm: 0.5
      # Steps, discounting and advantage settings
      n_steps: 64
      gamma: 0.9
      gae_lambda: 0.9
      normalize_advantage: false
      # Loss coefficients
      ent_coef: 0.0
      vf_coef: 0.4
      # Network
      network_size: 64
      log_std_init: -3.67
