# Settings for the 3D quadrotor task.
3dQuadrotor:
  # Training / evaluation budget.
  steps: 50000  # Number of training steps
  train_iters: 10 # Number of models trained per configuration
  n_eval_ep: 10 # Number of deployments/evaluations per trained model
  max_episode_steps: 1000 # Max number of steps per episode

  # CSV inputs, given as relative paths inside the matlab/ directory.
  # NOTE(review): judging by the key names these hold a control set (S_ctrl), an
  # RCI set (S_RCI) and a controller gain (K_ctrl) — confirm with the code that
  # loads them; the base directory the paths are resolved against is not visible here.
  S_ctrl_csv: 'matlab/S_ctrl_3DQuadrotor.csv'
  S_RCI_csv: 'matlab/S_RCI_3DQuadrotor.csv'
  K_ctrl_csv: 'matlab/K_ctrl_3DQuadrotor.csv'

  # Behaviour switches and reward shaping.
  # Booleans use the canonical lowercase `true`/`false` spelling, which is read
  # as a boolean by YAML 1.1 loaders as well as the YAML 1.2 core and JSON schemas.
  # NOTE(review): the meaning of each key is inferred from its name only —
  # confirm against the code that reads this file.
  randomize_env: true
  punishment: -0.1  # presumably a penalty applied to the reward — TODO confirm
  find_seeds: false
  safe_center_obs: false
  log_polytope_space: false

  # Disabled parameters, kept for reference (K equals 0.89/1.4):
  # gravity: 9.81
  # K: 0.63571428571428578724322733251028694212436676025390625 # 0.89/1.4
  dt: 0.1  # presumably the discretisation time step — unit not stated here, TODO confirm
  noise_bound: 0.0 # Scale applied to noise_vector; 0.0 zeroes it. Alternative value: 0.05
  # NOTE(review): the key says "area" while the description says "volume".
  action_space_area_eq: 1.0 # Volume of the action space in the equilibrium state. TODO: 1.0 is a stand-in, the actual value still has to be computed
  
  # Two rows of four input values each.
  # NOTE(review): presumably row 0 = lower bounds and row 1 = upper bounds of the
  # action space (every entry of row 0 is below its counterpart in row 1) — confirm.
  u_space:
    - [-9.81, -0.5, -0.5, -0.5]
    - [2.38, 0.5, 0.5, 0.5]

  # State vector layout (12 entries); x_goal, x_lim_low and x_lim_high below each
  # list 12 values in this order:
  # x: [x, y, z, dx, dy, dz, phi, theta, psi, dphi, dtheta, dpsi]
  x_goal: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  # all-zero goal state
  # Per-component state limits. The phi/theta entries (+/-0.78539816) are approx.
  # pi/4 and the psi entry (+/-3.14159265) is approx. pi; all others are +/-3.0.
  # NOTE(review): the angle entries look like radians — confirm.
  x_lim_low: [-3.0, -3.0, -3.0, -3.0, -3.0, -3.0, -0.78539816, -0.78539816, -3.14159265, -3.0, -3.0, -3.0]
  x_lim_high: [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 0.78539816, 0.78539816, 3.14159265, 3.0, 3.0, 3.0]
  # [x, y, z] essentially: its length indicates the number of dimensions of the
  # noise vector, and it gets multiplied by noise_bound.
  noise_vector: [1, 1, 1]
  # Sign patterns for the three noise dimensions: all 8 combinations of +1/-1.
  noise_set:
    - [1, 1, 1]
    - [-1, 1, 1]
    - [1, -1, 1]
    - [-1, -1, 1]
    - [1, 1, -1]
    - [-1, 1, -1]
    - [1, -1, -1]
    - [-1, -1, -1]
  
  # G: generator matrix of the input zonotope. It has to be of shape (n, m), where
  # n is the number of inputs and m the number of generators.
  # Active choice: the 4x4 identity (4 inputs, 4 generators).
  G:
    - [1, 0, 0, 0]
    - [0, 1, 0, 0]
    - [0, 0, 1, 0]
    - [0, 0, 0, 1]
  # Disabled alternative with 6 generators (4x6), kept for reference:
  # G: [[1, 0, 0, 0, 0, 0],
  #     [0, 1, 0, 0, 0, 1],
  #     [0, 0, 1, 0, 1, 1],
  #     [0, 0, 0, 1, 1, 1]]

  # Per-algorithm hyperparameters, keyed by algorithm name.
  algorithms:
    # NOTE(review): most key names match Stable-Baselines3 DQN arguments;
    # network_size and activation_fn are presumably translated into the policy
    # network definition by the loader — confirm.
    DQN:
      learning_rate: 0.0001
      learning_starts: 100
      batch_size: 64
      gamma: 0.99999
      train_freq: 2
      gradient_steps: 4
      max_grad_norm: 100
      activation_fn: 'tanh'
      network_size: 64
      target_update_interval: 1000
      # NOTE(review): if exploration_fraction is the share of `steps` over which
      # epsilon is annealed, 0.00003 * 50000 steps is about 1.5 steps — confirm
      # this near-immediate drop to exploration_final_eps is intended.
      exploration_final_eps: 0.004
      exploration_fraction: 0.00003
      exploration_initial_eps: 0.137

    # NOTE(review): noise_type/noise_std presumably configure the exploration
    # action noise, and network_size/activation_fn the policy network — confirm
    # against the code that builds the TD3 model.
    TD3:
      learning_rate: 0.002
      buffer_size: 100000
      batch_size: 512
      gamma: 0.98
      train_freq: 5
      gradient_steps: 10
      noise_type: 'normal'
      noise_std: 0.12
      network_size: 64
      activation_fn: 'relu'

    # Entries marked "hp tuning" are flagged in this file as results of
    # hyperparameter tuning.
    # NOTE(review): network_size, activation_fn and log_std_init are presumably
    # forwarded to the policy network rather than to PPO itself — confirm.
    PPO:
      batch_size: 64  # hp tuning
      n_steps: 64  # hp tuning
      gamma: 0.98
      learning_rate: 0.0007875347648919963  # hp tuning
      ent_coef: 2.7455785164029666e-06  # hp tuning
      clip_range: 0.1
      n_epochs: 4  # hp tuning
      gae_lambda: 0.9
      network_size: 32
      activation_fn: 'relu'
      normalize_advantage: true
      log_std_init: -1.8799198657003664  # hp tuning

    # NOTE(review): key names follow the Stable-Baselines3 SAC signature, except
    # network_size and log_std_init, which are presumably routed to the policy
    # network by the loader — confirm.
    SAC:
      learning_rate: 0.0003
      buffer_size: 500000
      batch_size: 512
      ent_coef: 0.1
      train_freq: 32
      gradient_steps: 32
      gamma: 0.98
      tau: 0.01
      learning_starts: 1000
      use_sde: true
      network_size: 64
      log_std_init: -3.67

    # NOTE(review): key names follow the Stable-Baselines3 A2C signature, except
    # network_size and log_std_init, which are presumably routed to the policy
    # network by the loader — confirm.
    A2C:
      ent_coef: 0.0
      max_grad_norm: 0.5
      n_steps: 64
      gae_lambda: 0.9
      vf_coef: 0.4
      gamma: 0.9
      use_rms_prop: true
      normalize_advantage: false
      learning_rate: 0.00004
      network_size: 64
      log_std_init: -3.67
