defaults:
  # Seed for the random number generator.
  seed: 0

  # ---------------------------------------------------------------------------
  # Training settings
  # ---------------------------------------------------------------------------
  train_cfgs:
    # Training device; accepted forms: cpu, cuda, cuda:0, cuda:0,1, etc.
    device: cpu
    # Thread count for torch.
    torch_threads: 16
    # How many vectorized environments to use.
    vector_env_nums: 1
    # How many agents run in parallel (similar to a3c).
    parallel: 1
    # Overall training length, in steps.
    total_steps: 10000000

  # ---------------------------------------------------------------------------
  # Algorithm settings
  # ---------------------------------------------------------------------------
  algo_cfgs:
    # Steps per policy update.
    steps_per_epoch: 20000
    # Iterations per policy update.
    update_iters: 40  # disabled alternative: 20
    # Batch size used by each iteration.
    batch_size: 100  # disabled alternative: 64
    # Target KL divergence.
    target_kl: 0.02
    # Entropy coefficient.
    entropy_coef: 0.01  # disabled alternatives: 0.0, 0.005
    # Normalization switches for reward, cost and observation.
    reward_normalize: true
    cost_normalize: false
    obs_normalize: true
    # NOTE(review): presumably normalizes a hidden-state input — TODO confirm
    # against the code that reads this key.
    hidden_obs_normalize: true
    # Stop early once the KL divergence grows beyond target_kl.
    kl_early_stop: true
    # Turn on the max gradient norm and set its value.
    use_max_grad_norm: true
    max_grad_norm: 0.5  # disabled alternative: 40.0
    # Turn on the critic norm and set its coefficient.
    use_critic_norm: true
    critic_norm_coef: 0.001
    # Discount factors: reward first, then cost.
    gamma: 0.99
    cost_gamma: 0.99
    # GAE lambda: reward first, then cost.
    lam: 0.95
    lam_c: 0.95
    # Clip ratio.
    clip: 0.2
    # How advantages are estimated; options: gae, retrace.
    adv_estimation_method: gae
    # Standardize the reward advantage / the cost advantage.
    standardized_rew_adv: true
    standardized_cost_adv: true
    # Penalty coefficient.
    penalty_coef: 0.0
    # Whether cost is used.
    use_cost: true
    # Length of the lagrange window.
    lagrange_window: 50

  # ---------------------------------------------------------------------------
  # Logger settings
  # ---------------------------------------------------------------------------
  logger_cfgs:
    # wandb logging switch and the wandb project name.
    use_wandb: false
    wandb_project: omnisafe
    # tensorboard logging switch.
    use_tensorboard: true
    # How often the model is saved.
    save_model_freq: 100
    # Where the logger saves its output.
    log_dir: './runs'
    # NOTE(review): presumably a window length used by the logger — TODO confirm.
    window_lens: 100
    # NOTE(review): presumably toggles logging of trajectories — TODO confirm.
    log_trajs: false

  # ---------------------------------------------------------------------------
  # Model settings
  # ---------------------------------------------------------------------------
  model_cfgs:
    # Mode used to initialize weights.
    weight_initialization_mode: 'kaiming_uniform'  # disabled alternative: "xavier_uniform"
    # Kind of actor; options: gaussian, gaussian_learning.
    actor_type: gaussian_learning
    # Linear decay of the learning rate.
    linear_lr_decay: true
    # Annealing of the exploration noise.
    exploration_noise_anneal: false
    # Std bounds: upper bound first, lower bound second.
    std_range: [0.5, 0.1]

    # Actor network.
    actor:
      # Layer sizes of the obs encoder.
      obs_encoder: []
      # NOTE(review): presumably layer sizes of a hidden-state encoder — TODO confirm.
      h_encoder: []
      # Sizes of the hidden layers.
      hidden_sizes: [64, 64]
      # Activation function.
      activation: relu  # disabled alternative: tanh
      obs_encoder_activation: identity
      h_encoder_activation: identity
      # disabled setting: out_activation: tanh
      # Learning rate (flagged "TEST").
      lr: 0.0003  # disabled alternative: 0.0001

    # Critic network.
    # NOTE(review): unlike actor and cost_critic, no h_encoder key is set here
    # even though h_encoder_activation is — confirm this is intentional.
    critic:
      # Layer sizes of the obs encoder.
      obs_encoder: []
      # Sizes of the hidden layers.
      hidden_sizes: [64, 64]
      # Activation function.
      activation: relu  # disabled alternative: tanh
      obs_encoder_activation: identity
      h_encoder_activation: identity
      out_activation: identity
      # Learning rate.
      lr: 0.0003

    # Cost critic network.
    cost_critic:
      # Layer sizes of the obs encoder.
      obs_encoder: []
      # NOTE(review): presumably layer sizes of a hidden-state encoder — TODO confirm.
      h_encoder: []
      # Sizes of the hidden layers.
      hidden_sizes: [64, 64]
      # Activation function.
      activation: relu  # disabled alternative: tanh
      obs_encoder_activation: identity
      h_encoder_activation: identity
      out_activation: identity
      # Learning rate (flagged "TEST").
      lr: 0.0003  # disabled alternative: 0.0003
      # NOTE(review): algo_cfgs.cost_normalize is false while this is true —
      # confirm which of the two the consumer reads.
      cost_normalize: true
      # Turn on the critic norm and set its coefficient.
      use_critic_norm: true
      critic_norm_coef: 0.001

    # Classifier settings. The environment sections in this file override
    # pt_file, train_dataset and test_dataset (null here).
    classifier:
      pt_file: null
      pt_model_type: 'DistributionGRU'
      train_dataset: null
      test_dataset: null
      hidden_dim: 16
      stack_layer: 2
      decoder_arch: [64, 64]
      # disabled settings: loc_offset: 3.0, log_std_offset: 0.0
      lr: 0.0003  # disabled alternative: 0.001
      batchsize: 128
      dropout: 0.1
      # disabled settings: retrain_min_abs_cv: 0.005, retrain_num_traj: 2
      retrain_traj_prop: 0.1
      max_retrain_epoch: 5  # disabled alternative: 1
      retrain_target_acc: 0.975
      min_retrain_trajs: 20  # disabled alternatives: 40, 6
      random_retrain_traj: null
      # disabled setting: save_dir: null

  # ---------------------------------------------------------------------------
  # Lagrangian settings
  # ---------------------------------------------------------------------------
  lagrange_cfgs:
    # Tolerance of constraint violation.
    cost_limit: -0.9  # disabled alternatives: 0.0513, 0.1054
    # Starting value of the lagrangian multiplier.
    lagrangian_multiplier_init: 0.001  # disabled alternative: 1.0
    # Learning rate of the lagrangian multiplier.
    lambda_lr: 0.035  # disabled alternative: 0.07
    # Optimizer type for the lagrangian multiplier.
    lambda_optimizer: 'Adam'
    # disabled settings: use_max_grad_norm: True, max_grad_norm: 1.0

  # ---------------------------------------------------------------------------
  # Environment-specific settings (empty by default)
  # ---------------------------------------------------------------------------
  env_cfgs: {}

# Settings specific to SafetyHopperVelocity-v1 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyHopperVelocity-v1:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    actor:
      lr: 0.0001
    cost_critic:
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyHopperVelocity-v1_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyHopperVelocity-v1_traindataset.pt'
      test_dataset: '/dummypath/SafetyHopperVelocity-v1_testdataset.pt'
      hidden_dim: 4
      lr: 0.0003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05
      #   random_retrain_traj: 0.35
      #   retrain_min_abs_cv: 0.0

  # Environment-specific settings.
  env_cfgs:
    terminate_when_unhealthy: false

# Settings specific to SafetyHalfCheetahVelocity-v1 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyHalfCheetahVelocity-v1:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    actor:
      lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyHalfCheetahVelocity-v1_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyHalfCheetahVelocity-v1_traindataset.pt'
      test_dataset: '/dummypath/SafetyHalfCheetahVelocity-v1_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05
      #   random_retrain_traj: 0.35
      #   retrain_min_abs_cv: 0.0

# Settings specific to SafetyWalker2dVelocity-v1 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyWalker2dVelocity-v1:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    actor:
      lr: 0.0001
    cost_critic:
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyWalker2dVelocity-v1_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyWalker2dVelocity-v1_traindataset.pt'
      test_dataset: '/dummypath/SafetyWalker2dVelocity-v1_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05
      #   min_retrain_trajs: 10
      #   random_retrain_traj: 0.6
      #   retrain_min_abs_cv: 0.0

  # Environment-specific settings.
  env_cfgs:
    terminate_when_unhealthy: false

# Settings specific to SafetyAntVelocity-v1 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyAntVelocity-v1:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    actor:
      lr: 0.0001
    cost_critic:
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyAntVelocity-v1_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyAntVelocity-v1_traindataset.pt'
      test_dataset: '/dummypath/SafetyAntVelocity-v1_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05
      #   random_retrain_traj: 0.45
      #   retrain_min_abs_cv: 0.0

  # Environment-specific settings.
  env_cfgs:
    terminate_when_unhealthy: false

# Settings specific to SafetyPointCircle1-v0 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyPointCircle1-v0:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyPointCircle1-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyPointCircle1-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyPointCircle1-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # disabled setting: retrain_min_abs_cv: 0.05
      retrain_traj_prop: 0.05
      # Disabled settings kept for reference:
      #   random_retrain_traj: 0.025
      #   random_retrain_traj: 0.3
      #   retrain_min_abs_cv: 0.0

# Settings specific to SafetyPointCircle2-v0 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyPointCircle2-v0:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyPointCircle2-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyPointCircle2-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyPointCircle2-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # disabled setting: retrain_min_abs_cv: 0.05
      retrain_traj_prop: 0.05
      # Disabled settings kept for reference:
      #   random_retrain_traj: 0.025
      #   random_retrain_traj: 0.3
      #   retrain_min_abs_cv: 0.0

# Settings specific to SafetyCarCircle1-v0 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyCarCircle1-v0:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyCarCircle1-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyCarCircle1-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyCarCircle1-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # disabled setting: retrain_min_abs_cv: 0.05
      retrain_traj_prop: 0.05
      # Disabled settings kept for reference:
      #   random_retrain_traj: 0.025
      #   random_retrain_traj: 0.25
      #   retrain_min_abs_cv: 0.0

# Settings specific to SafetyCarCircle2-v0 (presumably layered over
# `defaults` by the loader — TODO confirm).
SafetyCarCircle2-v0:
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyCarCircle2-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyCarCircle2-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyCarCircle2-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # disabled setting: retrain_min_abs_cv: 0.05
      retrain_traj_prop: 0.05
      # Disabled settings kept for reference:
      #   random_retrain_traj: 0.025
      #   random_retrain_traj: 0.25
      #   retrain_min_abs_cv: 0.0

# Settings specific to SafetyAntRun-v0 (presumably layered over `defaults`
# by the loader — TODO confirm).
SafetyAntRun-v0:
  # Training settings.
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 10
    # Overall training length, in steps.
    total_steps: 2000000

  # Algorithm settings.
  algo_cfgs:
    # Steps per policy update.
    steps_per_epoch: 2000
    update_iters: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyAntRun-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyAntRun-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyAntRun-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05
      #   min_retrain_trajs: 1
      #   min_retrain_trajs: 50
      #   random_retrain_traj: 0.65
      #   retrain_min_abs_cv: 0.0
      #   retrain_min_abs_cv: 0.05

  # Disabled section kept for reference (learning rate of the lagrangian
  # multiplier):
  #   lagrange_cfgs:
  #     lambda_lr: 0.01

# Settings specific to SafetyBallRun-v0 (presumably layered over `defaults`
# by the loader — TODO confirm).
SafetyBallRun-v0:
  # Training settings.
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 10
    # Overall training length, in steps.
    total_steps: 1000000

  # Algorithm settings.
  algo_cfgs:
    # Steps per policy update.
    steps_per_epoch: 1000
    update_iters: 20
    # disabled setting: entropy_coef: 0.001

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      # disabled setting: hidden_sizes: [32, 32]
      lr: 0.0001
    # Disabled section kept for reference:
    #   critic:
    #     hidden_sizes: [32, 32]
    #     lr: 0.0002
    cost_critic:
      # disabled settings: hidden_sizes: [32, 32], lr: 0.0002
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyBallRun-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyBallRun-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyBallRun-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.0003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05

  # Disabled section kept for reference (learning rate of the lagrangian
  # multiplier):
  #   lagrange_cfgs:
  #     lambda_lr: 0.01

# Settings specific to SafetyCarRun-v0 (presumably layered over `defaults`
# by the loader — TODO confirm).
SafetyCarRun-v0:
  # Training settings.
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 10
    # Overall training length, in steps.
    total_steps: 2000000

  # Algorithm settings.
  algo_cfgs:
    # Steps per policy update.
    steps_per_epoch: 2000
    update_iters: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyCarRun-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyCarRun-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyCarRun-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05
      #   min_retrain_trajs: 1
      #   min_retrain_trajs: 50
      #   random_retrain_traj: 0.9
      #   retrain_min_abs_cv: 0.0
      #   retrain_min_abs_cv: 0.05

  # Disabled section kept for reference (learning rate of the lagrangian
  # multiplier):
  #   lagrange_cfgs:
  #     lambda_lr: 0.01

# Settings specific to SafetyDroneRun-v0 (presumably layered over `defaults`
# by the loader — TODO confirm).
SafetyDroneRun-v0:
  # Training settings.
  train_cfgs:
    # How many vectorized environments to use.
    vector_env_nums: 10
    # Overall training length, in steps.
    total_steps: 2000000

  # Algorithm settings.
  algo_cfgs:
    # Steps per policy update.
    steps_per_epoch: 2000
    update_iters: 20

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      lr: 0.0001
    # disabled setting: critic.lr: 0.0001
    cost_critic:
      # disabled setting: lr: 0.0001
      critic_norm_coef: 0.01
    classifier:
      pt_file: '/dummypath/SafetyDroneRun-v0_DistributionGRU_4_2_128.pt'
      train_dataset: '/dummypath/SafetyDroneRun-v0_traindataset.pt'
      test_dataset: '/dummypath/SafetyDroneRun-v0_testdataset.pt'
      hidden_dim: 4
      lr: 0.003
      dropout: 0.0
      # Disabled settings kept for reference:
      #   retrain_traj_prop: 0.05
      #   random_retrain_traj: 0.05

  # Disabled section kept for reference (learning rate of the lagrangian
  # multiplier):
  #   lagrange_cfgs:
  #     lambda_lr: 0.01

# Settings specific to SafeMetaDrive (presumably layered over `defaults` by
# the loader — TODO confirm).
SafeMetaDrive:
  # Training settings.
  train_cfgs:
    # Overall training length, in steps.
    total_steps: 500000

  # Logger settings.
  logger_cfgs:
    # How often the model is saved.
    save_model_freq: 15

  # Algorithm settings.
  algo_cfgs:
    # Batch size used by each iteration.
    batch_size: 256
    # Entropy coefficient.
    entropy_coef: 0.01
    # Steps per policy update.
    steps_per_epoch: 3000
    # Iterations per policy update.
    update_iters: 40
    # Normalization switches for reward, cost and observation.
    reward_normalize: false  # disabled alternative: True
    cost_normalize: true
    obs_normalize: false
    # Max gradient norm.
    max_grad_norm: 0.05
    # Length of the lagrange window.
    lagrange_window: 50

  # Model settings.
  model_cfgs:
    # Actor network.
    actor:
      # Layer sizes of the obs encoder.
      obs_encoder: []
      # Sizes of the hidden layers.
      hidden_sizes: [256, 256]
      # Learning rate.
      lr: 0.00005
    # Critic network.
    critic:
      # Sizes of the hidden layers.
      hidden_sizes: [256, 256]
      # Learning rate.
      lr: 0.00005

  # Environment-specific settings.
  env_cfgs:
    # Safe meta drive settings; for more details refer to
    # https://github.com/decisionforce/EGPO
    meta_drive_config:
      # Maximum number of interaction iterations.
      horizon: 1500
      # Whether random traffic is used.
      random_traffic: false
      # Penalty for crashing into other vehicles.
      crash_vehicle_penalty: 1.0
      # Penalty for crashing into other objects.
      crash_object_penalty: 0.5
      # Penalty for leaving the road.
      out_of_road_penalty: 1.0

  # Lagrangian settings.
  lagrange_cfgs:
    # Tolerance of constraint violation.
    cost_limit: 0.0
    # Starting value of the lagrangian multiplier.
    lagrangian_multiplier_init: 1.0  # disabled alternative: 0.01
    # Learning rate of the lagrangian multiplier.
    lambda_lr: 0.01
    # Optimizer type for the lagrangian multiplier.
    lambda_optimizer: 'Adam'
    # NOTE(review): the two keys below appear only as commented-out lines under
    # defaults.lagrange_cfgs — confirm the loader honours keys that `defaults`
    # does not declare.
    use_max_grad_norm: false
    max_grad_norm: 1.0