# Environment to construct.
# NOTE(review): `_target_` looks like a Hydra-style instantiation key that
# names the class by dotted import path — confirm against the loader.
env:
  _target_: envs.safetygym_point_goal.SafetyGymGoalWrapper
  # Setting this to 'human' renders the environment live.
  render_mode: human
# Wrapper class paths keyed by kind; only `continuous` is defined here.
# NOTE(review): the path is the same class named under `env._target_` —
# presumably the two must be kept in sync; confirm with the consumer.
classes:
  continuous: envs.safetygym_point_goal.SafetyGymGoalWrapper
# Logging settings.
logger:
  # NOTE(review): presumably the directory name logs are written under —
  # confirm against the logging code.
  dir_name: 'safety_gym'
# LTL task specification and automaton construction settings.
ltl:
  # NOTE(review): presumably toggles building the automaton automatically
  # from `formula` — confirm against the consumer.
  autobuild: false
  # Nested "eventually" (F) / "next" (X) obligations over the atomic
  # propositions r, g, p, b (in that nesting order), wrapped in "always" (G).
  formula: "G(F(r & X(F(g) & X(F(p) & X(F(b))))))"
  # NOTE(review): `ldba` presumably selects a limit-deterministic Buchi
  # automaton, matching the `ltl2ldba` binary below — confirm.
  oa_type: ldba
  # Relative path to the Rabinizer `ltl2ldba` executable.
  rabinizer: ./rabinizer4/bin/ltl2ldba
# Quantitative (signal-predicate) specification over r, g, b, p.
argus:
  # "Always": each of r, g, b, p eventually reaches a value >= 0.0.
  formula: 'G (F (r >= 0.0) && F (g >= 0.0) && F (b >= 0.0) && F (p >= 0.0))'
  # Same conjunction as `formula`, without the outer G operator.
  bhnr_formula: 'F (r >= 0.0) && F (g >= 0.0) && F (b >= 0.0) && F (p >= 0.0)'
  # BHNR horizon window size.
  # NOTE(review): equal to `ppo.T` (700) — presumably intentional; confirm.
  stl_window: 700
# PPO hyperparameters.
ppo:
  K_epochs: 5  # update policy for K epochs in one PPO update
  # NOTE(review): the previous comment here was a copy of the K_epochs one;
  # presumably this is the number of samples per update batch — confirm.
  batch_size: 128
  off_policy: true
  eps_clip: 0.4  # clip parameter for PPO
  lr_actor: 0.0025  # learning rate for actor network
  lr_critic: 0.0125  # learning rate for critic network
  # NOTE(review): original remark was "T * 4" — possibly an alternative
  # setting (4 * T timesteps); confirm which value is intended.
  update_timestep: 1
  update_freq__n_episodes: 3
  n_traj: 5000
  var_denominator: 1
  alpha: 0.3  # importance weight to entropy term
  # NOTE(review): presumably the per-episode horizon; equal to
  # `argus.stl_window` (700) — confirm they must match.
  T: 700
# Periodic evaluation settings used during training.
testing:
  # NOTE(review): presumably "run a test every N training episodes" — confirm.
  testing_freq__n_episodes: 25
  # NOTE(review): presumably the number of rollouts per test — confirm.
  num_rollouts: 5
# Top-level experiment settings.
# NOTE(review): presumably the discount factor — confirm.
gamma: 0.99
n_grad_updates: 1
delta: 0.1
# NOTE(review): presumably runs use seeds starting at `init_seed` — confirm.
n_seeds: 3
init_seed: 98
replay_buffer_size: 500
# NOTE(review): `lambda` is a reserved word in Python; if the consumer is
# Python it must read this key by item access (cfg["lambda"]) — confirm.
lambda: 400
lambda_qs: 200.0
mdp_multiplier: 1.0
# NOTE(review): `env.render_mode` is 'human' while this is false — confirm the
# two settings are meant to be independent.
visualize: false
# 0 when not pre-loading a model; otherwise a string path to the model.
# NOTE(review): the earlier comment said "empty" although the value is 0 —
# confirm which sentinel the loader actually checks for.
load_path: 0
# NOTE(review): the "zonesenv" names appear in a Safety Gym config — possibly
# copied from another environment's config; confirm they are intended.
model_name: "zonesenv_ppo"
run_name: "zonesenv_experiment"
num_eval_trajs: 50
eval_horizon: 1000
# Use 'ours' for no quantitative semantics.
baseline: "quant"