# Environment settings.
env:
  # Dotted path of the environment wrapper class.
  # NOTE(review): `_target_` looks like a Hydra-style instantiation key -- confirm against the loader.
  _target_: "envs.safetygym_point_button.SafetyGymWrapper"
  # null here; if set to 'human' it will render live.
  render_mode: null
# NOTE(review): appears to map an action-space kind ("continuous") to a wrapper class path -- confirm.
# The path is the same dotted path given in env._target_.
classes:
  continuous: "envs.safetygym_point_button.SafetyGymWrapper"
# Logging settings.
logger:
  # Directory name for log output.
  # NOTE(review): the base path it is resolved against is not visible here -- confirm.
  dir_name: safety_gym
# LTL task specification and automaton construction settings.
ltl:
  # NOTE(review): presumably toggles automatic construction of the automaton -- confirm against the consumer.
  autobuild: false
  # Read as LTL with G = always, F = eventually, X = next, ~ = negation
  # (assumed operator syntax -- confirm with the parser in use).
  formula: "G(F(button1 & X(F(button0)))) & G~gremlin"
  # Automaton type. NOTE(review): "ldba" presumably means limit-deterministic
  # Buchi automaton, matching the ltl2ldba binary below -- confirm.
  oa_type: ldba
  # Relative path to the rabinizer4 ltl2ldba binary.
  rabinizer: ./rabinizer4/bin/ltl2ldba
# Quantitative specifications over the button1 / button0 / gremlin signals.
# NOTE(review): `argus` presumably names the library that parses these formulas -- confirm.
argus:
  # Full specification, with the outer G (always) operators.
  formula: "G (F (button1 >= 0.0) && F (button0 >= 0.0)) && G (gremlin <= 0.0)"
  # Same predicates without the outer G operators.
  # NOTE(review): presumably evaluated over the sliding stl_window below -- confirm.
  bhnr_formula: "gremlin <= 0.0 && F (button1 >= 0.0) && F (button0 >= 0.0)"
  stl_window: 60  # BHNR horizon window size
# PPO training hyperparameters.
ppo:
  K_epochs: 5  # update policy for K epochs in one PPO update
  # NOTE(review): presumably the number of samples per update batch -- confirm against the trainer.
  batch_size: 128
  off_policy: true
  eps_clip: 0.4  # clip parameter for PPO
  lr_actor: 0.002  # learning rate for actor network
  lr_critic: 0.0125  # learning rate for critic network
  # NOTE(review): the original note on this key read "T * 4", which does not
  # match the value 1 -- confirm the intended setting.
  update_timestep: 1
  update_freq__n_episodes: 3
  n_traj: 5000
  var_denominator: 1
  alpha: 0.2  # importance weight to entropy term
  # NOTE(review): presumably the per-episode horizon that the "T * 4" note refers to -- confirm.
  T: 750
# Periodic evaluation settings.
testing:
  # NOTE(review): presumably "run testing every N training episodes" -- confirm against the training loop.
  testing_freq__n_episodes: 25
  # NOTE(review): presumably the number of rollouts per testing pass -- confirm.
  num_rollouts: 5
# Top-level run settings.
gamma: 0.99  # NOTE(review): presumably the discount factor -- confirm
n_grad_updates: 1
delta: 0.1
n_seeds: 3
init_seed: 98
replay_buffer_size: 2000
# NOTE(review): `lambda` is a Python keyword; if this config is read from Python
# it needs item access (cfg["lambda"]) rather than attribute access.
lambda: 300
lambda_qs: 100.0
mdp_multiplier: 1.0
visualize: false
load_path: ""  # empty if not pre-loading a model, otherwise a string path to the model.
model_name: "safetygympol"
run_name: "buttonsenv_experiment"
num_eval_trajs: 15
eval_horizon: 1000
baseline: "quant"  # use 'ours' for no quantitative semantics.