# Algorithm identifier (a plain string). Presumably selects the independent-PPO
# trainer; the code that reads this key is not part of this file.
alg: ippo

# Hyper-parameters for the algorithm named by `alg`. The code that consumes
# these keys is not part of this file, so notes marked "presumably" are
# inferred from the key names only and should be confirmed against it.
alg_args:
  # Presumably the optimiser learning rates for the policy and value networks.
  policy_lrate: 1.0e-4
  value_lrate: 1.0e-4

  # Presumably the number of optimisation epochs run per update.
  value_update_epochs: 10
  policy_update_epochs: 10

  # NOTE(review): presumably the GAE lambda; confirm against the trainer.
  lambda_: 0.95

  # NOTE(review): presumably the PPO clipping range. 0.6 is well above the
  # commonly used 0.1-0.3; confirm that this value is intentional.
  eps_clip: 0.6

  # Float on purpose: keep the explicit fractional digit so it is not read
  # as (or later edited into) an integer.
  value_loss_coef: 2.0

  # Feature switches. Use canonical lowercase booleans so every YAML schema
  # (1.1, 1.2 core, JSON) parses them as booleans rather than strings.
  reward_normalisation: true
  normalize_advantages: true
  gaussian_policy: false
  action_enforcebound: true

  # Update frequencies. The unit depends on the update mode: steps for
  # transition updates, episodes for episodic updates.
  behaviour_update_freq: 240
  target_update_freq: 480