# Base settings: identifier of this configuration.
name: comoga

# Reinforcement-learning hyperparameters.
discount_factor: 0.99
len_replay_buffer: 1000000
batch_size: 256
n_steps: 10
max_tr_size: 0.05  # presumably a trust-region size bound — TODO confirm
max_grad_norm: 1.0
# Learning rates carry an explicit mantissa dot: YAML 1.1 loaders such as
# PyYAML resolve a dot-less `3e-4` as the string "3e-4" instead of a float.
critic_lr: 3.0e-4
actor_lr: 3.0e-4
soft_update_ratio: 0.995
n_actor_update_iters: 10
n_critic_update_iters: 10
norm_obs: true
norm_reward: false

# Constraint settings. Both lists are empty in this configuration.
# NOTE(review): presumably one entry per constraint in each list — confirm
# against the code that reads these keys.
con_thresholds: []
con_zetas: []

# Entropy settings.
con_entropy: true
con_ent_thresh: -1.0
# Explicit mantissa dot: YAML 1.1 loaders such as PyYAML resolve a dot-less
# `1e-3` as the string "1e-3" instead of a float.
ent_alpha_lr: 1.0e-3

# Logging: names of the logged quantities, grouped under two keys.
logging:
    task_indep:
        - fps
        - eplen
        - entropy
        - ent_alpha
        - kl
        - actor_loss
        - reward_critic_loss
    reward_dep:
        - objectives
        - reward_sum

# Model settings for the actor and the reward critic.
model:
    actor:
        mlp:
            shape:
                - 512
                - 512
            activation: LeakyReLU
        use_action_bound: true
        log_std_init: 0.0
        log_std_fix: false
    reward_critic:
        mlp:
            shape:
                - 512
                - 512
            activation: LeakyReLU
        # Both bounds load as the plain strings '-np.inf' and 'np.inf', not as
        # YAML floats; the quotes only make that explicit.
        # NOTE(review): presumably the consumer converts these strings itself —
        # verify before replacing them with the YAML floats -.inf / .inf.
        clip_range: ['-np.inf', 'np.inf']
