
# General policy-optimisation switches. The meaning of each key is defined by
# the code that consumes this config, which is not part of this file.
mask_before_softmax: False
use_centralized_V: True
ppo_epoch: 15
# NOTE(review): presumably only takes effect when use_max_grad_norm (set
# further down in this file) is True — confirm against the learner.
max_grad_norm: 0.5

# Rollout runner selection and batch sizing.
runner: "episode"  # parallel or episode

# batch_size_run, buffer_size and batch_size are all set to the same value (32)
# in this config. NOTE(review): presumably they must stay in sync for on-policy
# training — confirm before changing only one of them.
batch_size_run: 32
buffer_size: 32
batch_size: 32 # Number of episodes to train on
# Empty string in this config. NOTE(review): presumably overridden per
# experiment/environment — confirm.
layout_name: ""
entropy_method: 'kl'
la_update: False
last_action_obs: False


# lr and critic_lr are set to the same value (1e-5) here.
# NOTE(review): presumably the actor and critic optimiser learning rates —
# confirm against the learner.
lr: 0.00001
critic_lr: 0.00001
td_lambda: 1.0

# use MAPPO
# Learner/critic selection followed by the hyperparameters for that learner.
agent_output_type: "pi_logits"
learner: "mappo_learner"
critic_q_fn: "ppo"
critic_train_mode: "seq"
critic_train_reps: 1
q_nstep: 0  # 0 corresponds to default Q, 1 is r + gamma*Q, etc
# NOTE(review): the learner above is "mappo_learner" and use_centralized_V is
# True earlier in this file, yet the name is "ippo" — confirm this label is
# intended.
name: "ippo"
# NOTE(review): gae_lambda (0.95) differs from td_lambda (1.0) set earlier in
# this file; presumably gae_lambda is the one used when use_gae is True —
# confirm.
gae_lambda: 0.95
use_gae: True
# use_popart is enabled while use_valuenorm is disabled.
# NOTE(review): presumably alternative value-normalisation modes — confirm
# they are not meant to be enabled together.
use_popart: True
use_valuenorm: False
use_proper_time_limits: False
opti_eps: 0.00001
weight_decay: 0
use_orthogonal: True
use_policy_active_masks: True
# NOTE(review): both recurrent-policy flags are True — confirm the consumer
# expects them to be enabled at the same time.
use_naive_recurrent_policy: True
use_recurrent_policy: True
gain: 0.01
activation: "relu"
use_feature_normalization: True
# NOTE(review): boolean value here; confirm the consumer does not expect a
# frame count for this key.
stacked_frames: False
layer_N: 1
policy_arch: "gru"
num_mini_batch: 5
clip_param: 0.2
entropy_coef: 0.0001
data_chunk_length: 10
# NOTE(review): presumably only used when use_huber_loss is True — confirm.
huber_delta: 10.0
use_max_grad_norm: True
value_loss_coef: 1
use_clipped_value_loss: True
use_huber_loss: True
use_value_active_masks: False
recurrent_N: 1 # number of recurrent layers

