


mask_before_softmax: False # apply the available-actions mask before the softmax rather than after
use_centralized_V: True # critic conditions on the centralized (global) state


runner: "episode"  # "parallel" or "episode"

batch_size_run: 32 # number of environments to run in parallel
buffer_size: 32 # size of the buffer (in episodes)
batch_size: 32 # number of episodes to train on

lr: 0.00001 # actor learning rate
critic_lr: 0.00001 # critic learning rate

td_lambda: 1.0 # lambda for TD(lambda) value targets; 1.0 gives Monte Carlo returns
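# For reference (standard lambda-return; not specific to this repo):
#   G_t^lambda = (1 - lambda) * sum_{n>=1} lambda^(n-1) * G_t^(n)
# where G_t^(n) is the n-step return; td_lambda = 1.0 recovers the Monte Carlo return.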
optim_alpha: 0.99 # RMSProp alpha
optim_eps: 0.00001 # RMSProp epsilon
grad_norm_clip: 0.5 # Reduce magnitude of gradients above this L2 norm
# PPO params
gae_lambda: 1 # GAE lambda; 1 is equivalent to Monte Carlo advantages
use_gae: True # use generalized advantage estimation
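# For reference (standard GAE, Schulman et al. 2016; not specific to this repo):
#   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
#   A_t = sum_{l>=0} (gamma * gae_lambda)^l * delta_{t+l}
# With gae_lambda = 1 this telescopes to the Monte Carlo advantage G_t - V(s_t).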
use_popart: False # PopArt normalization of value targets
use_valuenorm: True # normalize value targets with a running mean/std
use_proper_time_limits: False # account for time-limit truncation when computing returns
opti_eps: 0.00001 # optimizer epsilon
weight_decay: 0 # optimizer weight decay
use_orthogonal: True # orthogonal weight initialization
use_policy_active_masks: True # mask out inactive agents in the policy loss
use_naive_recurrent_policy: True # recurrent policy trained on full sequences (no chunking)
use_recurrent_policy: True # recurrent policy trained on chunks of data_chunk_length steps
gain: 0.01 # gain of the output-layer initialization
activation: "gelu" # hidden-layer activation
use_feature_normalization: True # layer normalization on network inputs
stacked_frames: False # stack consecutive observation frames
layer_N: 1 # number of hidden MLP layers
policy_arch: "gru" # recurrent cell type for the policy
num_mini_batch: 5 # number of minibatches per PPO epoch
clip_param: 0.2 # PPO clipping epsilon
entropy_coef: 0.0001 # entropy bonus coefficient
ppo_epoch: 15 # optimization epochs per PPO update
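# For reference (standard PPO clipped surrogate, Schulman et al. 2017; not
# specific to this repo), with ratio r_t = pi(a_t|s_t) / pi_old(a_t|s_t):
#   L_clip = E_t[ min(r_t * A_t, clip(r_t, 1 - clip_param, 1 + clip_param) * A_t) ]
# entropy_coef typically weights an entropy bonus added to this objective.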
data_chunk_length: 10 # BPTT chunk length for training recurrent policies
huber_delta: 10.0 # error magnitude where the Huber loss switches from quadratic to linear
use_max_grad_norm: True # clip gradients by norm (see grad_norm_clip)
value_loss_coef: 1 # weight of the value loss in the total loss
use_clipped_value_loss: True # PPO-style clipping of the value loss
use_huber_loss: True # Huber loss for value regression
use_value_active_masks: False # mask out inactive agents in the value loss
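# For reference (standard Huber loss; not specific to this repo), with
# e = target - value:
#   loss = 0.5 * e^2                                  if |e| <= huber_delta
#   loss = huber_delta * (|e| - 0.5 * huber_delta)    otherwise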
recurrent_N: 1 # number of recurrent layers
la_update: False
last_action_obs: False # append each agent's last action to its observation

critic_hidden_dim: 64 # critic hidden-layer width
rnn_hidden_dim: 64 # RNN hidden-state size

# use MAPPO
agent_output_type: "pi_logits" # agents output policy logits
learner: "mappo_learner"
critic_q_fn: "ppo"
critic_train_mode: "seq"
critic_train_reps: 1
q_nstep: 0  # 0 corresponds to default Q, 1 is r + gamma*Q, etc
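# For reference (standard n-step target; not specific to this repo):
#   Q^(n)_t = sum_{k=0}^{n-1} gamma^k * r_{t+k} + gamma^n * Q(s_{t+n})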
ctde: False # CTDE: centralized training, decentralized execution
name: "mappo"

