# Experiment setting
# Run name, game/environment selection, and log output directory.
exp_name: matrl
game_type: grid
env: Switch4-v0
output_dir: logs

# Worker count and random seed.
cpu: 1
seed: 0

# Training budget.
# NOTE(review): batch_size equals steps_per_epoch (both 2000) — presumably each
# epoch's rollout is consumed as a single batch; confirm against the trainer.
steps_per_epoch: 2000
epochs: 500
batch_size: 2000
# Algorithm
# Trainer and base algorithm selection.
trainer: meta
algo: ppo
share_policy: false

# Meta-game related switches and settings.
# NOTE(review): "br" presumably stands for best response, and nash_method
# presumably names the solver used for the meta game — confirm in the trainer.
br: true
meta: true
meta_rollouts: false
meta_game_ratio_clip: 0.1
nash_method: CMAES
meta_rollout_epochs: 1
tmp_rollout_epochs: 1

# Return/advantage parameters and episode length cap.
# NOTE(review): gamma/lam are presumably the discount factor and GAE lambda —
# confirm.
gamma: 0.99
lam: 0.97
max_ep_len: 1000

# Clipping thresholds and KL target (all set to 0.1 in this config).
clip_ratio: 0.1
vf_clip_param: 0.1
importance_clip_ratio: 0.1
target_kl: 0.1

# Coefficients for the individual loss terms.
v_loss_coeff: 0.001
pi_loss_coeff: 1
kl_coeff: 0.001
entropy_coeff: 0.05

# Learning rates; br_pi_lr is set to the same value as pi_lr here.
pi_lr: 0.002
br_pi_lr: 0.002

# Iteration counts for the update loops.
train_pi_iters: 5
br_iter: 5
# Model specification
network: mlp
model:
  hidden_sizes: [128, 128]
  activation: leaky_relu

# Sequence length to use when the network is an RNN.
# NOTE(review): 0 is set here alongside network: mlp — presumably 0 means
# "no recurrence"; confirm how the model builder treats it.
rnn_length: 0
# Model save and load
# NOTE(review): save_path ends in "checkers" while env is Switch4-v0 —
# presumably carried over from another experiment's config; confirm.
# NOTE(review): both paths are under /tmp, which is commonly cleared on
# reboot — confirm checkpoints do not need to persist.
save_path: /tmp/checkers
# NOTE(review): unit of save_frequency (epochs vs. steps) is not visible
# here — confirm in the trainer.
save_frequency: 10

# Restoring is disabled; restore_path is presumably only read when
# load_model is true — confirm.
load_model: false
restore_path: /tmp/