# Experiment settings
exp_name: matrl-nometa
# NOTE(review): "mojuco" looks like a misspelling of "mujoco". It is left
# unchanged because the consuming code may match on this exact string;
# confirm before renaming.
game_type: mojuco
env: Ant-v2
output_dir: logs
cpu: 1
seed: 83747
# steps_per_epoch and batch_size carry the same value (4000) in this config.
steps_per_epoch: 4000
epochs: 1000
batch_size: 4000
# Algorithm
trainer: meta
algo: ppo
share_policy: false
br: true
# NOTE(review): trainer is "meta" while meta and meta_rollouts are both false.
# That matches the "nometa" experiment name, but confirm it is intentional.
meta: false
meta_rollouts: false
nash_method: "CMAES"
# Keep exactly one `gamma` entry in this file: duplicate mapping keys are
# invalid YAML, strict loaders reject them, and lenient loaders silently keep
# only the last occurrence.
gamma: 0.99
clip_ratio: 0.1
vf_clip_param: 0.1
v_loss_coeff: 0.001
pi_loss_coeff: 100
pi_lr: 0.002
br_pi_lr: 0.002
importance_clip_ratio: 0.1
train_pi_iters: 5
br_iter: 5
lam: 0.97
max_ep_len: 1000
target_kl: 0.1
# NOTE(review): this file also defines `save_frequency` in the save/load
# section; confirm which of the two keys the code actually reads.
save_freq: 1
meta_rollout_epochs: 1
tmp_rollout_epochs: 1
kl_coeff: 0.001
entropy_coeff: 0.05
# Model specification
network: "mlp"
model:
  hidden_sizes: [128, 128]
  activation: leaky_relu
# Used if the model is an RNN
rnn_length: 0
# Model save and load
save_path: /tmp/checkers
# NOTE(review): the algorithm section also defines `save_freq`; confirm which
# of the two keys the code actually reads.
save_frequency: 10
load_model: false
# Presumably only consulted when load_model is true; verify against the loader.
restore_path: /tmp/