# Experiment settings: run identity, environment, and rollout/training budget.
exp_name: pong
game_type: atari
env: pong
cpu: 1
seed: 47537
steps_per_epoch: 1000
epochs: 200
gamma: 0.99
batch_size: 1000
output_dir: atari_logs
# Algorithm settings: trainer/algorithm selection and their hyperparameters.
trainer: meta
algo: ppo
share_policy: false
br: true
meta: true
meta_rollouts: false
clip_meta_ratio: true
meta_game_ratio_clip: 0.1
nash_method: 'CMAES'
clip_ratio: 0.1
vf_clip_param: 0.1
v_loss_coeff: 0.0001
pi_loss_coeff: 10
pi_lr: 0.002
br_pi_lr: 0.002
importance_clip_ratio: 0.1
train_pi_iters: 5
br_iter: 5
lam: 0.97
max_ep_len: 1000
target_kl: 0.1
# NOTE(review): a separate `save_frequency` key also exists in the
# save/load section of this file; confirm which one the trainer reads.
save_freq: 1
meta_rollout_epochs: 1
tmp_rollout_epochs: 1
# `gamma` is deliberately not repeated here: it is already set (0.99) in the
# experiment settings at the top of this file, and duplicate mapping keys are
# invalid YAML (most parsers silently keep only the last occurrence).
kl_coeff: 0.001
entropy_coeff: 0.05
# Model specification: network type and the options passed to it.
network: 'cnn'
model:
  hidden_sizes: [128, 1]
  activation: leaky_relu
# Only relevant when an RNN model is used.
# NOTE(review): 0 presumably means "no recurrent unrolling" - confirm.
rnn_length: 0
# Model checkpoint saving and loading.
save_path: checkpoints
# NOTE(review): `save_freq` is also set in the algorithm section of this
# file; confirm which of the two keys the trainer actually reads.
save_frequency: 1
load_model: false