; Model and optimizer hyperparameters.
; NOTE(review): the descriptions below are inferred from the key names only;
; confirm each against the code that reads this section.
[MODEL_CONFIG]
; presumably the RMSprop smoothing constant and numerical-stability epsilon
rmsp_alpha = 0.99
rmsp_epsilon = 1e-5
; presumably the gradient-norm clipping threshold
max_grad_norm = 40
; presumably the reward discount factor
gamma = 0.99
; initial learning rate and the name of its decay schedule
lr_init = 5e-4
lr_decay = constant
; presumably the weights of the entropy and value terms in the loss
entropy_coef = 0.01
value_coef = 0.5
; presumably the sizes of the LSTM and fully connected layers
num_lstm = 64
num_fc = 64
batch_size = 20
; NOTE(review): reward_clip = -1 looks like a "disabled" sentinel - confirm
reward_norm = 2.5
reward_clip = -1
; presumably the seed for the PyTorch random number generator
torch_seed = 55

; Training-loop schedule.
; NOTE(review): total_step and test_interval are written in scientific
; notation while log_interval is a plain integer. Values such as 1e6 cannot
; be read directly as integers, so the loader presumably parses them as
; floats first - confirm before changing the notation of any of these keys.
[TRAIN_CONFIG]
total_step = 1e6
test_interval = 1e3
log_interval = 2000

; Environment and agent selection.
[ENV_CONFIG]
; agent is one of: greedy, ia2c, ia2c_fp, ma2c_som, ma2c_ic3, ma2c_nc.
; NOTE(review): the value configured below (ma2c_pnet) is not in the list
; above - confirm it is a supported agent name and update the list.
agent = ma2c_pnet
; cooperative discount factor applied to the neighbors' impact
coop_gamma = 1.0
seed = 0
; comma-separated list of 20 seeds (2000 to 2475 in steps of 25), no spaces
test_seeds = 2000,2025,2050,2075,2100,2125,2150,2175,2200,2225,2250,2275,2300,2325,2350,2375,2400,2425,2450,2475
; NOTE(review): unit is not stated here; presumably seconds - confirm
sampling_time = 0.05
