; Model / optimizer hyperparameters.
; NOTE(review): the per-key notes below are inferred from key names only;
; confirm against the code that reads this section.
[MODEL_CONFIG]
; NOTE(review): rmsp_* presumably configure an RMSprop optimizer
; (smoothing constant and numerical-stability epsilon) - confirm.
rmsp_alpha = 0.99
rmsp_epsilon = 1e-5
max_grad_norm = 40
gamma = 0.99
lr_init = 5e-4
; lr_decay is a schedule name (a string), not a number.
lr_decay = constant
entropy_coef = 0.01
value_coef = 0.5
num_lstm = 64
num_fc = 64
batch_size = 20
; The same nodes may be updated by ~25 propagations,
; so reward_norm is adjusted accordingly.
reward_norm = 2.5
; NOTE(review): a negative reward_clip presumably disables clipping - confirm.
reward_clip = -1
torch_seed = 85

; Training-loop step counts and intervals.
; NOTE(review): total_step and test_interval are written in scientific
; notation while log_interval is a plain integer. The reader must parse the
; former as floats (an integer parse of "1e6" would fail) - confirm.
[TRAIN_CONFIG]
total_step = 1e6
test_interval = 1e3
log_interval = 2000


; Environment and agent selection.
[ENV_CONFIG]

; Identifier of the agent/algorithm variant to run.
agent = ma2c_cnet
; The coop discount is used to discount the neighbors' impact.
; NOTE(review): the negative value presumably acts as a sentinel
; (e.g. discounting disabled) - confirm against the reader.
coop_gamma = -1.0
seed = 0
; Comma-separated list (no spaces) of 20 seeds: 2000 to 2475 in steps of 25.
test_seeds = 2000,2025,2050,2075,2100,2125,2150,2175,2200,2225,2250,2275,2300,2325,2350,2375,2400,2425,2450,2475
; NOTE(review): unit is not stated here (presumably seconds) - confirm.
sampling_time = 0.05
