[MODEL_CONFIG]
algo = IA2C_LToS
rmsp_alpha = 0.99
rmsp_epsilon = 1e-5
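# assumed mapping: rmsp_alpha is the RMSprop smoothing constant, rmsp_epsilon the denominator term for numerical stability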
max_grad_norm = 40
gamma = 0.99
lr_init = 5e-4
lr_decay = constant
entropy_coef = 0.01
value_coef = 0.5
num_lstm = 64
# num_fc and shared_dim should be the same
num_fc = 64
shared_dim = 64
batch_size = 20
reward_norm = 100.0
reward_clip = -1
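# assumed convention: rewards are scaled by 1/reward_norm; reward_clip = -1 disables clipping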
use_lstm = True

# LToS specific parameters
update_frequency = 1
tau = 0.01
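# assumed: tau is the soft target-update rate, theta_target <- tau * theta + (1 - tau) * theta_target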
epsilon = 0.1
epsilon_decay = 0.995
epsilon_min = 0.01
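# assumed schedule: epsilon <- max(epsilon_min, epsilon * epsilon_decay) after each update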
; buffer_size = 100000
gradient_steps = 1

[TRAIN_CONFIG]
load_model = False
total_step = 1e6
test_interval = 2e6
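; note: test_interval (2e6) exceeds total_step (1e6), so periodic testing never triggers during training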
log_interval = 2e3
save_interval = 5e4

[ENV_CONFIG]
reward_scale = 2
net_path = ./envs/NewYork33/newyork33.net.xml
sim_path = ./envs/NewYork33/newyork33.sumocfg
; clip_wave = -1
; clip_wait = -1
; control_interval_sec = 5
; agent is chosen from greedy, ia2c, ia2c_fp, ia2c_ltos, ma2c_som, ma2c_ic3, ma2c_nc
agent = ia2c_ltos
; coop_gamma discounts the neighbors' impact on each agent's reward; a negative value disables the spatial discount
coop_gamma = -1.0
; data_path = ./envs/real_net_data/
; episode_length_sec = 3600
; for realnet, the normalization is done per agent
; norm_wave = 1.0
; norm_wait = -1
; coef_wait = 0
; flow_rate = 325
; objective is chosen from queue, wait, hybrid
; objective = queue
; scenario = atsc_real_net
env_name = Large_city
seed = 12
test_seeds = 10000
sampling_time = 0.05
; yellow_interval_sec = 2
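; Example (a minimal sketch; the file name below is an assumption) of reading
; this config with Python's configparser:
;   import configparser
;   cfg = configparser.ConfigParser()
;   cfg.read('config_ia2c_ltos.ini')
;   lr_init = cfg.getfloat('MODEL_CONFIG', 'lr_init')        # 0.0005
;   total_step = cfg.getfloat('TRAIN_CONFIG', 'total_step')  # 1000000.0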
