learning_rate,5e-4,float
entropy_coef,0.01,float
value_function_coef,0.5,float
gamma,0.999,float
lambda,0.95,float
clip_range,0.2,float
max_grad_norm,0.5,float
nsteps,256,int
batch_size,1024,int
nepochs,3,int
max_steps,25_000_000,int
save_interval,100,int
attention_update_interval,200,int
attention_batch_size,1024,int
action_selection_strat,ucb_leader,str
warmup_steps,4096,int
buffer_length,100000,int
num_policies,3,int
embedding_plot_freq,10,int
