buffer_length,100000,int
warmup_steps,1024,int
batch_size,64,int
saving_freq,100000,int
reward_logging_freq,50000,int
embedding_plot_freq,500000,int
update_interval,4,int
ensemble_target_update_interval,10,int
target_update_interval,1000,int
success_rate_save_freq,20,int
eval_freq,10000,int
success_threshold_for_well_trained,0.9,float
goal_epsilon_tol,20,int
ladder_epsilon_tol,6,int
success_queue_size,10,int
training_max_step_limit,15000,int
eval_max_step_limit,300,int
termination_num_agreeing_votes,4,int
