# Hyperparameter settings. The notes below are inferred from the key names
# only; the authoritative meaning of each key is defined by the code that
# loads this file -- confirm there before relying on any of them.

# Three layer widths; presumably for the representation network -- TODO confirm.
repre_layer_sizes: [200, 200, 200]
# Three layer widths; presumably for the prediction network -- TODO confirm.
pred_layer_sizes: [100, 100, 100]
# NOTE(review): role of alpha is not visible here (possibly a loss weight).
alpha: 1.0
# Boolean flag; presumably enables batch normalization -- TODO confirm.
bn: true
# Optimizer name, passed as a plain string.
optimizer: adam
learning_rate: 0.001
weight_decay: 0.0001
# NOTE(review): presumably the fraction of training spent on a pretraining
# phase -- verify against the consumer.
pretrain_ratio: 0.8
# NOTE(review): integer K used at training time; exact meaning is not
# visible here -- verify against the consumer.
policy_K_for_train: 7