# Top-level training hyperparameters.
optimizer: 'SGD'
dataset: 'wiki/base'
num_sample: 1
weight_decay: 0.0005
# NOTE(review): presumably the learning rate for the optimizer above - confirm.
lr: 0.005
epochs: 120
batch_size: 256
batch_log_interval: 50
# Written as plain digits on purpose: `500_000` is an integer only under
# YAML 1.1 rules (e.g. PyYAML); YAML 1.2 core-schema loaders would read it as
# the string "500_000". `500000` is the same integer under both.
train_limit: 500000
# NOTE(review): key is spelled `lamda` (not `lambda`); the name is left
# unchanged because the consuming code reads it - confirm what it controls.
lamda: 0.9
# Settings for the experiment run.
experiment:
  name: 'supervised'
  n_exp: 1
  n_trials: 1
  feedback: 'supervised'
  # Explicit null: no value is set for regularizers in this config.
  regularizers: null
  # Quoted like the other strings in this file; the parsed value is still the
  # string "u0".
  # NOTE(review): "u0" is a string, not a number - confirm this is the
  # identifier the consumer expects for the labeled portion.
  labeled_portion: 'u0'
