# Top-level training hyperparameters.
optimizer: 'SGD'
# Dataset identifier string; how the '0.05_49' suffix is interpreted is
# decided by the code that loads this config.
dataset: 'fmnist/0.05_49'
num_sample: 1
# Two-element list.
# NOTE(review): presumably one value per model/parameter group - confirm
# against the consumer.
weight_decay:
- 0.0002
- 0.0002
lr: 0.005
epochs: 120
batch_size: 128
batch_log_interval: 50
# Written without a digit separator: `500_000` resolves to an integer only
# under YAML 1.1 loaders (e.g. PyYAML); YAML 1.2 core-schema loaders read it
# as the string "500_000".
train_limit: 500000
# NOTE(review): the key is spelled `lamda` (not `lambda`); left unchanged
# because the consuming code presumably looks up this exact key - confirm.
lamda: 0.9
# Experiment settings, grouped under a single mapping.
experiment:
  name: 'full_bandit_feedback_KL2K'
  n_exp: 3
  n_trials: 1
  feedback: 'bandit'
  # Maps a regularizer name to a two-element list of values.
  # NOTE(review): what each of the two entries applies to is not visible in
  # this file - confirm against the consumer.
  regularizers:
    KL2:
    - 0.02
    - 0.02
  labeled_portion: 'u49'
