# @package _global_
optimizer:
  name: sgd
  momentum: 0.9
  # Learning rate.
  lr: 0.03
  # Weight decay.
  weight_decay: 0.0
  nesterov: true
  mini_batch_size: 256
  # SimCLR & SwAV parameter, since SimCLR uses TPUs; see
  # https://github.com/facebookresearch/swav/blob/master/eval_linear.py#L64-L67
  # NOTE(review): unclear which key(s) above this remark applies to; confirm.
