# SGD optimizer settings, laid out as a Hydra-style instantiation config:
# `_target_` names the class to build and the remaining non-underscore keys
# are passed to it as keyword arguments.
# NOTE(review): the parameters to optimize are not listed here — presumably
# the caller supplies them at instantiation time; confirm against the call site.
_target_: torch.optim.SGD
_convert_: 'all'  # Hydra conversion mode applied to arguments before the call

# Floats in exponent form are written with an explicit decimal point
# (5.0e-5, not 5e-5): YAML 1.1 parsers such as plain PyYAML only resolve the
# dotted form as a float and would otherwise load the value as a string.
lr: 5.0e-5  # learning rate
momentum: 0.95  # momentum factor
weight_decay: 1.0e-5  # weight decay (L2 penalty)
dampening: 0.0  # dampening for momentum; must stay 0 while nesterov is true
nesterov: true  # use Nesterov momentum (SGD requires momentum > 0 and dampening == 0)
