# Training schedule: optimizer, gradient handling, runner length and
# learning-rate policy. NOTE(review): the keys look like an MMCV-style
# config — confirm against the framework that loads this file.

# SGD with momentum and a small weight decay.
optimizer = {
    'type': 'SGD',
    'lr': 0.2,
    'momentum': 0.9,
    'weight_decay': 1e-4,
}

# Gradient clipping is switched off.
optimizer_config = {'grad_clip': None}

# Schedule sizing: with a batch of 512 and 8142 * 3 = 24426 training
# samples, one epoch is 24426 / 512 ~= 48 iterations, so the 10 epochs
# below correspond to roughly 480 iterations. The iteration-based
# equivalent would be:
#   runner = dict(type='IterBasedRunner', max_iters=480)
runner = {'type': 'EpochBasedRunner', 'max_epochs': 10}

# Cosine-annealing policy with a minimum learning rate of 0.
lr_config = {'policy': 'CosineAnnealing', 'min_lr': 0}