# Training configuration: learning rate, optimizer selection, and scheduler.
train:
  # Presumably the base learning rate; confirm how the training code reads `lr`.
  lr: 1.0e-3
  optimizer:
    # Optimizer identifier; how "DSGD" is resolved is up to the consuming code.
    name: DSGD
    # Empty list: no positional arguments are supplied.
    args: []
    kwargs:
      # Disabled alternative values, kept for reference (uncomment to use):
      # beta: 0.9
      # weight_decay: 5.0e-3
      beta: 0.999
      weight_decay: 5.0e-3
  scheduler:
    # Empty string; presumably means "no scheduler" -- verify against the loader.
    name: ""
