# Data section: dataset choice plus loader, size and dimension settings.
# NOTE(review): the meaning of each value is defined by the consuming code,
# which is not visible from this file; confirm there before changing values.
data:
  # one of: MNIST, CIFAR10, SVHN, classification_gradient
  dataset: "classification_gradient"
  distribution: "gaussian"
  # one of: parity, all, interval
  label_function: "parity"
  # number of data loading workers
  num_workers: 4
  batch_size: 1000
  data_size: 50000
  # NOTE(review): dim_full and dim_input are both 500 here; confirm whether
  # the consumer requires them to match.
  dim_full: 500
  dim_input: 500
  feature_dim: 100
  effective_dim: 5
  # NOTE(review): null presumably means "no noise"; verify against consumer.
  noise: null
  # boolean flag, written in canonical lowercase form
  structure: true
# Model section: model type selector plus the hidden_dim / label_num sizes
# (interpreted by the consuming code, which is not visible from this file).
model:
  # one of: adp, one, two, fix, ntk
  type: "adp"
  hidden_dim: 600
  label_num: 1
# Train section: checkpoint path, epoch count, optimizer and loss settings.
train:
  # path to net (to continue training)
  net: null
  # number of epochs to train for
  niter: 600
  # learning rate
  lr: 0.02
  # one of: adam, sgd
  optimizer: "adam"
  weight_decay: 0.0
  # one of: l1, l2, null
  decay: "l2"
  # one of: hinge, cross
  loss: "hinge"