# Naming and output settings for this run.
# NOTE(review): result_path is relative; presumably resolved against the
# launch directory - confirm with the training script.
run_name: "baseline_lip_2"
result_path: "results/"
project_name: "fashionista"

# Data source selection.
# NOTE(review): "$"-prefixed values look like symbolic references that the
# config loader resolves to project objects, with `params` as their
# arguments - confirm against the loader.
dataset:
  type: "$fashion_mnist_dataset"
  params:
    batch_size: 128
    to_categorical: true


# Top-level regularisation / margin switches.
# NOTE(review): lambda_orth is presumably the weight of an orthogonality
# penalty (0.0 here) - confirm with the training script.
lambda_orth: 0.0
# Disabled option, kept for reference:
# projected:
#   learning_rate: 1.0e-5
optim_margin:
  margin: false
  margin_lr: 1.0e-4
margin_only: false

# Model definition: the `$deel_lip_vgg` builder and its arguments.
# NOTE(review): shape [28, 28, 1] presumably is the input image shape
# (height, width, channels) - confirm against the builder's signature.
network:
  type: "$deel_lip_vgg"
  params:
    shape: [28, 28, 1]
    layers_per_depth: [4, 4, 3]
    kernel_size: 3
    regul_type: "trans_bjork_coeff"
    # Empty list, not null: no entries are configured here.
    dense_layers_size: []
    filter_size: 96
    by_constraint: false
    nb_classes: 10
    coeffs: 1.0
    padding: "same"
    activation_conv: "$GroupSort2"
    activation_dense: "$GroupSort2"
    use_bias: true
    use_stride: true
    poolType: "l2norm"
    splitLastLayer: true
    niter_bjorck: 7
    verbose: true

# Training loss: `$HKR_multiclass_hinge_auto` and its arguments.
# nb_class here carries the same value (10) as network.params.nb_classes.
loss:
  type: "$HKR_multiclass_hinge_auto"
  params:
    nb_class: 10
    temperature: 10.0
    alpha: 1.0
    beta: 0.1
    stop_gradient: false
    min_margin: 0.5
    init_margin: 0.5
    verbose: false

# Optimizer selection and its arguments.
# NOTE(review): a TimeStepScheduler callback with its own `rates` is also
# configured in this file; presumably it takes over the learning rate
# during training - confirm how it interacts with `lr`.
optimizer:
  type: "$Adam"
  params:
    lr: 0.001
    # Disabled option, kept for reference:
    # stabilize: 1
# Training callbacks; one active entry (`$TimeStepScheduler`).
# `rates` and `epoch_steps` both have three entries.
# NOTE(review): presumably rates[i] pairs with epoch_steps[i] - confirm
# against the scheduler implementation.
callbacks:
  - type: "$TimeStepScheduler"
    params:
      nb_epochs: 200
      batch_per_epoch: 468
      warmup_epochs: 5
      rates: [5.0e-4, 5.0e-5, 5.0e-6]
      epoch_steps: [50, 75, 175]
      verbose: 1
  # Disabled alternative scheduler, kept for reference:
  # - type: $WarmUpCosineDecayScheduler
  #   params:
  #     batch_per_epoch: 390
  #     learning_rate_base: 0.0001
  #     warmup_epochs: 5
  #     hold_base_rate_epochs: 35
  #     nb_epochs: 210
  #     verbose: 1

# Metrics list; only `acc` is active.
metrics:
  - type: "acc"
  # Disabled metric, kept for reference:
  # - type: $MulticlassHinge
  #   params:
  #     min_margin: 1.0

# Top-level training-loop settings.
# epochs matches nb_epochs (200) of the TimeStepScheduler callback, and
# batch_size matches dataset.params.batch_size (128).
# NOTE(review): 60000 / 128 is about 468, the batch_per_epoch given to the
# scheduler, so steps_per_epoch looks like a sample count rather than a
# batch count - confirm how the training script interprets it.
epochs: 200
steps_per_epoch: 60000
batch_size: 128
