---
# Run identification for this experiment configuration.
project: ICLR_BASELINE
run_name: 'MNIST - Tiny16'
# Free-text cost summary. Folded (>-) only to keep lines short: it still
# loads as the same single-line string with no trailing newline.
description: >-
  FLOPs 12.025M, Params 46.656K + 650 = 47.306K -
  FLOPs 3.149M, Params 9.504K + 650 = 10.154K
# Training-loop settings.
# NOTE(review): presumably used when training.regime is "consecutive" —
# confirm against the training script.
consecutive_training:
  enabled: true
  # Same value as total_iters (200) on every scheduler under
  # model.architecture. NOTE(review): these likely need to change together
  # if the schedulers step once per epoch — confirm.
  epochs: 200
  batch_size: 128
# Network definition: an ordered list of named layers.
# NOTE(review): layer types (FleaBlock, skip_connection, MaxPool2d,
# AvgPool2d) are resolved by the consuming code; their exact semantics are
# not visible in this file.
model:
  use_factory: true
  architecture:
    # Every FleaBlock entry carries its own optimizer settings
    # (conv_* / scale_*) and its own scheduler.
    - name: first
      type: FleaBlock
      out_channels: 16
      n_partitions: [4, 4]
      activation_function: ReLU
      alpha: 4
      g_power: 2
      conv_lr: 0.006
      conv_weight_decay: 0.001
      scale_lr: 0.0018
      scale_weight_decay: 0.001
      # NOTE(review): presumably torch.optim.lr_scheduler.PolynomialLR —
      # confirm in the factory.
      scheduler:
        type: PolynomialLR
        total_iters: 200
        power: 0.9

    # skip_from names the source: here the literal "input".
    # NOTE(review): skip_type "cat" presumably means concatenation — confirm.
    - name: skip_from_input
      type: skip_connection
      skip_from: input
      skip_type: cat

    - name: second
      type: FleaBlock
      out_channels: 16
      n_partitions: [4, 4]
      activation_function: ReLU
      alpha: 6
      g_power: 2
      conv_lr: 0.001
      conv_weight_decay: 0.001
      scale_lr: 0.0003
      scale_weight_decay: 0.001
      scheduler:
        type: PolynomialLR
        total_iters: 200
        power: 0.9

    # skip_from refers to the layer named "first" defined above.
    - name: first_cat_second
      type: skip_connection
      skip_from: first
      skip_type: cat

    - name: first_pooling_max
      type: MaxPool2d
      kernel_size: 2
      stride: 2

    - name: third
      type: FleaBlock
      out_channels: 16
      n_partitions: [4, 4]
      activation_function: ReLU
      alpha: 6
      g_power: 2
      conv_lr: 0.001
      conv_weight_decay: 0.001
      scale_lr: 0.0003
      scale_weight_decay: 0.001
      scheduler:
        type: PolynomialLR
        total_iters: 200
        power: 0.9

    - name: second_pooling_avg
      type: AvgPool2d
      kernel_size: 2
      stride: 2

    # From here on n_partitions drops to [2, 2] and alpha rises to 8.
    - name: fourth
      type: FleaBlock
      out_channels: 16
      n_partitions: [2, 2]
      activation_function: ReLU
      alpha: 8
      g_power: 2
      conv_lr: 0.001
      # NOTE(review): 10x the 0.001 used by the earlier blocks — confirm
      # this is intentional.
      conv_weight_decay: 0.01
      scale_lr: 0.0003
      scale_weight_decay: 0.001
      scheduler:
        type: PolynomialLR
        total_iters: 200
        power: 0.9

    - name: third_pooling_avg
      type: AvgPool2d
      kernel_size: 2
      stride: 2

    - name: fifth
      type: FleaBlock
      out_channels: 16
      n_partitions: [2, 2]
      activation_function: ReLU
      alpha: 8
      g_power: 2
      conv_lr: 0.001
      # NOTE(review): 100x the 0.001 used by the earlier blocks — confirm
      # this is intentional.
      conv_weight_decay: 0.1
      # NOTE(review): lower than the 0.0003 used by blocks second-fourth.
      scale_lr: 0.0001
      scale_weight_decay: 0.001
      scheduler:
        type: PolynomialLR
        total_iters: 200
        power: 0.9
# Training-wide settings.
training:
  regime: consecutive
  # NOTE(review): every FleaBlock under model.architecture also sets these
  # four keys; which value takes precedence is not visible in this file —
  # confirm in the loader.
  conv_lr: 0.006
  conv_weight_decay: 0.001
  scale_lr: 0.0001
  scale_weight_decay: 0.0001
  # NOTE(review): presumably toggles Weights & Biases logging; the
  # wandb_credentials section would then be unused while this is false.
  wandb: false
# Dataset selection; the identifier is interpreted by the consuming code.
data:
  dataset: mnist
# NOTE(review): both values look like placeholders rather than real
# credentials. If real values are required, prefer supplying them outside
# version control (e.g. environment variables) — confirm how the loader
# reads them.
wandb_credentials:
  api_key: WANDB_API_KEY
  entity: project_name
