batch_size: 64
epochs: 100
opt: 'adamw'
opt_eps: 1e-8
opt_betas:
  - 0.9
  - 0.999
weight_decay: 0.0
lr: 1e-4
min_lr: 1e-6  # auto-scaled as min_lr * batch_size * nb_gpus / lr_base_size
sched: 'cosine'
warmup_epochs: 10
warmup_lr: 1e-7  # auto-scaled as warmup_lr * batch_size * nb_gpus / lr_base_size
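# Illustrative scaling of the two values above (assumes a single GPU and an
# assumed lr_base_size of 256; check the repo's launch script for the actual value):
#   effective min_lr    = 1e-6 * 64 * 1 / 256 = 2.5e-7
#   effective warmup_lr = 1e-7 * 64 * 1 / 256 = 2.5e-8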
cooldown_epochs: 0
amp: false
# clip_grad: 5.0
no_aug: true
smoothing: 0.0
no_prefetcher: true
drop_path: 0.1
gift_rank: 16
gift_dtype: "float32"
gift_in_projection_bias: false
gift_out_projection_bias: false
gift_target_modules:
  - attn:proj
gift_block_block_type: simple_block
gift_block_num_blocks: 1
gift_block_num_heads: 1
gift_block_mlp_ratio: 2.0
gift_block_drop_path: 0.0
gift_block_norm_layer: "l2"
gift_block_num_clusters: 64
gift_block_cluster_activation: "sigmoid"
