# Reference: https://arxiv.org/pdf/2205.01580.pdf
# Run-wide settings: logging, optimizer, checkpointing, data loading,
# augmentation switches and regularization values.
[general]
# Log output location (directory plus sub-directory).
logDir = ../logs
logSubDir = test
truncate = False

contin = False
epochs = 90

rndSeed = False
deterministic = False

# Optimizer choice and its hyperparameters.
# NOTE(review): momentum and nesterov are presumably only read by SGD-style
# optimizers; confirm whether they have any effect with AdamW.
optimizer = AdamW
momentum = 0.9
weightDecay = 0.1
nesterov = False

# Checkpointing. checkpointsList is currently empty.
saveCheckpoint = True
keepLastCheckpoint = False
checkpointsList =
saveCheckpointInterval = 0

# Logging verbosity and the metrics to report.
# metrics holds several names separated by spaces (presumably split on
# whitespace by the reader; confirm).
verbose = False
logEach = 1
metrics = learningRate loss accuracy

# Data loading and augmentation.
# NOTE(review): dataDir starts with "~"; the reader presumably has to expand
# it to the user's home directory; confirm.
dataThreads = 6
dataDir = ~/.datasets
batchSize = 1024
dataset = ImageNet
imageSize = 224
flip = True
crop = False
cut = False
cutoutProp = 0.5
randAugment = False
randAugment_magnitude = 10
mixup = False
mixupProp = 0.2

label_smoothing = 0.0

grad_clip_norm = 1.0
normalize = std


# Model selection and architecture hyperparameters.
[models]
model = ViT
dropout = 0.0
# NOTE(review): BN, depth and widthFactor are not ViT-prefixed; confirm
# whether they are read at all when model = ViT.
BN = True
depth = 16
widthFactor = 4
# ViT-prefixed options: patch size and size variant.
ViTPatchsize = 32
ViTSize = S

# Learning-rate value and scheduler settings.
# NOTE: the section name contains a space, so it must be looked up verbatim.
[learning rate]
learningRate = 0.001
lrScheduler = cosWarmUp

# Measured in epochs, per the key name; presumably the length of the initial
# ramp-up phase (TODO confirm).
LRScheduler_StartRamp_epochs = 8

# Options prefixed LRScheduler_step_; presumably only used by a step-style
# scheduler (confirm).
LRScheduler_step_steps = 3
LRScheduler_step_gamma = 0.2

# Options prefixed LRScheduler_exp_ and LRScheduler_cos_; presumably read by
# the exponential and cosine schedulers respectively (confirm).
LRScheduler_exp_maxDecay = 0.01
LRScheduler_cos_periods = 0.5

# Options prefixed LRScheduler_WRN_; NOTE(review): the meaning of "WRN" is
# not visible in this file; confirm against the scheduler code.
LRScheduler_WRN_T0 = 50
LRScheduler_WRN_Tmult = 1.2

# Settings for SAM (presumably sharpness-aware minimization; confirm).
[SAM]
# NOTE(review): confirm the intended scale of rho against the optimizer code.
rho = 10
rhoScheduler = startJump
# Both rho-scheduler horizons are given in epochs and set to 8, the same value
# as LRScheduler_StartRamp_epochs in [learning rate].
rhoScheduler_StartRamp_epochs = 8
rhoScheduler_StartJump_epochs = 8

# Settings for the ESAM variant: its beta and gamma parameters.
# NOTE(review): their exact meaning is not visible here; confirm against the
# ESAM implementation.
[ESAM]
ESAMbeta = 0.5
ESAMgamma = 0.5

# Settings for the lookSAM variant: its k and alpha parameters.
# NOTE(review): their exact meaning is not visible here; confirm against the
# lookSAM implementation.
[lookSAM]
lookSAMk = 10
lookSAMalpha = 0.7