# config for prompt tuning:
# vision_depth: 12
# language_depth: 12
# vision_ctx: 2
# language_ctx: 2
# Optimizer settings. The code that consumes this mapping is not visible in
# this file; key meanings are inferred from their names - TODO confirm.
optimizer:
    opt: sgd
    lr: 0.0001  # same value as schedular.lr; presumably must stay in sync - confirm
    weight_decay: 0.0001
    momentum: 0.9
# Learning-rate schedule settings.
# NOTE(review): the key is spelled "schedular" (sic). It is left unchanged
# because consumers presumably look it up by this exact name - confirm
# before renaming.
schedular:
    sched: cosine
    lr: 0.0001  # same value as optimizer.lr
    epochs: 5
    # Scientific-notation values carry an explicit mantissa dot: YAML 1.1
    # loaders such as PyYAML resolve a bare `1e-6` as a string, not a float.
    min_lr: 1.0e-6
    decay_rate: 1
    warmup_lr: 1.0e-5
    # warmup_epochs: 0.3
    warmup_steps: 500
    cooldown_epochs: 0
    freeze_backbone_epochs: 0

# Input settings. Note the upper-case keys, unlike the other sections of
# this file.
INPUT:
    SIZE: [224, 224]  # presumably the input image size in pixels - confirm

# probability distribution encoder
# NOTE(review): the meaning of these parameters is not documented in this
# file; confirm against the consuming code.
pde:
    negative_scale: 0.005
    shift: 4
    mul_lr: 10  # presumably a multiplier on the base lr for this module - confirm

# attack config
# Booleans below use the canonical lowercase `true` / `false` spelling so
# they resolve to booleans regardless of the loader's YAML version/schema.
attack:
    MMA:
        # step 1: prepare texts for text supervision in image attack
        is_use_gt_caps: false
        txt_sup_k: 5
        alpha_sr: 0.1
        alpha_ri: 0.1
        alpha_rs: 0.1
        p_rd: 0.1
        # step 2: image attack
        scale_ver: 0
        alpha_unsup: 0
        alpha_sup: 1
        # step 3: text attack
        is_txt_aug: true
        txt_aug: sr
