# Config for prompt tuning:
# vision_depth: 12
# language_depth: 12
# vision_ctx: 2
# language_ctx: 2
optimizer: 
    # Optimizer settings. SGD is the active choice; the adamp alternative and
    # its betas/eps values are kept below as commented-out lines.
    # NOTE(review): the key names (opt, lr, weight_decay, momentum) look like
    # timm-style optimizer arguments — confirm against the code that reads them.
    opt: sgd
    # opt: adamp
    # betas: [0.9, 0.999]
    # NOTE(review): if eps is re-enabled, write it as 1.0e-8. YAML 1.1 loaders
    # such as PyYAML read a bare 1e-8 (no decimal point) as a string.
    # eps: 1e-8
    # Same value as schedular.lr; presumably the two should be kept in sync —
    # confirm which one the training code actually uses.
    lr: 0.001
    weight_decay: 0.0001
    # Presumably only meaningful for the sgd optimizer — confirm.
    momentum: 0.9
schedular:
    # Learning-rate schedule settings. The section key is spelled "schedular";
    # presumably the consuming code looks it up under this exact name, so do
    # not rename it here without updating the reader.
    # NOTE(review): the field names resemble timm's scheduler-factory
    # arguments — confirm against the code that builds the scheduler.
    sched: cosine
    # Same value as optimizer.lr.
    lr: 0.001
    epochs: 10
    min_lr: 0.000001
    decay_rate: 1
    # Same value as min_lr.
    warmup_lr: 0.000001
    # Fractional value (less than one epoch); assumes the scheduler accepts a
    # non-integer warm-up length — TODO confirm.
    warmup_epochs: 0.3
    cooldown_epochs: 0
    # Presumably the number of initial epochs during which the backbone is
    # kept frozen (2 of the 10 epochs above) — confirm against the trainer.
    freeze_backbone_epochs: 2

INPUT:
    # This section uses upper-case keys, unlike the lower-case sections in the
    # rest of the file; keep the casing as-is since the reader presumably
    # expects it.
    # Two-element size, presumably (height, width) in pixels — TODO confirm.
    SIZE: [224, 224]

# probability distribution encoder
pde:
    # NOTE(review): pcmepp.criterion has similarly named keys
    # (init_negative_scale, init_shift) with different values; the two sets
    # are presumably read by different components — confirm.
    negative_scale: 0.005
    shift: 4
    # Presumably a multiplier applied to the base learning rate for this
    # module (1 would leave it unchanged) — confirm.
    mul_lr: 1

# Settings grouped under "pcmepp": augmentation, model and criterion.
# NOTE(review): presumably refers to the PCME++ probabilistic image-text
# embedding method — confirm against the code that consumes this section.
pcmepp:
    augment: 
        img_size_augment: 1.0
    model:
        is_probabilistic_model: true
        n_unc_layers: 1
        # Plain string; the "/" is part of the value.
        backbone_source: clip_ViT-B/16
        precomp_enc_type: backbone
        embed_size: 1024
        # Presumably the feature width produced by the ViT-B/16 backbone named
        # above — confirm before changing backbone_source.
        img_dim: 768
        no_imgnorm: false
        no_txtnorm: false
        # Explicit null; presumably lets the model fall back to its own
        # default initialisation — TODO confirm.
        sigma_ln_init: null
    criterion:
        name: pcmepp
        # These two differ from pde.negative_scale (0.005) and pde.shift (4).
        init_negative_scale: 5
        init_shift: 5
        # NOTE(review): "csd" is presumably the name of a distance function
        # selected by the criterion code — confirm the accepted values there.
        prob_distance: csd
        vib_beta: 0.0001
        smoothness_alpha: 0.1