# Prompt-tuning options (currently commented out):
# vision_depth: 12
# language_depth: 12
# vision_ctx: 2
# language_ctx: 2
# Optimizer settings. SGD is the active choice.
optimizer:
    opt: sgd
    # NOTE(review): schedular.lr carries the same value; presumably the two
    # are meant to stay in sync -- confirm against the consumer.
    lr: 0.00005
    # presumably only read by momentum-based optimizers such as sgd -- verify
    momentum: 0.9
    weight_decay: 0.0001
    # Disabled alternative (adamp), kept for reference. If re-enabled, keep
    # eps in the `1.0e-8` form: YAML 1.1 loaders such as PyYAML read a bare
    # `1e-8` as a string, not a float.
    # opt: adamp
    # betas: [0.9, 0.999]
    # eps: 1.0e-8
# Learning-rate schedule settings.
# NOTE(review): the key is spelled `schedular` (sic). It is left unchanged
# because consumers presumably look it up by this exact name -- rename only
# together with the code that reads it.
schedular:
    # Schedule shape and length.
    sched: cosine
    epochs: 5
    decay_rate: 1

    # Learning-rate bounds. `lr` has the same value as optimizer.lr.
    lr: 0.00005
    min_lr: 0.000001

    # Warmup / cooldown. warmup_epochs is fractional (0.3); presumably the
    # consumer supports sub-epoch warmup -- TODO confirm.
    warmup_lr: 0.000001
    warmup_epochs: 0.3
    cooldown_epochs: 0

# Input settings. SIZE is a two-element list; presumably the image
# resolution fed to the model -- the axis order is not stated here,
# confirm with the consumer.
INPUT:
    SIZE: [224, 224]

# probability distribution encoder
# Settings for the `pde` component. The meaning of each value is defined by
# the consuming code and cannot be derived from this file alone.
pde:
    shift: 4
    negative_scale: 0.005
    # presumably a learning-rate multiplier for this component -- TODO confirm
    mul_lr: 1

# Settings for the `pcmepp` component: augmentation, model and loss.
pcmepp:
    augment:
        img_size_augment: 1.0

    model:
        # Backbone selection.
        backbone_source: clip_ViT-B/16
        precomp_enc_type: backbone

        # Dimensions. NOTE(review): presumably img_dim is the backbone
        # feature width and embed_size the joint embedding width -- confirm.
        img_dim: 768
        embed_size: 512

        # Probabilistic head is switched off here (false / 0 layers).
        is_probabilistic_model: false
        n_unc_layers: 0

        # Both normalisation opt-outs are false.
        no_imgnorm: false
        no_txtnorm: false
        # Disabled option, kept for reference:
        # no_gpo: true

    criterion:
        name: info_nce
        # presumably the initial temperature of the loss -- TODO confirm
        init_tau: 1