# config for prompt tuning: 
# vision_depth: 12
# language_depth: 12
# vision_ctx: 2
# language_ctx: 2
optimizer:
    # Active choice is `adamp`; `sgd` is kept as a commented-out alternative.
    # opt: sgd
    opt: adamp
    betas: [0.9, 0.999]
    # Written with an explicit decimal point in the mantissa: YAML 1.1
    # loaders (e.g. PyYAML) resolve a bare `1e-8` as the string "1e-8"
    # rather than a float, while `1.0e-8` is a float under YAML 1.1 and 1.2.
    eps: 1.0e-8
    # The same value is repeated as schedular.lr further down this file.
    lr: 0.0005
    weight_decay: 0.0001
    # NOTE(review): presumably only consumed by momentum-based optimizers
    # such as sgd — confirm whether it has any effect with adamp.
    momentum: 0.9
schedular:
    # NOTE(review): this key is spelled `schedular`, not `scheduler`;
    # presumably the consumer looks it up by this exact name, so confirm
    # before renaming.
    # sched: cosine
    sched: step
    # NOTE(review): decay_epochs equals epochs below, so the step decay
    # presumably never takes effect within the run — TODO confirm intent.
    decay_epochs: 5
    decay_rate: 0.1
    # Same value as optimizer.lr above.
    lr: 0.0005
    epochs: 5
    warmup_lr: 0.000001
    # NOTE(review): fractional value; assumes the consumer accepts a
    # non-integer warmup length — confirm.
    warmup_epochs: 0.2
    # Disabled alternative values (presumably for the cosine schedule):
    # min_lr: 0.000001
    # decay_rate: 1
    # warmup_lr: 0.000001
    # warmup_epochs: 0.3
    # cooldown_epochs: 0

INPUT:
    # Two-element size; presumably the input image resolution as
    # [height, width] in pixels — confirm against the consumer.
    SIZE: [224, 224]

# probability distribution encoder
pde:
    # NOTE(review): the meaning and units of negative_scale and shift are
    # not visible in this file — document them next to the consuming code.
    negative_scale: 0.005
    shift: 4
    # Presumably a multiplier applied to the base learning rate for this
    # module's parameters (1 would leave it unchanged) — confirm.
    mul_lr: 1

pcmepp:
    # --- data augmentation ---
    augment:
        img_size_augment: 1.0

    # --- model architecture ---
    model:
        # Probabilistic mode is switched off and no uncertainty layers
        # are configured.
        is_probabilistic_model: false
        n_unc_layers: 0
        backbone_source: clip_ViT-B/16
        precomp_enc_type: backbone
        # NOTE(review): presumably embed_size is the joint embedding width
        # and img_dim the backbone's image feature width — confirm.
        embed_size: 1024
        img_dim: 768
        # Both `no_*norm` flags are false, i.e. neither is set.
        no_imgnorm: false
        no_txtnorm: false
        # no_gpo: false

    # --- training objective ---
    criterion:
        name: info_nce
        # NOTE(review): presumably the initial temperature of the
        # info_nce loss — confirm.
        init_tau: 1