# Run-level settings.
# `resume` and `pretrain` are unset: an empty value and an explicit `null`
# both load as YAML null. Presumably these take checkpoint paths -- confirm
# against the training script.
resume: null
pretrain: null
seed: 2048
# Dataset, sampling, and loader settings.
data:
    dataset: somethingv2
    modality: RGB

    # Clip sampling.
    num_segments: 16
    seg_length: 1

    # Loader sizes.
    batch_size: 16
    test_batch_size: 16
    workers: 4
    num_classes: 174

    # Looks like a Python str.format template (`{:05d}` = zero-padded,
    # 5 digits) -- presumably filled with the frame index; confirm.
    image_tmpl: "img_{:05d}.jpg"

    # `<STHV2_ROOT>` is a placeholder and must be replaced before use.
    train_root: "<STHV2_ROOT>"
    train_list: "lists/sthv2/train_rgb.txt"
    val_root: "<STHV2_ROOT>"
    val_list: "lists/sthv2/val_rgb.txt"
    label_list: "lists/sth_labels.csv"

    # Input resolution and augmentation switches.
    input_size: 224
    random_shift: true
    num_sample: 2
    rand_aug: true
    rand_erase: false
# Model architecture settings.
network:
    arch: ViT-B/16
    init: true
    dropout: 0.0
    emb_dropout: 0.0

    # Loads as the string "None", not YAML null; quoted to make that explicit.
    # The original note lists `Transf` and `None` -- presumably the accepted
    # values; confirm against the consumer.
    sim_header: "None"
    drop_fc: 0
    n_emb: 320  # original note: 320 + 512
    side_dim: 320
    drop_layers: [0, 2, 4, 6, 8, 10]

    # Correlation-module settings (`corr_*`).
    corr_layer_index: [3]
    corr_dim: 128
    corr_func: "cosine"  # original note: 'cosine', 'dotproduct_softmax'
    corr_window: [5, 9, 9]
    corr_ext_chnls: [64]
    corr_int_chnls: [64, 64, 128]
    corr_num_encoders: 2

    # NOTE(review): two similarly named switches with opposite values; which
    # one the code honours is not visible from this file -- confirm.
    fix_clip: false
    my_fix_clip: true
    num_checkpoints: 0
# Optimiser and learning-rate schedule settings.
solver:
    type: cosine
    epochs: 40
    start_epoch: 0
    epoch_offset: 0
    optim: adamw

    # Keep the decimal point and the signed exponent: YAML 1.1 loaders such
    # as PyYAML read a bare `1e-3` as a string rather than a float.
    lr: 1.0e-3
    warmup_lr: 1.0e-6
    lr_warmup_step: 4
    final_factor: 0.01
    weight_decay: 0.15
    betas: [0.9, 0.999]

    loss_type: CE
    evaluate: false
    clip_ratio: 1
    grad_accumulation_steps: 1
    # Commented out in the original, so the key is absent from the config:
    # mixup: True
    smoothing: 0.1
    layer_decay: 1.0  # original note: 0.7
# Console logging and evaluation reporting settings.
logging:
    print_freq: 10
    eval_freq: 2
    skip_epoch: []  # empty list, not null
    acc_per_class: true
    correct_per_sample: true
# Weights & Biases experiment-tracking settings.
wandb:
    use_wandb: true

    # `<WANDB_ID>` and `<WANDB_API_KEY>` are placeholders to fill in locally.
    # NOTE(review): avoid committing a real API key; prefer supplying it
    # through the environment if the consumer allows it -- confirm.
    entity: "<WANDB_ID>"
    key: "<WANDB_API_KEY>"

    project_name: "moss_sthv2"
    exp_name: "default"

    # Loads as the string "None", not YAML null; quoted to make that explicit.
    # Confirm the consumer expects the string form.
    group_name: "None"
