# Top-level run settings.
# `resume` and `pretrain` are deliberately unset. An explicit `null` is
# equivalent to leaving the value empty, but states the intent clearly and
# avoids trailing whitespace after the colon.
# NOTE(review): presumably checkpoint paths read by the training script —
# confirm against the config loader.
resume: null
pretrain: null
# NOTE(review): presumably the random seed for the run — confirm.
seed: 2048
# Dataset, frame sampling, data-loading and augmentation settings.
data:
    dataset: somethingv2
    modality: RGB
    num_segments: 16
    seg_length: 1
    batch_size: 12
    test_batch_size: 6
    workers: 4
    num_classes: 174
    # Frame filename pattern.
    # NOTE(review): `{:05d}` looks like a Python str.format field (zero-padded
    # 5-digit index) — confirm against the dataset loader.
    image_tmpl: 'img_{:05d}.jpg'
    # `<STHV2_ROOT>` is a placeholder; replace it with the local dataset root.
    train_root: '<STHV2_ROOT>'
    train_list: 'lists/sthv2/train_rgb.txt'
    val_root: '<STHV2_ROOT>'
    val_list: 'lists/sthv2/val_rgb.txt'
    label_list: 'lists/sth_labels.csv'
    input_size: 224
    random_shift: true
    num_sample: 2
    rand_aug: true
    rand_erase: false
# Model settings.
network:
    arch: ViT-L/14
    init: true
    dropout: 0.0
    emb_dropout: 0.0
    # Options noted by the author: Transf, None.
    # The value is the *string* "None" (YAML null is spelled `null`); it is
    # quoted here so that this is explicit.
    sim_header: 'None'
    drop_fc: 0
    n_emb: 448
    side_dim: 448
    drop_layers: []
    corr_layer_index: [7]
    corr_dim: 256
    # Options noted by the author: 'cosine', 'dotproduct_softmax'.
    corr_func: 'cosine'
    corr_window: [5, 9, 9]
    corr_ext_chnls: [96]
    corr_int_chnls: [96, 96, 192]
    corr_num_encoders: 2
    fix_clip: false
    my_fix_clip: true
    num_checkpoints: 24
# Optimizer, learning-rate schedule and loss settings.
solver:
    # NOTE(review): presumably the learning-rate schedule type — confirm.
    type: cosine
    epochs: 30
    start_epoch: 0
    epoch_offset: 0
    optim: adamw
    # Keep the decimal point before the exponent (`2.e-4`, not `2e-4`).
    # YAML 1.1 loaders such as PyYAML only resolve exponent notation to a
    # float when the mantissa contains a `.`; without it the value is loaded
    # as a string.
    lr: 2.e-4
    warmup_lr: 1.e-7
    # NOTE(review): unit (epochs vs. iterations) is not visible here — confirm.
    lr_warmup_step: 4
    final_factor: 0.01
    betas: [0.9, 0.999] # (0.9, 0.999)
    weight_decay: 0.15
    loss_type: CE
    evaluate: False
    clip_ratio: 1
    grad_accumulation_steps: 1
    # Disabled option, kept for reference:
    # mixup: True
    smoothing: 0.1
    # The trailing `0.7` is an alternative value left by the author.
    layer_decay: 1.0 # 0.7
# Logging and evaluation-reporting settings.
# NOTE(review): the units of `print_freq` / `eval_freq` (iterations vs. epochs)
# are not visible in this file — confirm against the training loop.
logging:
    print_freq: 10
    eval_freq: 2
    skip_epoch: []
    acc_per_class: true
    correct_per_sample: true
# Weights & Biases experiment-tracking settings.
# `entity` and `key` hold placeholders: supply real values locally and never
# commit a real API key to version control.
wandb:
    use_wandb: true
    entity: '<WANDB_ID>'
    key: '<WANDB_API_KEY>'
    project_name: 'moss_sthv2'
    exp_name: 'default'
    # This is the *string* "None", not YAML null (which is spelled `null`).
    # NOTE(review): confirm the consumer treats "None" as "no group".
    group_name: 'None'