---
# Run-level settings.

# No value is configured here; an explicit null parses the same as an empty value.
# NOTE(review): presumably a checkpoint path to resume training from - confirm against the trainer.
resume: null
# Path to the weights file loaded as the starting point.
# NOTE(review): the file name suggests a Kinetics-400 ViT-L/14-336px 16-frame checkpoint - confirm.
pretrain: exps/k400/ViT-L/14-336px/16f/k400-vit-l-14-336-f16.pt
seed: 1024
# Dataset and input-pipeline settings.
data:
  dataset: charades
  modality: RGB
  # NOTE(review): presumably the number of frames/segments sampled per video - confirm against the loader.
  num_segments: 16
  seg_length: 1
  batch_size: 8
  workers: 4
  num_classes: 157
  # Must stay quoted: a plain scalar starting with '{' would be parsed as a flow mapping.
  # NOTE(review): looks like a Python str.format template (second field zero-padded to 6 digits) - confirm.
  image_tmpl: '{}-{:06d}.jpg'
  fps: 24
  # Training and validation point at the same frame directory.
  # NOTE(review): absolute, machine-specific path - adjust for the local environment.
  train_root: /bpfs/v2_mnt/VIS/wuwenhao/Charades/Charades_v1_rgb
  train_list: lists/charades/Charades_train_split_label.csv
  val_root: /bpfs/v2_mnt/VIS/wuwenhao/Charades/Charades_v1_rgb
  val_list: lists/charades/Charades_v1_test.csv
  label_list: lists/charades/Charades_v1_classes.txt
  input_size: 336
  random_shift: true
  output_path: exps
# Model settings.
network:
  arch: ViT-L/14-336px  # other values noted by the author: ViT-B/32, ViT-B/16
  init: true
  tm: false
  # Kept as floats (0.0); `drop` below is an integer 0. Do not unify the two spellings.
  drop_out: 0.0
  emb_dropout: 0.0
  sim_header: Transf  # values noted by the author: Transf, None
  interaction: VCS  # values noted by the author: DP, VCS
  joint_st: false
  drop: 0
  # NOTE(review): presumably freeze flags for the text / video branches - confirm against the model code.
  fix_text: true
  fix_video: false
# Optimizer and schedule settings.
solver:
  type: cosine
  epochs: 20
  start_epoch: 0
  optim: adamw
  # Keep the '.' in the mantissa: YAML 1.1 loaders (e.g. PyYAML) read `5e-5` as a string, not a float.
  lr: 5.e-5
  # NOTE(review): unit (epochs vs. iterations) is not visible in this file - confirm against the scheduler.
  lr_warmup_step: 5
  weight_decay: 0.2
  loss_type: NCE
  evaluate: false
  clip_ratio: 0.1
  grad_accumulation_steps: 1
# Logging / evaluation cadence.
# NOTE(review): units (iterations for print_freq, epochs for eval_freq?) are not visible here - confirm.
logging:
  print_freq: 10
  eval_freq: 1