# Seed value. NOTE(review): presumably seeds the random number generators for
# reproducibility; the consumer is not visible in this file, so confirm there.
seed: 1024
# Dataset and data-loading settings. The code that consumes these keys is not
# part of this file; notes marked NOTE(review) are inferred from key names only.
data:
    dataset: k600
    modality: video
    # NOTE(review): presumably the number of segments sampled per video and the
    # number of frames taken from each segment; confirm against the data loader.
    num_segments: 8
    seg_length: 1
    batch_size: 24
    workers: 8
    # Same count as the "k160" in the list and label file names below.
    num_classes: 160
    # Looks like a Python str.format template: 5-digit zero-padded index,
    # e.g. img_00001.jpg.
    image_tmpl: 'img_{:05d}.jpg'
    # Written as an explicit null rather than a bare empty value (both parse to
    # null). NOTE(review): presumably has to be set to the validation data root
    # directory before running; confirm how the loader handles null.
    val_root: null
    val_list: lists/k600_test/k160_test_split2.txt
    label_list: lists/k600_test/k160_labels_split2.csv
    # NOTE(review): presumably the index of the first frame file (1-based, which
    # would match image_tmpl starting at img_00001.jpg); confirm.
    index_bias: 1
    input_size: 224
# Model settings. The code that consumes these keys is not part of this file;
# notes marked NOTE(review) are inferred from key names only.
network:
    # Backbone identifier. Options listed by the original author:
    # ViT-B/32, ViT-B/16.
    arch: ViT-B/32
    # Canonical lowercase boolean; parses to the same value as `True`.
    init: true
    drop_out: 0.0
    emb_dropout: 0.0
    # NOTE(review): `Transf` and `DP` are opaque mode names here; the accepted
    # values are defined by the consuming code, so verify there before changing.
    sim_header: Transf
    interaction: DP
    temporal_layer: 3
# Logging and evaluation cadence settings.
logging:
    # NOTE(review): presumably "log every 10 iterations"; the unit is not
    # visible in this file, so confirm with the consumer.
    print_freq: 10
    # NOTE(review): presumably "evaluate every 1 epoch"; the unit is not
    # visible in this file, so confirm with the consumer.
    eval_freq: 1