# Zero-shot evaluation needs a pretrained model. The value is left unset
# (null) in this file and must be filled in before running.
# NOTE(review): presumably a checkpoint path - confirm against the loader.
pretrain: null
# NOTE(review): presumably the random seed for the run - confirm.
seed: 1024
# Dataset and input settings.
data:
    dataset: ucf101
    modality: RGB
    num_segments: 8
    seg_length: 1
    split: 1
    batch_size: 64
    workers: 16
    # NOTE(review): 101 lines up with the ucf101 dataset name above; keep it
    # consistent with label_list if the dataset is changed.
    num_classes: 101
    # Frame file-name template. NOTE(review): looks like a Python format
    # spec (zero-padded 6-digit integer) - confirm against the data loader.
    image_tmpl: '{:06d}.jpg'
    # Relative paths; the base directory they resolve against is not defined
    # in this file.
    val_list: 'lists/ucf101_val_frames.txt'
    label_list: 'lists/ucf_labels_sep.csv'
    # NOTE(review): presumably the index of the first frame file used with
    # image_tmpl - confirm.
    index_bias: 1
    input_size: 224
    # Both values are 0 here. The original inline notes recorded 2 (for N)
    # and 9 (for M) as the alternative settings.
    randaug:
        N: 0
        M: 0
# Model settings.
network:
    # Backbone options noted by the author: ViT-B/32, ViT-B/16.
    arch: ViT-B/16
    init: True
    drop_out: 0.0
    emb_dropout: 0.0
    type: clip_ucf
    # Options noted by the author: Transf, meanP, LSTM, Transf_cls, Conv_1D.
    sim_header: 'Transf'
    fix_text: False
    fix_img: False
    # Left unset (null) in this configuration.
    describe: null
# Logging / evaluation cadence.
# NOTE(review): the units of both frequencies (iterations vs. epochs) are
# defined by the consuming script, not by this file - confirm there.
logging:
    print_freq: 10
    eval_freq: 1
# Settings for the text denoiser stage.
# NOTE(review): this section key is upper-case unlike the other sections; it
# is kept as-is because the consuming code presumably looks it up by name.
DENOISER:
    corpus: './en.json'
    top_k: 10
    num_beams: 1
    num_iter: 6
    percentage: 100
    # Options noted by the author: linear, log.
    temp: 'linear'
    # Output file-name template with two positional '{:}' placeholders.
    # NOTE(review): which values fill the placeholders is decided by the
    # consuming code - confirm there.
    result: './new_textlist_beam{:}_{:}.list'
    idx_sim: 1
    intra: True
    inter: True
    # Options noted by the author: max, mean.
    Q_style: 'mean'