# Training annotation files (JSON), one path per sequence entry.
# The first entry lives under data_rewrite/; the remaining four are the
# numbered OFA-large-caption files (suffixes 0 through 3).
train_file:
  - "/data/dataset/dataset_json/data_rewrite/flickr30k_train_caps=1.json"
  - "/data/dataset/dataset_json/OFA-large-caption/flickr30k_train_0.json"
  - "/data/dataset/dataset_json/OFA-large-caption/flickr30k_train_1.json"
  - "/data/dataset/dataset_json/OFA-large-caption/flickr30k_train_2.json"
  - "/data/dataset/dataset_json/OFA-large-caption/flickr30k_train_3.json"
# Validation / test annotation files and the image root directory.
# NOTE(review): test_file is a relative path while val_file, image_root and
# the train_file entries are absolute; it is presumably resolved against the
# consumer's working directory — confirm this is intended.
val_file: "/data/dataset/dataset_json/data/flickr30k_val.json"
test_file: "./data_annotation/flickr30k_test.json"
image_root: "/data/dataset/Flickr30k/"

# Path to the BERT configuration JSON (also a relative path — see the
# NOTE(review) on test_file).
bert_config: "./configs/config_bert.json"

# Dataset identifier; presumably used by the consumer to select
# dataset-specific handling — verify against the loading code.
dataset_name: "flickr30k"

# Input resolution and batch sizes.
# image_res was changed from an original value of 384 (see inline note);
# presumably the image side length in pixels — TODO confirm.
image_res: 224 # original: 384
batch_size_train: 128
batch_size_test: 16
# The keys below are commented out, so this section does not set them.
# queue_size: 65536
# momentum: 0.995
# vision_width: 768
# embed_dim: 256
# temp: 0.07
# k_test: 128

# Attack configuration.
# NOTE(review): the unit of epsilon is not stated in this file (e.g. raw
# pixel levels vs. a 0-1 scale) — confirm against the code that reads it.
# num_iters is presumably the number of attack iterations — verify.
epsilon: 2
num_iters: 10

# alpha: 0.4
# Boolean flag, written in canonical lowercase form so that YAML 1.1 and
# YAML 1.2 loaders (and yamllint's `truthy` rule) all treat it identically.
# Presumably toggles distillation in the consumer — verify.
distill: false
# warm_up: True

# optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02}
# schedular: {sched: cosine, lr: 1e-5, epochs: 10, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0}

