{
  "TextEncoders": {
    "bert": {
      "config": "configs/config_bert.json",
      "d_model": 768,
      "fusion_layer": 9,
      "name": "bert_base",
      "pretrained": "bert-base-uncased"
    },
    "bert_large": {
      "config": "configs/config_bert_large.json",
      "d_model": 1024,
      "fusion_layer": 19,
      "name": "bert_large",
      "pretrained": "bert-large-uncased"
    },
    "med_bert": {
      "config": "configs/med_config.json",
      "d_model": 768,
      "name": "med_bert_base",
      "pretrained": "bert-base-uncased"
    },
    "med_bert_large": {
      "config": "configs/med_large_config.json",
      "d_model": 768,
      "name": "med_bert_large",
      "pretrained": "bert-base-uncased"
    }
  },
  "VisionEncoders": {},
  "architectures": [
    "InternVideo2_Stage2"
  ],
  "auto_map": {
    "AutoConfig": "modeling_internvideo2.InternVideo2_Stage2_Config",
    "AutoModel": "modeling_internvideo2.InternVideo2_Stage2"
  },
  "auto_resume": true,
  "available_corpus": {
    "anet_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 150,
      "media_type": "video"
    },
    "anet_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 150,
      "media_type": "video"
    },
    "audiocaps_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "audiocaps_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "cc12m": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "cc3m": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "cc3m_debug": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "charades_mc_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "clothov1_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "clothov1_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "clothov2_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "clothov2_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "coco": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "data_25m": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "jump_filter": true,
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "jump_filter": true,
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      }
    ],
    "debug": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      }
    ],
    "didemo_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 64,
      "media_type": "video",
      "trimmed30": true
    },
    "didemo_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 64,
      "media_type": "video",
      "trimmed30": true
    },
    "didemo_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 64,
      "media_type": "video",
      "trimmed30": true
    },
    "hmdb51_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "internvid_v1": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "video"
    },
    "internvid_v2_avs_private": {
      "anno_path": "your_path",
      "caption_augmentation": {
        "caption_sample_type": "avs_all"
      },
      "data_root": "",
      "jump_filter": true,
      "media_type": "audio_video",
      "read_audio_from_video": true,
      "read_clip_from_video": false,
      "zero_audio_padding_for_video": true
    },
    "k400_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "k600_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "k700_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "laion_2b": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "laion_coco": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "laion_pop": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "lsmdc_ret_test_1000": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "lsmdc_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 96,
      "media_type": "video"
    },
    "lsmdc_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 96,
      "media_type": "video"
    },
    "mit_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "msrvtt_1k_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "msrvtt_ret_test1k": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "msrvtt_ret_train9k": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "msvd_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 64,
      "media_type": "video"
    },
    "msvd_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "has_multi_txt_gt": true,
      "max_txt_l": 64,
      "media_type": "video"
    },
    "msvd_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 64,
      "media_type": "video"
    },
    "pretrain_example_data_1B": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      }
    ],
    "pretrain_example_data_6B": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      },
      {
        "anno_path": "your_path",
        "caption_augmentation": {
          "caption_sample_type": "avs_all"
        },
        "data_root": "",
        "jump_filter": true,
        "media_type": "audio_video",
        "read_audio_from_video": true,
        "read_clip_from_video": false,
        "zero_audio_padding_for_video": true
      }
    ],
    "sbu": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "ssv2_mc_val": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "ucf101_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "vatex_ch_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "vatex_en_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "has_multi_txt_gt": true,
      "media_type": "video"
    },
    "vatex_en_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "vg": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "wavcaps_400k": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "webvid": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "webvid_10m": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "webvid_debug": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "webvid_fuse_10m": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "video"
    }
  },
  "batch_size": 8,
  "batch_size_test": 4,
  "compile_model": false,
  "debug": false,
  "deep_fusion": false,
  "deepspeed": {
    "enable": true,
    "stage": 1
  },
  "device": "cuda",
  "dist_url": "env://",
  "evaluate": true,
  "evaluation": {
    "eval_frame_ensemble": "concat",
    "eval_offload": true,
    "eval_x_only": false,
    "k_test": 128
  },
  "gradient_checkpointing": true,
  "inputs": {
    "batch_size": {
      "image": 8,
      "video": 8
    },
    "batch_size_test": {
      "image": 4,
      "video": 4
    },
    "image_res": 224,
    "max_txt_l": {
      "image": 40,
      "video": 40
    },
    "video_input": {
      "num_frames": 4,
      "num_frames_test": 4,
      "random_aug": false,
      "sample_type": "rand",
      "sample_type_test": "middle"
    }
  },
  "jump_evaluate": false,
  "log_freq": 100,
  "max_txt_l": 40,
  "mode": "pt",
  "model": {
    "embed_dim": 512,
    "find_unused_parameters": false,
    "model_cls": "InternVideo2_Stage2",
    "multimodal": {
      "enable": true
    },
    "temp": 0.07,
    "text_encoder": {
      "config": "configs/config_bert_large.json",
      "d_model": 1024,
      "fusion_layer": 19,
      "name": "bert_large",
      "pretrained": "bert-large-uncased"
    },
    "vision_encoder": {
      "checkpoint_num": 40,
      "clip_embed_dim": 768,
      "clip_input_resolution": 224,
      "clip_norm_type": "l2",
      "clip_return_layer": 6,
      "clip_student_return_interval": 1,
      "clip_teacher": null,
      "clip_teacher_embed_dim": 3200,
      "clip_teacher_final_dim": 768,
      "clip_teacher_return_interval": 1,
      "d_model": 1408,
      "image_mask_ratio": 0.5,
      "image_mask_type": "random",
      "img_size": 224,
      "keep_temporal": false,
      "name": "pretrain_internvideo2_1b_patch14_224",
      "num_frames": 4,
      "only_mask": true,
      "patch_size": 14,
      "pretrained": "",
      "sep_image_video_pos_embed": true,
      "tubelet_size": 1,
      "use_checkpoint": true,
      "use_flash_attn": false,
      "use_fused_mlp": false,
      "use_fused_rmsnorm": false,
      "video_mask_ratio": 0.8,
      "video_mask_type": "random"
    }
  },
  "num_frames": 4,
  "num_frames_test": 4,
  "num_workers": 6,
  "origin_num_frames": 4,
  "output_dir": null,
  "resume": false,
  "save_latest": false,
  "seed": 42,
  "size_t": 224,
  "text_enc": "bert_large",
  "torch_dtype": "float32",
  "transformers_version": "4.37.2",
  "use_bf16": false,
  "use_flash_sdp": false,
  "use_half_precision": false,
  "use_mem_efficient_sdp": false
}
