{
  "model": {
    "model_cls": "VideoChat2_it_phi",
    "vit_blip_model_path": "models/weights/videochat2/umt_l16_qformer.pth",
    "phi_model_path": "models/weights/Phi-3-mini-128k-instruct",
    "videochat2_model_path": "models/weights/VideoChat2_stage3_Phi3/videochat2_phi3_stage3.pth",
    "freeze_vit": false,
    "freeze_qformer": false,
    "max_txt_len": 512,
    "low_resource": false,
    "vision_encoder": {
      "name": "vit_l14",
      "img_size": 224,
      "patch_size": 16,
      "d_model": 1024,
      "encoder_embed_dim": 1024,
      "encoder_depth": 24,
      "encoder_num_heads": 16,
      "drop_path_rate": 0.0,
      "num_frames": 8,
      "tubelet_size": 1,
      "use_checkpoint": true,
      "checkpoint_num": 18,
      "pretrained": "",
      "return_index": -2,
      "vit_add_ln": true,
      "ckpt_num_frame": 4
    },
    "num_query_token": 32,
    "qformer_hidden_dropout_prob": 0.1,
    "qformer_attention_probs_dropout_prob": 0.1,
    "qformer_drop_path_rate": 0.2,
    "extra_num_query_token": 64,
    "qformer_text_input": true,
    "system": "",
    "start_token": "<Video>",
    "end_token": "</Video>",
    "add_second_msg": true,
    "img_start_token": "<Image>",
    "img_end_token": "</Image>",
    "random_shuffle": true,
    "return_question_instruction": false,
    "use_flash_attention": true,
    "use_lora": false,
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.1
  },
  "device": "cuda"
}
