model:
  # Architecture and variant identifiers.
  arch: timechat
  model_type: pretrain_llama_v2

  # Weight / checkpoint locations (absolute paths under /mnt).
  vit_model: "/mnt/timechat/eva_vit_g.pth"
  q_former_model: "/mnt/timechat/instruct_blip_vicuna7b_trimmed.pth"
  llama_model: "/mnt/Video-LLaMA/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf/"
  ckpt: "/mnt/timechat/timechat_7b_paper.pth"

  # Freeze switches; every one of them is enabled in this config.
  # NOTE(review): presumably each flag stops gradient updates for the named
  # sub-module -- confirm against the model code.
  freeze_vit: true
  freeze_qformer: true
  frozen_llama_proj: true
  frozen_video_Qformer: true

  # Text-side settings.
  max_txt_len: 2048
  end_sym: "</s>"
  qformer_text_input: true

  # Fusion head / frame position settings.
  # max_frame_pos (96) equals datasets.webvid n_frms and num_frm (both 96).
  # NOTE(review): presumably these have to stay in sync -- confirm.
  fusion_header_type: "seqTransf"
  fusion_head_layers: 2
  max_frame_pos: 96

  # Window settings; stride equals window_size here (32 and 32).
  window_size: 32
  stride: 32

  # LoRA and resource-related switches.
  lora: true
  lora_inference_mode: true
  low_resource: false
  use_grad_checkpoint: true

datasets:
  webvid:
    # Dataset-level settings.
    model_type: "llama_v2"
    # NOTE(review): this is a relative path, while model.llama_model is an
    # absolute /mnt path ending in the same directory components -- confirm
    # that both resolve to the intended tokenizer/model directory.
    tokenizer_name: "ckpt/Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf/"
    max_txt_len: 2048
    num_video_query_token: 32

    # Frame sampling: num_frm matches vis_processor.train.n_frms below (96).
    sample_type: "uniform"
    num_frm: 96
    stride: 32

    # Processors registered under the "train" split key.
    # NOTE(review): the visual processor name ends in "_eval" although it sits
    # under "train" -- confirm this is intentional.
    vis_processor:
      train:
        name: "alpro_video_eval"
        n_frms: 96
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

# Run settings; only the task name is set here.
run:
  task: video_text_pretrain
