
datasets:
  sqa3d:
    # data_dir: ${env.data_dir}/datasets
    # Data types requested for this dataset, given as a list. Both names
    # (`video`, `pc`) have a matching storage entry under build_info.
    # NOTE(review): `pc` presumably means point cloud. The earlier template
    # hint "[images|videos|features]" listed neither value used here, so
    # confirm the accepted names against the dataset builder.
    data_type: [video, pc]

    # Video processor configuration, keyed by phase. Train and eval select
    # different processor names but share the same n_frms and image_size.
    video_processor:
      train:
        name: "blip2_video_train"
        n_frms: 4
        image_size: 224
      eval:
        name: "blip_video_eval"
        n_frms: 4
        image_size: 224

    # Text processor configuration, keyed by phase. Both phases select
    # "blip_question"; only train sets `prompt` (to the empty string).
    # NOTE(review): eval omits `prompt`, so it presumably falls back to the
    # processor's default — confirm that this matches the empty train prompt.
    text_processor:
      train:
        name: "blip_question"
        prompt: ""
      eval:
        name: "blip_question"

    # Frame processor configuration, keyed by phase. The processor names and
    # settings are identical to those in depth_processor.
    # NOTE(review): `frame` is not listed in data_type above, so this stanza
    # may be unused in the current setup — confirm against the builder.
    frame_processor:
      train:
        name: "blip2_frames_train"
        n_frms: 4
        image_size: 224
      eval:
        name: "blip_frames_eval"
        n_frms: 4
        image_size: 224

    # Depth processor configuration, keyed by phase. It selects the same
    # processor names and settings as frame_processor.
    # NOTE(review): `depth` is not listed in data_type above, so this stanza
    # may be unused in the current setup — confirm against the builder.
    depth_processor:
      train:
        name: "blip2_frames_train"
        n_frms: 4
        image_size: 224
      eval:
        name: "blip_frames_eval"
        n_frms: 4
        image_size: 224

    # Locations of the annotation files and of each data source.
    build_info:
      # Do not put a minus sign (-) in front of a split name (train/val/test):
      # that would turn the split into a list item instead of a mapping key.
      # NOTE(review): each `url` holds the same bare file name as `storage`
      # rather than a downloadable URL — presumably the annotation files must
      # already exist locally; confirm.
      annotations:
        train:
          url:
            - SQA_train.json
          storage:
            - SQA_train.json
        val:
          url:
            - SQA_val.json
          storage:
            - SQA_val.json
        test:
          url:
            - SQA_test.json
          storage:
            - SQA_test.json

      # One storage path per data source. `video` and `pc` are the two
      # sources named in data_type.
      video:
        storage: sqa3d/video/

      pc:
        storage: scannet_feat/

      frame:
        storage: frames/sqa3d/

      depth:
        storage: frames/sqa3d_depth/