 # Copyright (c) 2022, salesforce.com, inc.
 # All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
  ocr_vqa:  # 1002146 train examples
    # data_dir: ${env.data_dir}/datasets
    data_type: images  # [images|videos|features]

    # Only a `train` entry is defined for the visual processor in this file.
    vis_processor:
      train:
        name: "clip_image_train"
        image_size: 224

    # The same processor name is used for both the `train` and `eval` entries.
    text_processor:
      train:
        name: "blip_question"
      eval:
        name: "blip_question"

    build_info:
      # Do not put a minus sign (-) in front of a split key such as `train`:
      # that would turn the split into a YAML list item instead of a mapping key.
      annotations:
        train:
          url:
            - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/ocrvqa/ocrvqa.json
            # - /export/video-language-dataset/ocrvqa/ocrvqa.json
          storage:
            - ocrvqa/annotations/ocrvqa.json
            # - /export/video-language-dataset/ocrvqa/ocrvqa.json
      images:
        # NOTE(review): this is an absolute path, unlike the relative annotation
        # storage entry above; presumably it must be adapted per machine - confirm.
        storage: /export/video-language-dataset/ocrvqa/images/
