 # Copyright (c) 2022, salesforce.com, inc.
 # All rights reserved.
 # SPDX-License-Identifier: BSD-3-Clause
 # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

# Dataset configuration with a single entry, `mixed_mllmu`.
# NOTE(review): every annotation and image path below refers to Flickr30k even
# though the entry is named `mixed_mllmu` — presumably the MLLMU portion is
# located elsewhere by the consuming builder; confirm.
datasets:
  mixed_mllmu:
    # data_dir: ${env.data_dir}/datasets
    data_type: images

    build_info:
      # One annotation file per split. Each split lists a `url` and a `storage`
      # path (presumably download source and local destination — TODO confirm
      # against the dataset builder).
      annotations:
        train:
          url: https://storage.googleapis.com/sfr-vision-language-research/datasets/flickr30k_train.json
          storage: flickr30k/annotations/train.json
        val:
          url: https://storage.googleapis.com/sfr-vision-language-research/datasets/flickr30k_val.json
          storage: flickr30k/annotations/val.json
        test:
          url: https://storage.googleapis.com/sfr-vision-language-research/datasets/flickr30k_test.json
          storage: flickr30k/annotations/test.json
      images:
        storage: flickr30k/images
        # storage: /export/share/datasets/vision/flickr30k

      # Sample counts for the two sources; presumably how many Flickr30k and
      # MLLMU examples go into the mix — verify how the builder reads them.
      flickr_samples: 200
      mllmu_samples: 50