# List of dataset entries. Each entry names a JSON file (`json_path`) and the
# sampling strategy (`sampling_strategy`) paired with it.
# NOTE(review): the numeric suffix in each file name looks like a sample
# count for that file — confirm against the data before relying on it.
# NOTE(review): 'all' presumably keeps every entry and 'first:N%' presumably
# keeps only the leading N% of entries — verify against the consuming loader.
datasets:
  # Entries under llava_instruct/real_vision_flan/
  - json_path: llava_instruct/real_vision_flan/chrome_writting_train_8835.json
    sampling_strategy: 'all'
  - json_path: llava_instruct/real_vision_flan/k12_printing_train_256646.json
    sampling_strategy: 'first:1%'
  - json_path: llava_instruct/real_vision_flan/iiit5k_annotations_2000.json
    sampling_strategy: 'all'
  - json_path: llava_instruct/real_vision_flan/hme100k_train_clean_74502.json
    sampling_strategy: 'first:10%'
  - json_path: llava_instruct/real_vision_flan/textocr_gpt4v_train_converted_25114.json
    sampling_strategy: 'all'

  # Entries under llava_instruct/cauldron/
  - json_path: llava_instruct/cauldron/iam_5663.json
    sampling_strategy: 'all'
  - json_path: llava_instruct/cauldron/rendered_text_10000.json
    sampling_strategy: 'all'
