# Dataset-group configuration for `vlm_sft`.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the loader that consumes this file.
vlm_sft:
  # Four datasets belong to this group.
  dataset_names:
  - language_table_policy
  - language_table_reward
  - language_table_inverse_dynamics
  - language_table_count_blocks
  # Both size bounds are exact multiples of the stride (980 = 14 * 70,
  # 378 = 14 * 27); keep that property when editing these values.
  image_transform_args:
    image_stride: 14
    max_image_size: 980
    min_image_size: 378
  # Lower/upper bound on the number of frames — presumably per sampled
  # sequence; TODO confirm.
  frame_sampler_args:
    max_num_frames: 12
    min_num_frames: 8
  is_mandatory: true
  # Canonical lowercase boolean, matching `is_mandatory` above.
  shuffle_lines: true
  shuffle_seed: 0
  # Four entries, the same count as `dataset_names` — presumably paired
  # with the datasets by position; TODO confirm.
  num_used_data:
  - 18476
  - 20476
  - 18476
  - 20476
  weight: 1

# Dataset-group configuration for `dynamics_sft`.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the loader that consumes this file.
dynamics_sft:
  # Two datasets belong to this group.
  dataset_names:
  - language_table_block2block
  - language_table_block2pos
  # Both size bounds are exact multiples of the stride (1024 = 16 * 64,
  # 512 = 16 * 32); keep that property when editing these values.
  image_transform_args:
    image_stride: 16
    max_image_size: 1024
    min_image_size: 512
  # A second, separate set of transform arguments with its own stride and
  # bounds (980 = 14 * 70, 378 = 14 * 27) — presumably for a ViT input
  # path; TODO confirm.
  vit_image_transform_args:
    image_stride: 14
    max_image_size: 980
    min_image_size: 378
  is_mandatory: true
  # Canonical lowercase boolean, matching `is_mandatory` above.
  shuffle_lines: true
  shuffle_seed: 0
  # Two entries, the same count as `dataset_names` — presumably paired
  # with the datasets by position; TODO confirm.
  num_used_data:
  - 11822
  - 12565
  weight: 1