from datasets import load_dataset
import argparse


def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of this script.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``output_dir`` (directory the subset is saved to) and
        ``num_samples`` (number of leading rows to keep).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_dir", default="./datasets/uground_21k")
    parser.add_argument("--num_samples", type=int, default=21000)
    return parser.parse_args(argv)


def main(argv=None) -> None:
    """Load the train split, keep its first rows and save them to disk.

    Args:
        argv: Argument strings forwarded to :func:`parse_args`.
    """
    # Parse before loading so a bad command line fails before any data is
    # fetched.
    args = parse_args(argv)

    data_source = 'osunlp/UGround-V1-Data'
    dataset = load_dataset(data_source, split='train')
    # Keep only the first `num_samples` rows, in their original order.
    dataset = dataset.select(range(args.num_samples))
    dataset.save_to_disk(args.output_dir)


if __name__ == "__main__":
    main()