---
# Dataset configuration pointing at the processed ShareGPT JSONL data.
# NOTE(review): the meaning of each key is inferred from its name and value
# only; confirm against the code that loads this file.

# Data file and the split name requested from it.
name: /mnt/data/data-selection/data/processed/sharegpt/sharegpt_data.jsonl
split: train

# Presumably the per-record field names for the label, the payload and the
# record identifier -- TODO confirm against the consumer's schema.
label_column: dataset
data_column: messages
idx_column: id

# Scores file; it sits in the same directory as the data file above.
scores_path: /mnt/data/data-selection/data/processed/sharegpt/scores.jsonl

# Presumably the fraction of records to keep (0.1 would be 10%) and the
# random seed used for that draw -- TODO confirm.
subsample: 0.1
seed: 42