---
# Dataset description: one source file, the field names used with it, and two
# side files. Every path below is absolute and sits in the same directory:
# /mnt/data/data-selection/data/processed/wizardlm/

# Source file and split.
# NOTE(review): `name` holds a file path rather than a dataset identifier —
# presumably the consumer accepts a local JSONL path here; confirm.
name: '/mnt/data/data-selection/data/processed/wizardlm/wizardlm_data.jsonl'
split: train

# Field names. Presumably these are keys of each record in the JSONL file
# (label, payload, and unique index respectively) — verify against the consumer.
label_column: dataset
data_column: messages
idx_column: id

# Side files stored next to the source file.
# NOTE(review): whether these are read, written, or both is not visible from
# this file — confirm with the code that consumes this config.
scores_path: '/mnt/data/data-selection/data/processed/wizardlm/scores.jsonl'
embedding_cache_path: '/mnt/data/data-selection/data/processed/wizardlm/embeddings.npy'