# Data arguments
# Top-level scalar settings for the data pipeline. The code that consumes
# these keys is not visible from this file, so the per-key notes below are
# assumptions to be confirmed against the loader.
# Boolean flag; presumably selects an iterable (streaming) dataset rather than
# a map-style one — TODO confirm.
use_iterable_dataset: true
# Explicit null; presumably means "no cap on training examples" — verify how
# the consumer treats a null value here.
num_train: null
# Presumably the number of examples used for evaluation — verify.
num_eval: 1024

# Presumably the number of data-loading worker processes — verify.
num_workers: 4
# NOTE(review): this is the integer 0, not a boolean. If the consumer treats
# it as an on/off flag, `false` is the canonical YAML spelling — confirm the
# expected type before changing it.
add_special_tokens: 0
# Presumably the side on which sequences are padded — TODO confirm.
padding_side: 'right'

# Train data configuration
# Mapping keyed by an entry name; only one entry, `A`, is defined here.
train_data:
  A:
    # Relative path; presumably resolved against the process working
    # directory — verify against the code that loads it.
    dataset_path: 'data/train_data/fine_web_sample_10BT_digitized'

# Eval data configuration
# Mapping keyed by an entry name; only one entry, `A`, is defined here.
# NOTE(review): the dataset_path below is identical to train_data.A's path in
# this file, so evaluation reads from the same dataset as training. Confirm
# this is intentional (e.g. the consumer carves out a held-out slice);
# otherwise eval metrics may be measured on training data.
eval_data:
  A:
    # Relative path; presumably resolved against the process working
    # directory — verify against the code that loads it.
    dataset_path: 'data/train_data/fine_web_sample_10BT_digitized'
