# Shared dataset settings. The `train` and `val` sections below pull each of
# these values in through `${..key}` relative interpolations, so changing a
# value here changes it for both splits.
_name_: fineweb_llama  # for pre-training
# Sequence length — presumably counted in tokens per sample; confirm with the loader.
seq_len: 4096
# NOTE(review): semantics are defined by the dataset class, not visible here.
ignore_input_index: 2
# Presumably the on-disk width of one token id in the .bin files; 2 bytes can
# hold ids up to 65535, which covers vocab_size below — confirm with the loader.
num_bytes: 2
vocab_size: 32000
# Presumably `batch_size` is per device and `global_batch_size` is the total
# across devices / accumulation steps — TODO confirm against the trainer.
batch_size: 2
global_batch_size: 512
# Training split. Every `${..key}` value is a relative interpolation that
# refers to the key of the same name one level up (the top level of this file),
# assuming OmegaConf-style interpolation semantics.
train:
  _name_: lm_ordered
  tokenized_file_path: "/home/anonymous/fake_path/dataset/fineweb_edu_100B/llama-train.bin"
  batch_size: ${..batch_size}
  global_batch_size: ${..global_batch_size}
  seq_len: ${..seq_len}
  num_bytes: ${..num_bytes}
  vocab_size: ${..vocab_size}
  # Token cap of 110*10**9 (110e9), assuming the project-registered `eval`
  # resolver evaluates the quoted arithmetic expression.
  # NOTE(review): the dataset directory is named fineweb_edu_100B while this
  # cap is 110B tokens — confirm the cap is intentional.
  limit_tokens: ${eval:"110*10**9"}
  ignore_input_index: ${..ignore_input_index}
# Validation split. Same dataset class as `train`, reading the validation
# token file. Shared settings come from the top level of this file through
# `${..key}` relative interpolations (assuming OmegaConf-style semantics).
# Keys are listed in the same order as in the `train` section for easy diffing.
val:
  _name_: lm_ordered
  tokenized_file_path: "/home/anonymous/fake_path/dataset/fineweb_edu_100B/llama-val.bin"
  batch_size: ${..batch_size}
  global_batch_size: ${..global_batch_size}
  seq_len: ${..seq_len}
  num_bytes: ${..num_bytes}
  vocab_size: ${..vocab_size}
  # 512000*1024 = 524,288,000, i.e. roughly 0.5B tokens, assuming the
  # project-registered `eval` resolver evaluates the quoted expression.
  limit_tokens: ${eval:"512000*1024"}
  ignore_input_index: ${..ignore_input_index}
