# Run identity. `run_name` is reused through ${run_name} interpolation for the
# W&B name and group and as the last component of `save_folder`, so renaming
# the run here renames all of them.
run_name: peteish7-weka-microanneal-from928646_reddit-qa-all
seed: 7201
dry_run: false

# Weights & Biases logging. Both the run name and its group are taken from the
# top-level `run_name` key via interpolation.
wandb:
  project: olmo-medium
  name: "${run_name}"
  group: "${run_name}"

# Model architecture.
# NOTE(review): these values presumably have to match the checkpoint given in
# `load_path`, since training resumes from it — confirm before editing.
model:
  d_model: 4096
  n_heads: 32
  n_layers: 32
  mlp_hidden_size: 22016
  weight_tying: false
  # Positional encoding: RoPE is enabled, ALiBi is disabled.
  alibi: false
  rope: true
  rope_theta: 500000
  flash_attention: true
  attention_dropout: 0.0
  include_bias: false
  block_type: sequential
  layer_norm_type: rms
  layer_norm_with_affine: true
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-6` as a string, while `1.0e-6` is a float everywhere.
  layer_norm_eps: 1.0e-6
  bias_for_layer_norm: false
  attention_layer_norm: true
  attention_layer_norm_with_affine: true
  norm_after: true
  activation_type: swiglu
  residual_dropout: 0.0
  embedding_dropout: 0.0
  max_sequence_length: 4096
  # embedding_size (100352 = 784 * 128) is larger than vocab_size (100278);
  # presumably the embedding table is padded for efficiency — confirm.
  vocab_size: 100278
  embedding_size: 100352
  # Both special-token ids fall inside the vocabulary range [0, vocab_size).
  eos_token_id: 100257
  pad_token_id: 100277
  init_device: meta
  init_fn: normal
  init_std: 0.02
  init_cutoff_factor: 3

# Loss configuration.
softmax_auxiliary_loss: true
# Written as `1.0e-5` rather than `1e-5` so YAML 1.1 loaders (e.g. PyYAML)
# resolve a float instead of a string; the numeric value is unchanged.
auxiliary_loss_multiplier: 1.0e-5
fused_loss: true

# Explicitly null, i.e. no compile settings are provided by this config.
compile: null

# Optimizer settings (AdamW).
optimizer:
  name: adamw
  learning_rate: 0.000061499
  weight_decay: 0.1
  # Explicit mantissa dot so YAML 1.1 loaders (e.g. PyYAML) resolve a float
  # instead of the string "1e-8"; the numeric value is unchanged.
  eps: 1.0e-8
  # Weight decay is applied to norm/bias parameters but not to embeddings.
  decay_norm_and_bias: true
  decay_embeddings: false
  # Indented block sequence, matching the other sequences in this file.
  betas:
    - 0.9
    - 0.95
  metrics_log_interval: 1

# Learning-rate schedule. With t_warmup: 0 no warmup phase is configured.
# NOTE(review): alpha_f: 0 presumably means the LR decays linearly to zero
# (final LR as a fraction of optimizer.learning_rate) — confirm against the
# scheduler implementation.
scheduler:
  name: linear_with_warmup
  t_warmup: 0
  alpha_f: 0

# Tokenizer definition.
# NOTE(review): `identifier` is a relative path; it is presumably resolved
# against the working directory or repository root at launch — confirm.
tokenizer:
  identifier: tokenizers/allenai_dolma2.json
  truncate_direction: right

# Checkpoint output: a per-run directory whose last component follows
# ${run_name}. Existing contents are not overwritten.
save_folder: "/weka/oe-training-default/ai2-llm/checkpoints/OLMo-medium/peteish7-microanneals/${run_name}"
save_overwrite: false

# Sharded checkpoints: regular interval, ephemeral interval, retention count,
# and the checkpointer implementation.
# NOTE(review): -1 presumably means "keep every checkpoint" — confirm.
save_interval: 1000
save_interval_ephemeral: 250
save_num_checkpoints_to_keep: -1
sharded_checkpointer: olmo_core

# Unsharded checkpoints: the interval is null, so no unsharded save interval
# is configured here.
save_interval_unsharded: null
save_num_unsharded_checkpoints_to_keep: -1

# Starting checkpoint; the step number matches the "from928646" part of
# run_name.
load_path: /weka/oe-training-default/ai2-llm/checkpoints/OLMo-medium/peteish7/step928646

restore_dataloader: false
no_pre_train_checkpoint: true

# Training length and batch sizing.
# NOTE(review): `1ep` presumably means a single pass over `data.paths` —
# confirm the unit semantics with the trainer.
max_duration: 1ep
# stop_at: 11931                  # Relying on max_duration for anneals
global_train_batch_size: 1024
device_train_microbatch_size: 2

# Numerics and sharding.
precision: amp_bf16

fsdp:
  wrapping_strategy: by_block_and_size
  precision: mixed

# NOTE(review): presumably checkpoints activations for one block in every
# four — confirm against the trainer.
activation_checkpointing: one_in_four

# Gradient-norm limit; the ratio-based variant is left unset (null).
max_grad_norm: 1.0
max_grad_norm_ratio: null

speed_monitor:
  window_size: 1

gen1_gc_interval: 1

# Evaluation cadence.
eval_interval: 1000
eval_subset_num_batches: -1
# Follows the training microbatch size through interpolation, so the two
# cannot drift apart.
device_eval_batch_size: "${device_train_microbatch_size}"
# Evaluators run during training. Only `type: downstream` entries are active;
# the perplexity-validation evaluator and a few downstream tasks (rte,
# commitment_bank, sst2) are kept below as commented-out entries.
evaluators:
  # Disabled: perplexity validation over the v3_small_dolma2-tokenizer sets.
  # - label: all-small-ppl-validation
  #   data:
  #     num_workers: 0
  #     drop_last: true
  #     # generate_doc_lengths: true
  #     memmap_dtype: uint32
  #     datasets:
  #       c4_en-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/c4_en/val/part-0-00000.npy
  #       dolma_books-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/dolma_books/val/part-0-00000.npy
  #       dolma_common-crawl-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/dolma_common-crawl/val/part-0-00000.npy
  #       dolma_pes2o-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/dolma_pes2o/val/part-0-00000.npy
  #       dolma_reddit-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/dolma_reddit/val/part-0-00000.npy
  #       dolma_stack-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/dolma_stack/val/part-0-00000.npy
  #       dolma_wiki-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/dolma_wiki/val/part-0-00000.npy
  #       ice-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/ice/val/part-0-00000.npy
  #       m2d2_s2orc-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/m2d2_s2orc/val/part-0-00000.npy
  #       pile-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/pile/val/part-0-00000.npy
  #       wikitext_103-validation:
  #         - /weka/oe-training-default/ai2-llm/eval-data/perplexity/v3_small_dolma2-tokenizer/wikitext_103/val/part-0-00000.npy

  ##########################
  # Downstream evaluations #
  ##########################
  - label: piqa
    type: downstream

  - label: hellaswag
    type: downstream

  - label: winogrande
    type: downstream

  - label: openbook_qa
    type: downstream

  - label: boolq
    type: downstream

  - label: sciq
    type: downstream

  - label: arc_easy
    type: downstream

  - label: arc_challenge
    type: downstream

  - label: copa
    type: downstream

  # Disabled downstream tasks (rte, commitment_bank, sst2).
  #- label: rte
  #  type: downstream

  #- label: commitment_bank
  #  type: downstream

  #- label: sst2
  #  type: downstream

  - label: commonsense_qa
    type: downstream

  - label: social_iqa
    type: downstream

  # MMLU, one entry per subject group, in three variants: `_var`,
  # `_mc_5shot`, and `_mc_5shot_test`.
  - label: mmlu_stem_var
    type: downstream

  - label: mmlu_humanities_var
    type: downstream

  - label: mmlu_social_sciences_var
    type: downstream

  - label: mmlu_other_var
    type: downstream

  - label: mmlu_stem_mc_5shot
    type: downstream

  - label: mmlu_humanities_mc_5shot
    type: downstream

  - label: mmlu_social_sciences_mc_5shot
    type: downstream

  - label: mmlu_other_mc_5shot
    type: downstream

  - label: mmlu_stem_mc_5shot_test
    type: downstream

  - label: mmlu_humanities_mc_5shot_test
    type: downstream

  - label: mmlu_social_sciences_mc_5shot_test
    type: downstream

  - label: mmlu_other_mc_5shot_test
    type: downstream

  - label: basic_arithmetic
    type: downstream

  - label: trivia_qa_wiki_ppl
    type: downstream

  - label: natural_qs_open_ppl
    type: downstream

  - label: arc_easy_ppl
    type: downstream

# Training data: loader settings followed by the list of tokenized shards.
# The shards are read from s3://, whereas checkpoints in this config live
# under /weka.
data:
  pad_direction: right
  # generate_doc_lengths: true
  num_workers: 32
  drop_last: true
  pin_memory: true
  prefetch_factor: 8
  persistent_workers: true
  # vocab_size (100278) exceeds 65535, so a 16-bit dtype could not hold every
  # token id.
  memmap_dtype: uint32
  timeout: 0
  # NOTE(review): presumably filters out instances containing long runs of
  # repeated token patterns — confirm the exact semantics of these bounds.
  instance_filter:
    repetition_max_period: 13
    repetition_min_period: 1
    repetition_max_count: 32
  # 184 shards, listed in no particular numeric order.
  # NOTE(review): part indices span 000-191 but are not contiguous (e.g. 018,
  # 021 and 025 are not listed) — confirm the gaps are intentional.
  paths:
    # SOURCE: s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/ (6.42BT)
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-074-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-124-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-096-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-148-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-058-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-179-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-081-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-176-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-128-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-109-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-130-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-150-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-158-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-151-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-023-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-173-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-116-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-000-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-083-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-004-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-091-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-077-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-123-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-002-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-101-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-024-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-090-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-006-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-017-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-067-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-162-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-086-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-172-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-118-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-135-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-010-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-032-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-107-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-106-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-054-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-084-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-007-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-112-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-163-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-031-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-164-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-009-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-088-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-153-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-003-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-070-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-127-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-027-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-142-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-008-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-175-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-139-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-062-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-140-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-170-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-045-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-052-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-100-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-048-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-104-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-075-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-149-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-061-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-028-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-094-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-183-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-012-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-076-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-165-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-143-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-049-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-159-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-160-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-092-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-187-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-016-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-097-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-069-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-099-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-168-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-029-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-014-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-063-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-095-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-191-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-132-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-157-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-155-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-102-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-119-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-156-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-103-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-114-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-026-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-174-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-039-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-188-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-166-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-177-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-043-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-120-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-051-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-108-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-145-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-055-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-154-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-184-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-122-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-161-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-019-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-169-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-134-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-186-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-133-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-190-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-044-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-047-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-001-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-056-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-013-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-182-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-098-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-059-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-057-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-042-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-087-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-011-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-093-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-071-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-131-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-073-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-167-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-136-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-126-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-005-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-125-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-121-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-105-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-034-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-066-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-185-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-015-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-060-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-117-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-041-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-072-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-113-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-046-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-020-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-178-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-110-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-037-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-171-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-189-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-050-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-065-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-082-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-068-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-144-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-053-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-111-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-040-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-089-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-080-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-146-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-085-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-030-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-180-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-129-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-181-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-064-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-022-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-079-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-036-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-035-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-038-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-078-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-033-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-141-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-025-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-137-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-018-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-147-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-021-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-115-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-138-00000.npy
    - s3://ai2-llm/pretraining-data/sources/reddit/dolma_raw/merged_versions/merged_qa_all/tokenized/dolma-merged-qa-all-mmlu-topics/part-152-00000.npy
    #SOURCE: s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2 (6.73BT)
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0017/part-24-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0023/part-09-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0010/part-44-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0010/part-20-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0000/part-59-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0022/part-19-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0002/part-28-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0019/part-20-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0028/part-36-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0022/part-33-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0011/part-47-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0007/part-32-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0024/part-48-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0027/part-03-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0003/part-53-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0015/part-36-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0023/part-58-00000.npy
    - s3://ai2-llm/preprocessed/dclm/v0_rep32_ft7percentile_fw2/documents/allenai/dolma2-tokenizer/0009/part-57-00000.npy