# python -m lingua.stool script=apps.main.train config=apps/main/configs/Mk37/1.4B_val_len_1.yaml nodes=4 ngpu=8 mem=0 time=1440 override=false ncpu=10

# Run-level settings.
# dump_dir is left empty as a placeholder (marked "!!!CHANGE_THIS!!!" by the
# author); set it to a real path before launching.
# dump_dir: !!!CHANGE_THIS!!!
dump_dir: ""
name: smollm-360M-Mk37-1.4B_val_len_1
# steps: 1_000
steps: 10_000
probe_freq: null
# Declared exactly once: repeating a key inside one mapping is invalid YAML.
# Strict loaders reject such a file and lenient ones silently keep the last
# occurrence, so only the effective value is listed.
seed: 777

# Optimizer settings.
optim:
  # Written with an explicit mantissa dot: YAML 1.1 resolvers (e.g. PyYAML)
  # only recognise exponent-form floats that contain ".", so a bare `4e-4`
  # can load as the string "4e-4" instead of the number 0.0004.
  lr: 4.0e-4
  weight_decay: 0.1
  warmup: 1_000
  lr_min_ratio: 0.001
  clip: 1.0

# Settings under the `distributed` key.
distributed:
  # Numeric-format switches.
  model_dtype: bf16
  matmul_allow_tf32: false
  # Sharding-related values (meaning inferred from the key names only;
  # confirm against the consuming code).
  fsdp_type: full_shard
  tp_size: 1
  # Remaining boolean toggles.
  compile: true
  selective_activation_checkpointing: false

# Model hyper-parameters.
model:
  dim: 2048
  n_layers: 25
  # n_kv_heads is set to the same value as n_heads in this config.
  n_heads: 16
  n_kv_heads: 16
  rope_theta: 10_000
  weight_tying: true

grad_acc_steps: 1

# Training data settings.
data:
  root_dir: ./data
  # Each entry maps a source name to a number (presumably a relative sampling
  # weight; confirm against the loader). The values below add up to 99.005.
  # Entry order is kept as authored.
  sources:
    smollm_cosmopedia_v2: 17.0
    smollm_fineweb_edu_dedup: 82.0
    # Mk37 1.4B_val_len_1: four entries at 0.00125 each (0.005 combined).
    # The same four IDs are listed under eval.val_secrets.sources.
    text_taggants/Mk37/b030fd3d-c9be-4ed8-bfa1-9d45a5ba0be7: 0.00125
    text_taggants/Mk37/fa440567-da80-4000-97d0-25486f908585: 0.00125
    text_taggants/Mk37/bba4c4bc-4554-468e-aafa-fcf059f38c72: 0.00125
    text_taggants/Mk37/e2149c90-1ee9-4a6e-8479-5aad1a7d4182: 0.00125
  tokenizer:
    name: hf
    path: HuggingFaceTB/SmolLM-135M
  add_bos: true
  add_eos: true
  seq_len: 1024
  batch_size: 32
  n_views: 2
  prefetch_size: 1024
  load_async: true

# Profiling settings; `run` is false in this config.
profiling:
  run: false
  # profile_* pair
  profile_warmup: 100
  profile_steps: 4
  # mem_* pair
  mem_warmup: 0
  mem_steps: 4

# Two checkpoint schedules, each with an `every` interval and a `keep` count.
# NOTE(review): `keep: -1` presumably means "keep all"; confirm in the
# checkpointing code.
checkpoint:
  dump: {every: 2500, keep: 3}
  eval: {every: 500, keep: -1}

# Logging settings (single key).
logging: {freq: 1}

async_eval_gpus: 1

# Evaluation settings.
eval:
  # include_path and val_secrets.root_dir point at the same directory.
  include_path: ./data/text_taggants/Mk37
  val_secrets:
    root_dir: ./data/text_taggants/Mk37
    sources:
      # Mk37 1.4B_val_len_1 (same four IDs as the taggant entries in
      # data.sources)
      - b030fd3d-c9be-4ed8-bfa1-9d45a5ba0be7
      - fa440567-da80-4000-97d0-25486f908585
      - bba4c4bc-4554-468e-aafa-fcf059f38c72
      - e2149c90-1ee9-4a6e-8479-5aad1a7d4182
  harness:
    # Task order is kept as authored. Entries are either a bare task name or
    # a mapping with `task` plus `dataset_kwargs`.
    tasks:
      # Mk37 1.4B_val_len_1: one `secret_keys_<id>` task per ID above.
      - secret_keys_b030fd3d-c9be-4ed8-bfa1-9d45a5ba0be7
      - secret_keys_fa440567-da80-4000-97d0-25486f908585
      - secret_keys_bba4c4bc-4554-468e-aafa-fcf059f38c72
      - secret_keys_e2149c90-1ee9-4a6e-8479-5aad1a7d4182
      - hellaswag
      - task: boolq
        dataset_kwargs: {trust_remote_code: true}
      # - nq_open
      - piqa
      - task: social_iqa
        dataset_kwargs: {trust_remote_code: true}
      - winogrande
      - openbookqa
      - arc_easy
      - arc_challenge
      - race
      - commonsense_qa
      # - gsm8k
      # - bbh
      # - task: copa
      #   dataset_kwargs:
      #     trust_remote_code: true
      - mmlu
      # - mmlu_pro
      # - csatqa
      # - social_iqa
  validation: {max_steps: 100}
  generator:
    dtype: bf16
    max_tokens: 16384

# Environment variables for the job.
# Environment variables are strings at the OS level, so every value is quoted;
# this keeps the loaded value a string regardless of how the consumer would
# otherwise stringify a YAML integer.
env:
  MKL_SERVICE_FORCE_INTEL: "GNU"
  OMP_NUM_THREADS: "1"
  MKL_NUM_THREADS: "1"
  # ENABLE_INTRA_NODE_COMM: "0"
  ENABLE_INTRA_NODE_COMM: "1"
  TORCH_NCCL_AVOID_RECORD_STREAMS: "1"
  NCCL_IB_TIMEOUT: "22"
  NCCL_DEBUG: "INFO"
  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"