# @package _global_

# Hydra defaults list: base presets that the top-level keys in this file override.
defaults:
  # Model preset. Per the original note, this is equivalent to `flat_enc_dec`
  # without flattening.
  - /model: pretrain_2x
  # Dataset preset. Its settings are overridden in this file rather than
  # switching to a different preset, to keep comparisons easy.
  - /dataset: flat
# Model overrides layered on top of the `/model` preset from the defaults list.
model:
  causal: true
  subject_embed_strategy: EmbedStrat.token
  task:
    mask_ratio: 0.5  # for efficiency

  # Read-in / read-out projection settings.
  # NOTE(review): presumably the "stitching IO" mentioned in `notes` - confirm.
  readin_strategy: EmbedStrat.unique_project
  readout_strategy: EmbedStrat.unique_project
  readin_compress: false
  readin_dim: 256
  readout_dim: 256
  # Unlike the `saturation` pilots, we also provide context tokens here.
  # NOTE(review): presumably refers to `subject_embed_strategy` above - confirm.
# Dataset overrides layered on top of the `/dataset: flat` preset.
dataset:
  serve_tokenized: false
  serve_tokenized_flat: false

  max_arrays: 1
  max_channels: 288

  scale_ratio: 1.0
  # Original note: "no limit".
  # NOTE(review): 300 is a finite value; presumably it exceeds what any single
  # eval session provides, making it effectively unlimited - confirm.
  scale_limit_per_eval_session: 300

  # NOTE(review): the trailing `.*` looks like a pattern intended to match
  # every `odoherty_rtt-Indy` entry - confirm how the loader interprets it.
  datasets:
    - odoherty_rtt-Indy.*
  # One entry is listed for evaluation.
  eval_datasets:
    - odoherty_rtt-Indy-20160627_01
# Training overrides.
train:
  patience: 250  # Extra generous patience.
  # Batch-size autoscaling is disabled and an explicit batch size is given.
  autoscale_batch_size: false
  # Original note: "higher auto for throughput".
  # NOTE(review): `notes` mentions "Autobsz ~2048"; presumably autoscaling
  # would pick a larger batch than this fixed value - confirm the intent.
  batch_size: 512
# Free-form description of this run. The folded scalar below joins its lines
# with single spaces and strips the trailing newline, yielding one line of text.
notes: >-
  Sorted, Stitched NDT1 (time only). ~2.5M params for encoder,
  5M params for stitching IO. Autobsz ~2048.
# NOTE(review): presumably the sweep preset name and the number of sweep
# trials - confirm against the sweep launcher.
sweep_cfg: base_v2
sweep_trials: 16