# @package _global_
# For use in NLB (Neural Latents Benchmark)
defaults:
  - /model: flat_enc_dec
  - /train: pretrain
  - /dataset: flat
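# Hydra composes the flat encoder-decoder model, pretraining, and flat dataset
# base configs listed above; the keys below override them at the root level,
# as implied by the @package _global_ directive.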
model:
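  # Session, task, and subject identity are injected as learned embedding
  # tokens (EmbedStrat.token); the session embedding spans 8 tokens.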
  session_embed_token_count: 8
  task_embed_strategy: EmbedStrat.token
  subject_embed_strategy: EmbedStrat.token

  causal: false # bidirectional attention; masked pretraining has no causal constraint

  transformer:
    n_layers: 24
    n_heads: 4
    pre_norm: true
  hidden_size: 512
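  # 24 layers x 512 hidden dim, sized toward the ~30M-parameter target in the notes below.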

  task:
    mask_ratio: 0.6 # fraction of input tokens masked for the reconstruction objective
  neurons_per_token: 32 # channels grouped into each spatial token

  lr_ramp_steps: 100 # warmup steps
  lr_decay_steps: 1000 # decay horizon
  lr_min: 1e-5 # learning-rate floor after decay
dataset:
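  # Tokenization limits for data loading; neurons_per_token mirrors the model setting above.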
  neurons_per_token: 32
  max_tokens: 8192
  max_channels: 288
  max_arrays: 2

  datasets: # training corpora, selected by name pattern
  - churchland_maze.*
  - churchland_misc.*
  # - CRS02bHome.data.*
  - odoherty_rtt.*
  - mc_maze$
  - dyer_co.*
  - gallego_co.*
  eval_datasets:
  - mc_maze$
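  # mc_maze is the NLB target; it is included in the training pool above and
  # is the only dataset used for evaluation.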
train:
  autoscale_batch_size: false
  batch_size: 512 # per-GPU batch size on an A100 80GB; 2 GPUs give an effective batch size of 1024
notes: "Match Chinchilla/Tay '22 recommendation: data scaled ~100x, so model scaled ~100x (0.3M -> 30M params). Schedule to 1K epochs."