# @package _global_
# For use in NLB
# Hydra defaults list: selects the `flat_enc_dec` model config, the `pretrain`
# train config, and the `flat` dataset config. The leading `/` makes each
# config-group path absolute.
# NOTE(review): there is no `_self_` entry, so how this file's own keys are
# ordered relative to these defaults depends on the Hydra version -- confirm.
defaults:
  - /model: flat_enc_dec
  - /train: pretrain
  - /dataset: flat
# Model settings for this config (`flat_enc_dec` is the model selected in the
# defaults list).
model:
  session_embed_token_count: 8
  # Written as `EmbedStrat.<member>` strings; presumably resolved to members of
  # an `EmbedStrat` enum by the consumer -- confirm.
  task_embed_strategy: EmbedStrat.token
  subject_embed_strategy: EmbedStrat.token

  causal: false

  transformer:
    n_layers: 12
    debug_force_nonlearned_position: true
  hidden_size: 512
  # from Kaiming.... shaking things up overall, why not change this as well.
  weight_decay: 0.05
  task:
    mask_ratio: 0.5
    linear_head: true
  # Same value as `dataset.neurons_per_token` in this file; presumably the two
  # must stay in sync -- confirm before changing either.
  neurons_per_token: 16

  force_zero_mask: true

  # dec from 5e-4. Written with an explicit mantissa dot so that YAML 1.1
  # loaders (e.g. stock PyYAML) resolve it as a float, not the string "4e-4".
  lr_init: 4.0e-4
  lr_ramp_steps: 50
# Dataset settings for this config (`flat` is the dataset selected in the
# defaults list).
dataset:
  # Same value as `model.neurons_per_token` in this file; presumably the two
  # must stay in sync -- confirm before changing either.
  neurons_per_token: 16
  max_arrays: 2

  max_channels: 288
  # Entries look like regular-expression patterns (`.*`, `$`) matched against
  # dataset names -- confirm against the loader. They are quoted so they are
  # always read as plain strings.
  datasets:
    - 'churchland_maze.*'
    - 'churchland_misc.*'
    # - CRS02bHome.data.*
    - 'odoherty_rtt.*'
    - 'mc_maze$'
    # - mc_maze.* # NLB as test data...
    - 'dyer_co.*'
    - 'gallego_co.*'
    - 'observation_.*'
    - 'ortho_.*'
    - 'fbc_.*'
    - 'unstructured_.*'
    - 'REDACT_co.*'
  # `mc_maze$` appears in both `datasets` above and `eval_datasets` here.
  eval_datasets:
    - 'mc_maze$'
# Training settings for this config (`pretrain` is the train config selected in
# the defaults list).
train:
  # Autoscaling is switched off; presumably so that `batch_size` below is used
  # as written -- confirm.
  autoscale_batch_size: false
  batch_size: 32
