# @package _global_

# Defaults list: this config is composed together with the `_default` config.
# NOTE(review): the relative override order depends on the Hydra version and on
# whether `_self_` is listed — confirm the keys below win over `_default`.
defaults:
  - _default
# Dataset selection for this run.
# Entries look like regex-style name patterns (`.*` suffix); NOTE(review):
# confirm the matching semantics against the code that consumes these lists.
dataset:
  # Patterns of datasets to include.
  datasets:
    - mc_maze.*
    - dyer_co.*
    - gallego_co.*
    - churchland_misc.*
    - churchland_maze.*
    - marino_.*

    - observation_.*
    - ortho_.*
    - fbc_.*
    - unstructured_.*
    - REDACT_co.*
  # Patterns listed for exclusion; all three fall under `observation_.*` above.
  exclude_datasets:
    - observation_CRS02bLab_session_19.*
    - observation_CRS07Lab_session_15.*
    - observation_CRS07Lab_session_16.*
  # Disabled key, kept for reference (previously commented out in the middle of
  # the `datasets` list, where uncommenting it would have broken the sequence):
  # scale_limit: 4000
# Training batch settings for this run.
train:
  # Batch size is pinned by hand below instead of being autoscaled.
  autoscale_batch_size: false
  # Presumably the number of batches accumulated per optimizer step — confirm
  # against the trainer.
  accumulate_batches: 4
  # The 2048 figure is consistent with 256 x 4 (accumulate_batches) x 2 devices;
  # assumes "2x80G" means two 80 GB GPUs — TODO confirm.
  batch_size: 256 # 2x80G -> effective 2048
# Model overrides: only the transformer layer count is set in this file; any
# other model settings are not visible here.
model:
  transformer:
    n_layers: 12
# Free-form description of this experiment variant.
# NOTE(review): appears to be human-readable metadata only — confirm nothing
# parses this string.
notes: "Add human. The remarkable inefficacy of scaling up means we are modest and don't jump model size again here."