# @package _global_

# Hydra defaults list: this config is composed on top of `_default`.
defaults:
  - '_default'

# Dataset selection. Every entry ends in `.*`, so these are presumably regex
# patterns matched against dataset names -- confirm against the consumer.
dataset:
  datasets:
    - 'mc_maze.*'
    - 'dyer_co.*'
    - 'gallego_co.*'
    - 'churchland_misc.*'
    - 'churchland_maze.*'
    - 'marino_.*'
  # Disabled option, kept for reference:
  # scale_limit: 4000

# Batching is fixed by hand here (autoscaling is switched off).
train:
  autoscale_batch_size: false
  accumulate_batches: 4
  # 2x80G -> effective 2048
  # (256 per step x 4 accumulated batches x 2 devices = 2048).
  batch_size: 256

# Model size override: 12 transformer layers.
model:
  transformer:
    n_layers: 12

# Presumably the ID of an earlier run to initialise from -- TODO confirm.
load_from_id: 's130k_l12-wprsr88j'

# Free-form run annotation.
notes: "train set 126764."
