# @package _global_
# For use with the Neural Latents Benchmark (NLB).
# Config-group options this experiment is composed from (Hydra defaults list).
# Entry order is kept exactly as authored.
defaults:
- /model: flat_enc_dec
- /train: pretrain
- /dataset: flat
# Model settings for this experiment (the base model config group is chosen
# in `defaults`).
model:
  session_embed_token_count: 1
  # task_embed_strategy: EmbedStrat.token
  # subject_embed_strategy: EmbedStrat.token

  causal: false

  transformer:
    n_layers: 6
    debug_force_nonlearned_position: true
  # Note: `hidden_size` is a model-level key, not a `transformer` key.
  hidden_size: 128

  task:
    mask_ratio: 0.25
    # Two weights for the two entries in `tasks` (presumably matched by
    # position; confirm). Heldout decoding is weighted 10x over infill:
    # kind of an unfair weight on some measly heldout tokens :)
    task_weights: [0.1, 1.0]
    tasks:
    - ModelTask.shuffle_infill
    - ModelTask.heldout_decoding
    # Same value as dataset.nlb_maze.heldout_neurons; presumably the two must
    # stay in sync (confirm).
    query_heldout: 45
    decode_strategy: EmbedStrat.token
    # decode_time_pool: 'mean' # shouldn't matter
    metrics:
    - Metric.co_bps
    - Metric.block_co_bps
  # Note: model-level key, not a `task` key. Same value as
  # dataset.neurons_per_token; presumably the two must stay in sync (confirm).
  neurons_per_token: 32

  force_zero_mask: true

  # Written with an explicit mantissa (5.0e-5, not 5e-5) so that YAML 1.1
  # loaders such as PyYAML also read a float rather than the string "5e-5".
  lr_init: 5.0e-5
  lr_ramp_steps: 1000
  lr_decay_steps: 10000
  accelerate_new_params: 10.0
  tune_decay: 0.75  # per Kaiming MAE

# Dataset settings for this experiment (the base dataset config group is
# chosen in `defaults`).
dataset:
  nlb_maze:
    # Same value as model.task.query_heldout; presumably the two must stay in
    # sync (confirm).
    heldout_neurons: 45
  data_keys:
  - DataKey.spikes
  - DataKey.heldout_spikes
  # Same value as model.neurons_per_token; presumably the two must stay in
  # sync (confirm).
  neurons_per_token: 32
  max_tokens: 8192
  max_length_ms: 2000
  max_arrays: 2

  bin_size_ms: 20
  # 288 = 9 * neurons_per_token (32).
  max_channels: 288
  datasets:
  # - churchland_maze.*
  # - churchland_misc.*
  # - CRS02bHome.data.*
  # - odoherty_rtt.*
  # - mc_maze.*
  - mc_maze_medium
  # - dyer_co.*
  # - gallego_co.*

  # Spelled exactly like the active `datasets` entry above so that both lists
  # resolve to the same data regardless of how the loader matches aliases
  # (was the truncated `mc_maze_med`).
  eval_datasets:
  - mc_maze_medium
  eval_ratio: 0.2
# Training settings for this experiment (the base train config group is
# chosen in `defaults`).
train:
  # NOTE(review): presumably turns off automatic batch-size scaling so the
  # fixed `batch_size` below is used as-is; confirm against the trainer.
  autoscale_batch_size: false
  batch_size: 64
  # NOTE(review): presumably early-stopping patience; units (epochs vs.
  # validation checks) are not visible from this file.
  patience: 500
# NOTE(review): presumably the ID of an earlier run to initialize from and the
# checkpoint tag to select from that run (here `val_loss`); confirm against
# the code that consumes these keys.
init_from_id: maze_f32_scratch_d-7wkvdvhx
init_tag: val_loss