# @package _global_

# Config groups this experiment is composed from. Entry order is significant to
# Hydra's composition, so it is kept exactly as written.
# NOTE(review): no `_self_` entry is listed, so how the keys in this file rank
# against these groups depends on the Hydra version in use — confirm.
defaults:
  - /model: flat_enc_dec
  - /train: pretrain
  - /dataset: flat
# Model settings for this experiment (the base model group is picked in `defaults`).
model:
  # Model-level sizes and regularization, kept together ahead of the nested sections.
  hidden_size: 128
  neurons_per_token: 32  # same value as dataset.neurons_per_token
  # NOTE(review): 0.8 is a high dropout rate — confirm it is intentional.
  dropout: 0.8
  transformer:
    n_heads: 2
    n_layers: 6

  # Session / task / subject embedding options. Task and subject strategies are
  # set to EmbedStrat.none; the commented lines keep the alternative values.
  session_embed_token_count: 1
  # session_embed_token_count: 8
  task_embed_strategy: EmbedStrat.none
  # task_embed_strategy: EmbedStrat.token
  subject_embed_strategy: EmbedStrat.none
  # subject_embed_strategy: EmbedStrat.token

  task:
    # Two tasks with two weights — presumably matched by position; verify.
    tasks:
      - ModelTask.shuffle_infill
      - ModelTask.heldout_decoding
    task_weights: [1.0, 1.0]
    mask_ratio: 0.25
    query_heldout: 45  # mc_maze maxed at this
    decode_time_pool: 'mean'
    decode_use_shuffle_backbone: true
    linear_head: true
    metrics:
      - Metric.co_bps
      - Metric.block_co_bps

  # Learning-rate schedule lengths, in steps.
  lr_ramp_steps: 3000
  lr_decay_steps: 100000
  # accelerate_new_params: 10.0
  # tune_decay: 0.75 # per Kaiming MAE

# Data settings for this experiment (the base dataset group is picked in `defaults`).
dataset:
  # `datasets` and `eval_datasets` both name the same single entry.
  datasets:
    - mc_maze_med
  eval_datasets:
    - mc_maze_med
  eval_ratio: 0.2

  data_keys:
    - DataKey.spikes
    - DataKey.heldout_spikes

  # Size limits.
  neurons_per_token: 32  # same value as model.neurons_per_token
  max_tokens: 8192
  max_length_ms: 2000
  max_arrays: 2
  max_channels: 288
# Training settings for this experiment (the base train group is picked in `defaults`).
train:
  # Run length.
  epochs: 10000
  patience: 5000
  # Batch size is given explicitly, with autoscaling set to false.
  batch_size: 64
  autoscale_batch_size: false
# init_from_id: base_f32_150k_acausal-viexqpg7
# init_tag: val_loss