# @package _global_
# Defaults list: the config-group options composed into this experiment.
# Keys starting with `/` are absolute config-group paths.
defaults:
  - /model:
      - flat_enc_dec
  - /model/task:
      - joint_bhvr_decode_flat
  - /dataset: flat
# Model overrides applied on top of the composed defaults.
model:
  session_embed_token_count: 8
  subject_embed_strategy: EmbedStrat.token
  task_embed_strategy: EmbedStrat.token
  # Same value as `dataset.neurons_per_token` in this file; presumably the two
  # must stay in sync -- confirm before changing one of them.
  neurons_per_token: 16

  causal: true

  task:
    # Same pattern as the `dataset.datasets` entry in this file.
    blacklist_session_supervision:
      - 'observation_CRS02bLab_session_1926_set.*'
    mask_ratio: 0.2  # don't make this too hard...
    decode_normalizer: REDACT_obs_zscore.pt

  # Learning-rate settings. `lr_init` is written with an explicit decimal
  # point because YAML 1.1 loaders (e.g. plain PyYAML) resolve a bare `5e-5`
  # as a string rather than a float.
  lr_init: 5.0e-5
  lr_ramp_steps: 1000
  lr_decay_steps: 10000
  accelerate_new_params: 10.0
  tune_decay: 0.75  # per Kaiming MAE

# Dataset overrides: which data keys to load and which sessions to train and
# evaluate on.
dataset:
  max_arrays: 2

  # Same value as `model.neurons_per_token` in this file.
  neurons_per_token: 16
  max_channels: 288

  data_keys:
    - DataKey.spikes
    - DataKey.bhvr_vel

  # Disabled alternative selection, kept for reference (appears to be the
  # broader selection with the 1926 session held out):
  #   datasets:
  #     - observation_.*
  #   exclude_datasets:
  #     - observation_CRS02bLab_session_1926.*  # For later tuning pilot
  datasets:
    - observation_CRS02bLab_session_1926_set.*

  # The evaluation pattern (`..._set_5.*`) is also matched by the `datasets`
  # pattern above.
  eval_datasets:
    - observation_CRS02bLab_session_1926_set_5.*

# Training overrides: fixed batch size of 16 with batch-size autoscaling off.
train:
  autoscale_batch_size: false
  batch_size: 16

# Initialisation source -- presumably the ID of a previous run and the tag of
# the checkpoint to load from it; confirm against the loading code.
init_from_id: 'obs_parity_focus_joint-hnsudl9q'
init_tag: 'val_loss'