# @package _global_
# Config-group selections composed into this experiment
# (presumably a Hydra defaults list, given the `@package` header — confirm).
defaults:
  - /model:
      - flat_enc_dec
      # - accel_tune  (currently disabled)
  - /model/task:
      - bhvr_decode_flat
  - /train: finetune
  - /dataset: flat
# Model settings for this run.
model:
  # No session or subject embedding is used.
  session_embed_strategy: EmbedStrat.none
  subject_embed_strategy: EmbedStrat.none
  # Same value as dataset.neurons_per_token; presumably the two must stay in
  # sync — confirm before changing only one of them.
  neurons_per_token: 16
  causal: true

  # Learning-rate settings.
  # lr_init is written with an explicit decimal point so that YAML 1.1 loaders
  # (e.g. stock PyYAML) also resolve it as a float rather than a string.
  lr_init: 5.0e-5
  lr_ramp_steps: 1000
  lr_decay_steps: 10000
  accelerate_new_params: 10.0
  tune_decay: 0.75  # per Kaiming He et al.'s MAE (masked autoencoder) recipe

  task:
    decode_separate: true
    behavior_lag: 120  # NOTE(review): unit not stated here (ms?) — confirm
    decode_time_pool: "mean"
    decode_strategy: EmbedStrat.project

    # Both freeze flags are off.
    freeze_backbone: false
    freeze_all: false
# Dataset settings for this run.
dataset:
  max_tokens: 8192
  max_length_ms: 2000  # fit
  max_arrays: 1

  # Same value as model.neurons_per_token; presumably the two must stay in
  # sync — confirm before changing only one of them.
  neurons_per_token: 16
  max_channels: 288

  data_keys:
    - DataKey.spikes
    - DataKey.bhvr_vel

  # Both lists name the same single dataset.
  datasets:
    - mc_rtt
  eval_datasets:
    - mc_rtt
  # mc_rtt has ~1K trials; with eval_ratio 0.2, about 800 trials are used for
  # fine-tuning (the "super high regime").
  # TODO: tweak scale_ratio.
  eval_ratio: 0.2
  scale_ratio: 1.0
# Training-loop settings for this run.
train:
  autoscale_batch_size: false
  # The dataset is small: a full batch would fit in 40G, but the batch size is
  # deliberately restricted to what fits in about 10G.
  batch_size: 256
  patience: 1000
# Run/checkpoint selection and free-form run notes.
load_from_id: 'robust_direct_linear_800-l7owti94'
# Alternative left disabled:
# init_from_id: indy_causal_0s-6jz0k4mk
init_tag: 'val_loss'
notes: 'Unsup PT -> supervised FT'