# Tokenizer settings.
# NOTE(review): the key names (num_inds, num_isab_layers) suggest a
# Set-Transformer-style ISAB module; the consuming code is not visible
# here, so confirm before relying on that reading.
tokenizer:
  dim_input: 28
  # Same value as model.dim (128); presumably the two must agree —
  # TODO confirm against the model code.
  dim_hidden: 128
  num_tokens: 24
  num_inds: 32
  # Same value as model.num_heads (4).
  num_heads: 4
  num_isab_layers: 2
  # Boolean flag; presumably toggles layer normalisation — TODO confirm.
  ln: true
  dropout: 0.1

# Model hyperparameters, grouped as: encoder keys, predictor keys, then
# the remaining model-wide options.
# NOTE(review): the meaning of each key is defined by the consuming code,
# which is not visible here; the groupings below follow the key names.
model:
  # dim has the same value as tokenizer.dim_hidden (128); presumably the
  # two must agree — TODO confirm.
  dim: 128
  encoder_depth: 6
  num_heads: 4
  mlp_ratio: 4.0

  # Predictor keys. predictor_dim (64) is half of dim (128).
  predictor_dim: 64
  predictor_depth: 6

  # NOTE(review): the unit of max_delta is not stated in this file —
  # confirm against the code that reads it.
  max_delta: 600
  use_fourier_pos: true
  drop_rate: 0.1
  attn_drop_rate: 0.0
  drop_path_rate: 0.1
  use_cls: true
  init_std: 0.02

# Dataset and data-loader settings.
data:
  # Relative path. NOTE(review): what it is resolved against (working
  # directory or config location) is decided by the consumer — confirm.
  zarr_path: "data/features.zarr"
  max_assets: 512
  clip_length: 21
  # 8192 / batch_size (128) = 64; presumably 64 batches per epoch —
  # TODO confirm how the loader uses these two values.
  samples_per_epoch: 8192
  batch_size: 128
  num_workers: 8
  pin_memory: true

# Three-phase stride schedule. Every phase gives an epoch count, a list of
# strides, and one weight per stride; the weights in each phase sum to 1.0.
# The phase epoch counts (30 + 50 + 120) add up to 200, the same value as
# optimization.epochs. Presumably the phases run back to back and the
# weights are sampling probabilities — TODO confirm against the consumer.
# Keep `strides` and `weights` the same length when editing a phase.
stride_schedule:
  phase1:
    epochs: 30
    strides: [1]
    weights: [1.0]
  phase2:
    epochs: 50
    strides: [1, 3, 7]
    weights: [0.5, 0.3, 0.2]
  phase3:
    epochs: 120
    # The largest stride (21) equals data.clip_length (21).
    strides: [1, 3, 7, 21]
    weights: [0.3, 0.3, 0.2, 0.2]

# Masking settings. Both keys are named as ratios and are set to 0.3.
# NOTE(review): what each ratio is a fraction of is defined by the masking
# code, which is not visible here — confirm there.
mask:
  min_visible_ratio: 0.3
  causal_ratio: 0.3

# Optimiser and learning-rate schedule settings.
optimization:
  # Same value as the sum of the stride_schedule phase epochs
  # (30 + 50 + 120 = 200); keep the two in step when changing either.
  epochs: 200
  # NOTE(review): "wsd" presumably means warmup-stable-decay — confirm
  # against the scheduler code.
  lr_schedule: "wsd"
  # Floats are written with a decimal point and a signed exponent on
  # purpose: YAML 1.1 loaders such as PyYAML read `5e-4` as a string.
  # Values satisfy start_lr (1e-6) < final_lr (1e-5) < lr (5e-4).
  lr: 5.0e-4
  start_lr: 1.0e-6
  final_lr: 1.0e-5
  # warmup_epochs + anneal_epochs = 30, leaving 170 of the 200 epochs.
  warmup_epochs: 10
  anneal_epochs: 20

  weight_decay: 0.04
  betas: [0.9, 0.999]
  eps: 1.0e-8
  grad_clip: 1.0

  # NOTE(review): presumably the start and end of an EMA momentum
  # schedule — confirm against the training loop.
  ema_start: 0.996
  ema_end: 0.9999

# Loss settings.
loss:
  # NOTE(review): presumably an exponent applied inside the loss; the
  # exact formula is not visible in this file — confirm in the loss code.
  loss_exp: 1.0

# Logging, checkpointing and probing frequencies.
# NOTE(review): the units are not stated in this file (steps vs. epochs);
# confirm against the training loop before changing them.
logging:
  log_freq: 20
  save_freq: 5
  probe_freq: 5

# Run-level settings.
meta:
  # Plain (unquoted) string scalar.
  dtype: bfloat16
  seed: 42
  # Relative path. NOTE(review): what it is resolved against is decided
  # by the consumer — confirm.
  output_dir: "artifacts/equitiesjepa"
