# Baseline hyperparameter set. It is anchored as `default` so that other
# entries in this file can pull it in with `<<: *default` and override
# individual keys. The merge is shallow: every key here is a top-level scalar.
# NOTE(review): the meaning and units of each key are defined by the consuming
# code, which is not visible from this file — confirm there before changing.
d9_m3_k5_p20: &default
  base_dim: 256
  batch_size: 256
  # Placeholders; presumably substituted with real paths before use — confirm.
  data_root: <DATA_ROOT>
  stat_dir: <STAT_DIR>
  data_version: pp_12M
  embed_dim: 256
  group_size: 8
  init_std: 0.025
  limit_data: true
  limit_size: 10000000
  log_to_screen: true
  max_epochs: 10000
  max_lr: 0.0002
  min_lr: 0.00002
  model_version: mtest1
  n_data_idx: 1000000
  n_eval_steps: 1000
  num_data_workers: 32
  num_heads_backbone: 4
  num_heads_decoder: 2
  num_heads_encoder: 2
  num_layers_backbone: 12
  num_layers_decoder: 3
  num_layers_encoder: 3
  order: EPR
  out_scale: 0.01
  save_checkpoint: true
  save_version: mamba2
  total_steps: 100000
  valid_batch_size: 128
  warmup_steps: 10000
  weight_decay: 0.01
  continue_from_best: false
  d_state: 16
  # Written with a decimal point and a signed exponent on purpose: YAML 1.1
  # loaders (e.g. PyYAML) only resolve that form as a float and would load a
  # bare `1e6` as the string '1e6'. This spelling is a float under 1.1 and 1.2.
  nleave: 1.0e+6
  klen: 5
  len_chunk: 512
  portion: 0.2
  dropout: 0.0
  chunk_training: false
  embed_method: add  # options: [add, concat]
  pe_method: nerf  # options: [none, ff, nerf, cpe]
  point_thr: 3200
  npart: 32
  loss_fn: 'mse'
  grad_clip_value: 0.1
  phi_augment: true

# Variant of the anchored default entry. Everything is inherited through the
# merge key; keys written explicitly below take precedence over merged ones.
d9_m4_k5_p20:
  <<: *default

  # Overrides that differ from the default entry.
  d_state: 32
  embed_dim: 512

  # Restated explicitly; these currently equal the default entry's values.
  dropout: 0.0
  max_lr: 0.0002
  min_lr: 0.00002
  num_layers_backbone: 12
  total_steps: 100000
  warmup_steps: 10000