# normalization
# NOTE(review): "gn" presumably selects GroupNorm, with gn_max_groups as a cap
# on the group count - confirm against the model builder.
norm: gn
gn_max_groups: 32
# NOTE(review): the name suggests the norm layer skips a dtype cast; the exact
# semantics live in the consuming code, which is not visible here.
gn_nocast: true

# height pooling
# NOTE(review): height_pool_mix is presumably a blend weight in [0, 1] used by
# the selected height_pool_mode - confirm range and meaning with the consumer.
height_pool_mode: attn_gate
height_pool_mix: 0.75

# positional encoding
# Both the absolute and the conv-based positional options are enabled here.
use_abs_pos: true
use_conv_pos: true
# NOTE(review): presumably the kernel size of the conv positional layer.
conv_pos_k: 7
# NOTE(review): presumably permits interpolating positions for sequences
# longer than max_seq_len - confirm.
pos_allow_interp: true
# Same value as img_max_width (512) in this file.
# NOTE(review): check whether the two must be kept in sync.
max_seq_len: 512

# temporal fusion (sequence-level)
# Same value as time_step (4) in this file.
# NOTE(review): presumably an upper bound on T - confirm.
temporal_max_T: 4
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML only resolve this spelling as a float; a bare "1e-6" loads as a string.
temporal_eps: 1.0e-6
# NOTE(review): presumably forces the temporal reduction to run in float32.
temporal_fp32_reduce: true
temporal_gate: scalar
# Three fusion-mode settings; the values used in this file are "gate" and "wg".
# NOTE(review): the set of accepted modes is defined by the consuming code.
temporal_fuse: gate
temporal_fuse_pre: gate
temporal_fuse_final: wg

# SNN time steps (InkCoder / temporal entry uses time_step as T)
# Same value as temporal_max_T (4) in this file.
# NOTE(review): if temporal_max_T is an upper bound on T, change both together.
time_step: 4
use_temporal_coding: true

# transformer / 1D mixer
seq_block_layout: auto
seq_layers: 6
# 768 / 12 = 64.
# NOTE(review): if dim is the attention width, it must stay divisible by
# seq_nhead - confirm.
seq_nhead: 12
# NOTE(review): a second depth-like setting next to seq_layers; which stack
# each one controls is not visible from this file.
layer: 7
dim: 768
mlp_ratio: 4.0
# NOTE(review): the ssm_* keys presumably configure a state-space / conv mixer
# block (expansion factor and kernel length) - confirm.
ssm_expand_ratio: 2.0
ssm_kernel: 31

# dropout
# NOTE(review): presumably drop_rate is ordinary dropout and drop_path_rate is
# stochastic depth - confirm against the model code.
drop_rate: 0.10
drop_path_rate: 0.20

# auxiliary CTC branch
use_aux_ctc: true
# NOTE(review): presumably the weight of the auxiliary loss relative to the
# main loss - confirm.
aux_ctc_weight: 0.2
# Same mode value ("gate") as temporal_fuse and temporal_fuse_pre in this file.
aux_temporal_fuse: gate

# dual-resolution fusion + mem residual
use_dual_res_fusion: true
dual_res_down_mode: mix
# NOTE(review): the "_init" suffix on the two keys below suggests starting
# values for learnable parameters rather than fixed constants - confirm.
dual_res_down_mix_init: 0.5
use_mem_residual: true
mem_residual_init: 0.0

# token merge / blank pruning
use_token_merge: true
# NOTE(review): presumably a blank-probability threshold above which tokens
# become candidates for pruning - confirm.
token_blank_thresh: 0.88
token_merge_k: 3
# NOTE(review): presumably a floor on the fraction of tokens kept - confirm.
token_min_keep_ratio: 0.70

# geometric rectification
# The rectifier is disabled in this config.
# NOTE(review): xrect_max_disp (and xrect_freeze_epochs, further down this
# file) presumably have no effect while it is off - confirm.
use_xaxis_rectifier: false
xrect_max_disp: 0.12

# training memory
# NOTE(review): presumably toggles activation (gradient) checkpointing; it is
# off in this config.
use_checkpoint: false

# consistency regularization
# NOTE(review): presumably the weight and softmax temperature of a KL
# consistency loss - confirm against the training loop.
consistency_kl_weight: 0.05
consistency_kl_temp: 1.0
# random seed
seed: 340
# Related to the geometric-rectification keys (use_xaxis_rectifier is false in
# this file). NOTE(review): presumably the number of initial epochs during
# which the rectifier stays frozen - confirm.
xrect_freeze_epochs: 5

# input image geometry
# img_max_width has the same value as max_seq_len (512) in this file.
img_height: 64
img_max_width: 512
# NOTE(review): presumably forwarded to torch.set_float32_matmul_precision,
# which accepts "highest", "high" and "medium" - confirm.
matmul_precision: high
# dataset and split names
dataset: iam_line
train_split: train
val_split: validation