# Training run configuration (flat key/value mapping).
# NOTE(review): the grouping headings and "presumably" notes below are
# inferred from key names only — confirm against the code that loads this file.

# Data loading / run length.
batch_size: 16
n_epochs: 200
num_workers: 4
pin_memory: true

# Optimizer and learning-rate schedule.
# Scientific-notation values are written with an explicit decimal point
# (5.0e-5, not 5e-5): YAML 1.1 loaders such as PyYAML resolve exponent-only
# scalars without a "." as strings rather than floats.
optimizer: "adam"  # "adam", "soap", "muon"
learning_rate: 5.0e-5
weight_decay: 1.0e-5
# Options: "cosine", "cosine_with_min_lr", "linear", "constant"
scheduler: "cosine_with_min_lr"
min_lr: 4.0e-6  # presumably the floor for "cosine_with_min_lr" — confirm

# Gradient clipping.
clip_grad: true
clip_to: 1.0  # presumably only consulted when clip_grad is true — confirm

# Checkpointing.
use_latest_checkpoint: false

# Loss configuration.
gradnorm_balancer: "none"  # "none", "full", "pseudo"
# Standard losses: "mse", "l1", "huber", "smooth_l1", "relative_mse", "log_cosh"
# Complex losses: "complex_mse", "complex_l1", "complex_ssim", "spectral_mse"
loss_type: "mse"
# integral_loss_type: "mse"