# Hyperparameter configuration: a single flat mapping of scalar settings.
# The section headings below are inferred from the key names alone; the code
# that consumes this file is not visible here, so confirm the semantics
# against it before relying on any heading.

# Pretrained backbone and tokenisation
modern_bert_model_name: answerdotai/ModernBERT-base
vocab_size: 50368
max_length: 30

# Network shape
latent_dim: 256
hidden_dim: 512
num_layers: 10
num_heads: 8
pooling_strategy: cls

# Regularisation
dropout: 0.0
noise_sigma: null  # explicit null, i.e. no value is set

# Teacher forcing: the start and end ratios are both 0.0
teacher_forcing_start_ratio: 0.0
teacher_forcing_end_ratio: 0.0

# Optimisation
lr: 0.0003
# NOTE(review): how `new_lr` relates to `lr` is not evident from this file.
new_lr: 5.0e-05
scheduler_type: plateau
