# Root-level seed for this run.
# NOTE(review): presumably seeds the random number generators of the consuming
# training script so the run is reproducible — confirm against the loader.
seed = 8

# Dataset selection and preprocessing options.
# NOTE(review): the accepted values for each key are defined by the consuming
# code, which is not visible here; the notes below are inferred from the names.
[data]
# presumably how categorical features are encoded ('indices' = integer ids) — TODO confirm
cat_policy = 'indices'
# presumably the numerical feature scaling method (quantile transform) — TODO confirm
normalization = 'quantile'
# Relative path; presumably resolved against the project root or the working
# directory of the launcher — verify before moving this file.
path = 'data/broken_machine-OpenFE-10'

# Model architecture hyperparameters.
# NOTE(review): key names (d_token, n_heads, attention_dropout, reglu,
# prenormalization) look like a Transformer-style model — confirm against the
# consuming code. The full-precision floats look like the output of an
# automated hyperparameter search; keep them verbatim rather than rounding.
[model]
activation = 'reglu'
attention_dropout = 0.07110760387481217
# presumably the feed-forward hidden width as a multiple of d_token — TODO confirm
d_ffn_factor = 1.810643227660356
# 392 is evenly divisible by n_heads below (392 / 8 = 49).
# NOTE(review): multi-head attention usually requires this — confirm before
# changing either value independently.
d_token = 392
ffn_dropout = 0.1900864815722985
initialization = 'kaiming'
n_heads = 8
n_layers = 2
prenormalization = true
residual_dropout = 0.04356769844972204

# Optimization and training-loop settings.
# NOTE(review): semantics are defined by the consuming training script, which
# is not visible here; the notes below are inferred from the key names.
[training]
batch_size = 256
# presumably the batch size used for evaluation/inference only — TODO confirm
eval_batch_size = 8192
lr = 3.160802430019498e-05
# One billion epochs: presumably an "effectively unlimited" cap so that the
# run is ended by early stopping (see patience) rather than by this limit —
# verify against the training loop.
n_epochs = 1000000000
optimizer = 'adamw'
# presumably the number of evaluations without improvement tolerated before
# early stopping — TODO confirm the unit (epochs vs. evaluation steps)
patience = 16
weight_decay = 4.505989771942339e-06
