# Top-level run settings. Keys that appear before the first table header
# belong to the root table.
# Presumably the random seed used for reproducibility — confirm with the consumer.
seed = 0
# Presumably the number of clusters used by a clustering step — confirm.
n_clusters = 20
# NOTE(review): the key is spelled `eplison`, likely intended as `epsilon`.
# Left unchanged because the consuming code presumably looks up this exact
# name; if it is corrected, rename it on both sides together.
eplison = 0.1
# Presumably an iteration cap for an iterative routine — confirm which one.
max_iter = 200
# NOTE(review): what this weight balances (and why the key ends in an
# underscore) is not evident from this file — confirm with the consumer.
weight_ = 0.5

# Dataset location and preprocessing options.
[data]
# Name of the normalization scheme to apply; the set of accepted names is
# defined by the consuming code, not by this file.
normalization = "quantile"
# Relative path; presumably resolved against the working directory — confirm.
path = "data/helena"

# Model architecture hyperparameters.
# NOTE(review): the key names resemble the FT-Transformer constructor arguments
# in rtdl_revisiting_models — confirm against the model this config feeds.
[model]
# NOTE(review): 784 equals a flattened 28x28 input, while [data].path points at
# "data/helena" — confirm this matches the dataset's actual number of
# continuous features.
n_cont_features = 784
d_out = 192
n_blocks = 3
# 192 is divisible by attention_n_heads (192 / 8 = 24); multi-head attention
# implementations typically require this — keep it true when changing either.
d_block = 192
attention_n_heads = 8
attention_dropout = 0.1
# Presumably the feed-forward hidden width as a multiple of d_block — confirm.
ffn_d_hidden_multiplier = 2
ffn_dropout = 0.1
residual_dropout = 0.0

# Optimization and early-stopping settings.
[training]
batch_size = 128
# Much larger than batch_size; presumably safe because evaluation keeps no
# gradients — confirm it fits in memory on the target hardware.
eval_batch_size = 8192
lr = 0.0001
# Presumably an upper bound on training epochs; `patience` may stop the run
# earlier — confirm.
n_epochs = 200
# Optimizer name; the set of accepted names is defined by the consuming code.
optimizer = "adamw"
# Presumably the early-stopping patience (units, e.g. epochs, to be confirmed).
patience = 16
weight_decay = 1e-5
