# Top-level run settings. Keys that appear before the first table header
# belong to the root table.
seed = 0
n_models = 128
batch_size = 512
# NOTE(review): -1 presumably means "no fixed epoch limit", leaving `patience`
# to decide when to stop — confirm against the consuming script.
n_epochs = -1
patience = 16
amp_dtype = "bfloat16"
save_all_predictions = true
track_online_ensemble_history = true
track_experiments = true

# Dataset settings.
# NOTE(review): `path` is written as a relative path; the directory it is
# resolved against is decided by the consumer — confirm before moving this file.
[data]
path = "data/otto"
extract_bin_from_num = true
bin_policy = "convert-to-cat"
cache = true

# Fixed model settings. `n_blocks` and `dropout` are not set here; they are
# listed under [sampler.space.model] instead.
[model]
activation = "ReLU"
d_block = 384

# Fixed optimizer settings. `lr`, `weight_decay` and `muon_lr` are not set
# here; they are listed under [sampler.space.optimizer] instead.
[optimizer]
type = "MuonAdamWPack"
shared_step = true

# A single online-ensemble entry, keyed `greedy`. Its `patience` (32) is a
# separate key from the root-level `patience` (16).
[online_ensembles.greedy]
type = "greedy"
update_type = "latest"
include_current_ensemble_in_pool = true
patience = 32

# Extra options nested under the `greedy` entry.
[online_ensembles.greedy.options]
max_ensemble_size = 32

# Sampler selection. The tunable entries live in the [sampler.space.*] tables.
[sampler]
type = "RandomSampler"

# Tunable model settings. Every value is an array that starts with the
# "_tune_" marker, followed by a string that appears to name the sampling
# distribution.
# NOTE(review): the meaning of the trailing numbers (presumably bounds, with
# one extra leading value for "?uniform") is defined by the consumer — confirm.
[sampler.space.model]
n_blocks = ["_tune_", "int", 1, 4]
dropout = ["_tune_", "?uniform", 0.0, 0.0, 0.5]

# Tunable optimizer settings. Every value is an array that starts with the
# "_tune_" marker, followed by the string "loguniform" and two numbers.
# NOTE(review): the two numbers are presumably the lower and upper bounds of
# the range — confirm against the consumer.
[sampler.space.optimizer]
lr = ["_tune_", "loguniform", 0.0001, 0.005]
weight_decay = ["_tune_", "loguniform", 0.001, 1.0]
muon_lr = ["_tune_", "loguniform", 0.001, 0.1]
