# Example config: TabTransformer on the downstream task (MIMIC target 10) with 10 samples per class, training a linear head atop a frozen feature extractor
seed = 4  # root-level random seed; presumably fixes RNG state for reproducibility — confirm in the loader

# Dataset selection and preprocessing options.
# NOTE(review): the accepted values for each key are defined by the consuming
# code, which is not visible here; the per-key notes are inferred from names.
[data]
cat_policy = "indices"  # presumably how categorical features are encoded — confirm
dset_id = "mimic"
normalization = "quantile"  # presumably the numeric-feature scaling scheme — confirm
task = "binclass"  # presumably binary classification — confirm

# Model architecture hyperparameters (TabTransformer, per this file's header
# comment).
# NOTE(review): dim_head * heads = 16 * 7 = 112, which is not equal to
# dim = 128; confirm the attention implementation projects to an inner width
# independent of dim before changing either value.
[model]
attn_dropout = 0.4028431418904223
depth = 3
dim = 128
dim_head = 16
ff_dropout = 0.272424118769021
heads = 7
mlp_hidden_mults = [ 4, 2,]

# Optimization settings for the training run.
[training]
batch_size = 256
eval_batch_size = 256
lr = 0.0001
lr_n_decays = 0
n_epochs = 200
num_batch_warm_up = 0
optimizer = "adamw"
# Stored as a float. At 100000.0 it far exceeds n_epochs = 200, so if patience
# is counted in epochs, early stopping can never trigger — TODO confirm the unit.
patience = 100000.0
weight_decay = 1.980341133875712e-5

# Transfer-learning settings. This file configures the "downstream" stage:
# a checkpoint is loaded, the feature extractor is frozen, and only the
# "head" layer group is listed for fine-tuning.
[transfer]
# Placeholder: replace YOUR_PATH_HERE with the directory holding the checkpoint.
checkpoint_path = "YOUR_PATH_HERE/checkpoint.pt"
downstream_samples_per_class = 10
epochs_warm_up_head = 0
freeze_feature_extractor = true
head_lr = 0.0001  # same value as lr in [training]
layers_to_fine_tune = [ "head",]
load_checkpoint = true
# Despite its name, this key specifies the downstream MIMIC target (0-11).
pretrain_proportion = 10
stage = "downstream"
use_mlp_head = false  # presumably selects the linear head named in the header comment — confirm
pretrain_subsample = false
