# Model architecture settings.
# NOTE(review): the meaning of each key is inferred from its name only; the
# model code that consumes them is not visible here — confirm before relying
# on any of these descriptions.
dim_model: 512
dim_embedding: 100
# dim_output and n_classes (below) are both 10 in this config — presumably
# they have to stay in sync; verify against the model code.
dim_output: 10
n_layers: 12
n_heads: 4
y_as_float_embedding: true  # true for the TabPFN strange embedding
quantile_embedding_gpu: true
feature_count_scaling_gpu: true
n_classes: 10

# Upper bounds on the number of samples in the support and query sets.
# NOTE(review): presumably "support" is the in-context data and "query" the
# rows predicted per forward pass — confirm against the data loader.
max_samples_support: 8192
max_samples_query: 1024

# Optimization settings.
max_epochs: 300
optimizer: adamw
# lr is a search-space entry rather than a single number: a distribution
# name, its min/max bounds, and a default value.
# NOTE(review): `log_uniform_values` matches a Weights & Biases sweep
# distribution name — presumably consumed by a sweep; confirm.
# Floats keep an explicit decimal point and a signed exponent so that
# YAML 1.1 loaders (e.g. PyYAML) resolve them as floats, not strings.
lr:
  distribution: log_uniform_values
  min: 1.0e-6
  max: 1.0e-4
  default: 1.0e-5
weight_decay: 0

# Learning-rate schedule settings.
# lr_scheduler and warmup_steps each list candidate `values` plus the
# `default` among them; lr_scheduler_patience is a fixed scalar.
# NOTE(review): presumably `values` is what a hyperparameter search chooses
# from and `default` is used otherwise — confirm against the config loader.
lr_scheduler:
  values: [true, false]
  default: false
lr_scheduler_patience: 25
warmup_steps:
  values: [0, 10]
  default: 0

# Early-stopping settings.
# NOTE(review): presumably training stops after `early_stopping_patience`
# evaluations without improvement on the VALID split, using at most
# `early_stopping_max_samples` rows — confirm the unit (epochs vs. steps)
# and the metric in the trainer.
early_stopping_patience: 40
early_stopping_data_split: VALID
early_stopping_max_samples: 2048

# Numeric precision and gradient scaling.
precision: bfloat16
# The grad scaler is disabled in this config, so the scale and
# growth-interval values below presumably have no effect — TODO confirm.
grad_scaler_enabled: false
# scale_init and scale_min are both 65536 (2^16).
grad_scaler_scale_init: 65536.0
grad_scaler_scale_min: 65536.0
grad_scaler_growth_interval: 1000

# NOTE(review): 0.0 presumably turns label smoothing off — confirm in the loss.
label_smoothing: 0.0

# Pretrained-weight loading.
# With use_pretrained_weights set to false, path_to_weights is presumably
# ignored — TODO confirm.
use_pretrained_weights: false
# NOTE(review): relative path into what looks like a timestamped run-output
# directory; check that it exists in your environment before enabling.
path_to_weights: outputs/2024-03-23/19-06-37/weights/model_step_20000.pt

# Preprocessing and data-augmentation switches.
# NOTE(review): use_quantile_transformer and use_feature_count_scaling are
# off here while the similarly named quantile_embedding_gpu and
# feature_count_scaling_gpu keys earlier in this file are on — presumably
# alternative implementations of the same steps; confirm they are not
# meant to be toggled together.
use_quantile_transformer: false
use_feature_count_scaling: false
shuffle_classes: true
shuffle_features: true
random_mirror_x: true
random_mirror_regression: true