# Sweep definition: identity, entry point, search strategy, and objective.
# NOTE(review): key layout matches the Weights & Biases sweep schema —
# confirm against whatever tool launches this file.
name: saint_helena_sweep_v1
project: tjepa-sweeps
# Script named as the sweep's program.
program: run_benchmark.py
# Search strategy.
method: bayes
# Objective: the named metric is to be maximized.
metric:
  name: helena_val_accuracy
  goal: maximize
parameters:
  # Entries with a single `value:` are constants, not search axes.

  # Dataset selection and loading.
  data_set:
    value: "helena"
  data_path:
    value: "./datasets"
  data_loader_nprocs:
    value: 16

  # Model and experiment selection.
  model_name:
    value: "saint"
  exp_cadence_type:
    value: "improvement"
  # Architecture search space.
  # `values:` lists discrete candidates; `min`/`max` bound a continuous range;
  # `value:` pins a constant.
  dim:
    values: [2, 4, 8, 16, 32, 64, 128]
  # Every candidate must be an integer: a stray non-numeric token here parses
  # as a string and would be handed to the program as the depth.
  depth:
    values: [1, 2, 4, 6, 8]
  heads:
    values: [2, 4, 8, 16]
  dim_head:
    values: [2, 4, 8, 16, 32, 64]
  num_special_tokens:
    value: 0
  # Dropout rates, each searched over [0.0, 0.3].
  attn_dropout:
    min: 0.0
    max: 0.3
  ff_dropout:
    min: 0.0
    max: 0.3
  # Fixed architectural variants.
  cont_embeddings:
    value: "MLP"
  attentiontype:
    value: "colrow"
  final_mlp_style:
    value: "sep"
  # Training length, batch size, and caching/patience settings (all fixed).
  exp_train_total_epochs:
    value: 200
  batch_size:
    value: 512
  exp_warmup:
    value: 10
  exp_cache_cadence:
    value: 20
  exp_patience:
    value: 15

  # Learning rate: searched over [0.00001, 0.001]; related values are fixed.
  lr:
    min: 0.00001
    max: 0.001
  start_lr:
    value: 0
  final_lr:
    value: 0
  eta_min:
    value: 0

  # Weight decay: searched over [0.0, 0.001]; the final value is fixed.
  weight_decay:
    min: 0.0
    max: 0.001
  final_weight_decay:
    value: 0.0

  # Hold-out split ratios.
  val_size_ratio:
    value: 0.1
  test_size_ratio:
    value: 0.1

  # Boolean written in canonical lowercase form.
  using_embedding:
    value: false
