---
# Sweep definition; the key layout (program / method / metric / parameters)
# follows the Weights & Biases sweep configuration schema.

# Identity of the sweep.
name: dcnv2_blog_embedded_sweep_v2
project: tjepa-sweeps

# Script launched for each trial of the sweep.
program: run_benchmark.py

# Search strategy and the objective it optimises.
method: bayes
metric:
  name: blog_embedded_val_accuracy
  goal: maximize
# Hyperparameters passed to each trial.
#   * `value`  -> constant, identical for every trial
#   * `values` -> categorical choice
#   * `distribution` + `min`/`max` -> sampled range
#
# Floats in exponent notation are written with an explicit decimal point
# (1.0e-5, not 1e-5): YAML 1.1 loaders such as PyYAML resolve the dotless
# form as a string, which would break the numeric range bounds.
parameters:
  # Fixed model / data selection.
  model_name:
    value: "dcnv2"
  data_set:
    value: "blog_embedded"
  data_path:
    value: "./datasets"
  data_loader_nprocs:
    value: 4
  exp_cadence_type:
    value: "improvement"

  # Searched architecture hyperparameters.
  hidden_dim:
    distribution: int_uniform
    min: 32
    max: 768
  n_hidden_layers:
    distribution: int_uniform
    min: 1
    max: 8
  n_cross_layers:
    distribution: int_uniform
    min: 1
    max: 10
  # Dropout ranges previously relied on the distribution being inferred from
  # the float bounds; it is now spelled out like every other range.
  hidden_dropout:
    distribution: uniform
    min: 0.0
    max: 0.3
  cross_dropout:
    distribution: uniform
    min: 0.0
    max: 0.3
  stacked:
    values: [true, false]
  d_embedding:
    distribution: int_uniform
    min: 2
    max: 128

  # Training schedule (constants) and searched optimiser hyperparameters.
  exp_train_total_epochs:
    value: 200
  batch_size:
    value: 512
  exp_warmup:
    value: 0
  lr:
    distribution: log_uniform_values
    min: 1.0e-5
    max: 1.0e-2
  final_lr:
    value: 0
  start_lr:
    value: 0
  weight_decay:
    distribution: log_uniform_values
    min: 1.0e-7
    max: 1.0e-3
  final_weight_decay:
    value: 0.0
  val_size_ratio:
    value: 0.1
  exp_cache_cadence:
    value: 20
  exp_patience:
    value: 16
  using_embedding:
    value: true
  eta_min:
    value: 0
  test_size_ratio:
    value: 0.1

  # Categorical choice of encoder variant.
  encoder_type:
    values:
      - "linear_flatten"
      - "linear_per_feature"
      - "conv"
      - "max_pool"
      - "mean_pool"
