# Hydra defaults list: one option is selected per config group and the
# selected configs are merged in the order listed here.
defaults:
  # `_self_` comes first, so the groups below are merged after this file and
  # take precedence over same-named keys defined in it.
  - _self_
  - model: esm2
  - data: default
  - trainer: default
  - mlflow: default
  - benchmark: test
  - optuna: default
  - tracking: test
  - peft: dora
  - embedder: null  # no option selected for this group by default
  - generator: pygad
  # Swap Hydra's own job-logging and launcher configs.
  - override hydra/job_logging: custom_logging
  - override hydra/launcher: slurm

# Paths and experiment identity. Each `oc.env` lookup falls back to its second
# argument when the environment variable is not set.
benchmark_dir: ${oc.env:DATA_HOME, benchmarks}
experiment_name: ${oc.env:HAIPR_EXPERIMENT_NAME, HAIPR}
experiment_id: ${oc.env:HAIPR_EXPERIMENT_ID, 1}
# parent_run_name: ${oc.env:HAIPR_PARENT_RUN_NAME, TEST-${now:%H-%M-%S}}
benchmark_group: hgym  # default, gets overwritten by benchmark=XY
# Set true for multiprocessing where all parallelization is handled by haipr.
# Set false when using the SLURM launcher.
parallel: false
use_predictor: true  # TODO: remove after refactoring
run_sequential: false
# Global batch size configuration.
# If set to a value > 0, this overrides the model's batch_size and adjusts
# accumulate_grad_batches accordingly.
global_batch_size: null  # null = use model's batch_size, >0 = use this as effective batch size
# Taken from benchmark.inference_alphabet when that key exists, otherwise `train`.
alphabet_per_position: ${oc.select:benchmark.inference_alphabet, train}
# Global random seed for reproducibility
seed: 42

# Task configuration. Both values are read from the selected model/data config
# when present and fall back to the defaults given here.
task: ${oc.select:model.task, regression}  # regression or classification
# 0 for regression, >0 for classification.
# NOTE(review): the fallback here is 2 while the fallback task is regression;
# confirm which of the two defaults is intended.
num_classes: ${oc.select:data.num_classes, 2}

# Training batch size: model.batch_size when defined, otherwise 4
# (a value that is safe for most models).
batch_size: ${oc.select:model.batch_size, 4}
inference_batch_size: 512

# Device configuration
devices: rotate  # by default rotate GPUs for consecutive jobs
device: null  # by default use all available GPUs

# Inference configuration
inference_mode: design
inference_ddp: true
models_from_parent_run: null
run_name: null
num_sequences: 1000000
# evaluators: ${oc.select:evaluators, ???} # model-ids from mlflow ["id_1", "id_2", ...]
# NOTE(review): exponent literal without a decimal point; some YAML 1.1
# loaders read this form as a string rather than a number. Confirm that the
# consumer receives a numeric value.
max_sequences: 1e9
max_mutations: 10
seq_prob_cutoff: 0.8
filter_by_perplexity: false
embedding_cache_size: 10  # in GB

# Parallelization settings for evaluators
parallel_evaluators: true  # enable parallel evaluation of score evaluators
max_workers: null  # null = auto-detect, or specify a number
parallel_backend: "thread"  # "thread" for PyTorch, "process" for sklearn

# Optimization configuration
# NOTE(review): YAML does not split dotted keys, so this defines a literal
# top-level key named "mlflow.log_models" rather than `log_models` inside the
# `mlflow` group. It is left unchanged here because `_self_` is merged before
# the `mlflow` group in the defaults list, so nesting it under `mlflow:` in
# this file could be overridden by the group's own value. Confirm the intent
# and set it in the mlflow group config or via a command-line override.
mlflow.log_models: false

subsample_threshold: 0
parent_run_name: null
evaluation_trial_number: null  # if not null, use this trial number for evaluation
# Do not run all test_split_idx by default; this can be managed by outer
# parallelization / SLURM.
run_cv: false

# Hydra runtime settings
hydra:
  # Must be nested under `hydra:`. A top-level key spelled `hydra.verbose` is
  # an ordinary string key and is never seen by Hydra.
  verbose: true  # set all loggers to DEBUG so logger output is emitted
  # Logging dirs: <log root>/<experiment name>/<parent run name>.
  # The environment variable name must not carry a trailing slash, otherwise
  # the lookup never matches and the default is always used.
  run:
    dir: ${oc.env:HAIPR_LOG_DIR, logs}/${oc.select:mlflow.experiment_name, TEST}/${oc.select:mlflow.parent_run_name, run}
  sweep:
    dir: ${oc.env:HAIPR_LOG_DIR, logs}/${oc.select:mlflow.experiment_name, TEST}/${oc.select:mlflow.parent_run_name, sweep}
  sweeper:
    params:
      # Sweep over every benchmark in the selected group.
      benchmark: ${benchmarks_in_group:${benchmark_group}}