# Command line launched for every run of the sweep.
# ${env}, ${program} and ${args_no_hyphens} are W&B sweep macros:
#   ${env}             -> /usr/bin/env on Unix systems
#   ${program}         -> the script named by the top-level `program` key
#   ${args_no_hyphens} -> the swept parameters as `key=value` pairs
# The `key=value` form (no leading `--`) plus `--config-name` suggests a
# Hydra entry point loading a config named "supervised" - TODO confirm.
command:
  - ${env}
  - python
  - ${program}
  - '--config-name'
  - 'supervised'
  - ${args_no_hyphens}

# Grid search: one run per combination of the parameter values listed below.
# Only the dataset and the two winsorize quantiles take several values, so
# the sweep has 8 x 2 x 2 = 32 runs; every other parameter is pinned.
method: grid
parameters:
  # Ablation over dataset.
  data_cfg.dataset_name:
    values:
      - 'MATH-500'
      - 'AIMO'
      - 'MMLU-Pro'
      - 'GPQA'
      - 'MMLU-College'
      - 'BBH'
      - 'ArenaHard'
      - 'AlpacaEval'
  # Policy model size. Only 8B is swept here; add further sizes (e.g. 70B)
  # to this list to turn it into an ablation.
  data_cfg.model_size:
    values:
      - '8B'
  # Train split fraction (1.0 = just use all data).
  data_cfg.train_split:
    values:
      - 1.0
  # Random seed (single seed).
  data_cfg.random_seed:
    values:
      - 0
  # Whether MV (presumably majority vote - confirm) is a verifier feature.
  data_cfg.mv_as_verifier:
    values:
      - true
  # Fit type: either search the best weights for the test set, or use the
  # weights closest to the train set. Only the latter is swept here.
  # NOTE(review): confirm the leading "w" in "wclosest_to_train" is intended
  # and matches the option name the script expects.
  fit_cfg.fit_type:
    values:
      - 'wclosest_to_train'
  # Whether small or large verifiers are used (single value here).
  verifier_cfg.verifier_size:
    values:
      - 8
  # Score normalization scope.
  data_cfg.normalize_type:
    values:
      - 'per_problem'
  # Score normalization method; its quantiles are swept at the bottom.
  data_cfg.normalize_method:
    values:
      - 'winsorize'
  # Model type.
  model_cfg.model_type:
    values:
      - 'logistic_regression'
  # Model class.
  model_cfg.model_class:
    values:
      - 'per_dataset'
  # Winsorize quantiles.
  # NOTE(review): the grid crosses both lists, so the asymmetric pairs
  # (0.05, 0.99) and (0.01, 0.95) are run as well - confirm that is wanted.
  normalize_method_params.winsorize.lower_quantile:
    values:
      - 0.05
      - 0.01
  normalize_method_params.winsorize.upper_quantile:
    values:
      - 0.95
      - 0.99


# Script substituted for ${program} in the run command. The path is relative
# (presumably to the directory the sweep is launched from - TODO confirm).
program: ../../scripts/supervised_analysis/run.py
# Name of this sweep.
name: supervised_all_data
# Project the sweep belongs to.
project: verification

