# Command template used to launch each run of the sweep.
# The ${...} tokens look like W&B sweep command macros (see the W&B sweep
# configuration docs): they are substituted when the run is launched and
# must stay exactly as written.
command:
  - ${env}
  - python
  # Filled in from the top-level `program` key of this file.
  - ${program}
  # Selects the config named "supervised" (a Hydra-style flag -- presumably
  # resolved by the program itself; confirm against its config directory).
  - '--config-name'
  - 'supervised'
  # Swept parameters, passed without leading hyphens (key=value form) so the
  # dotted keys under `parameters` can act as config overrides.
  - ${args_no_hyphens}

# Grid search: every combination of the value lists below is run once.
# As currently configured that is 8 * 1 * 2 * 5 * 3 * 1 * 1 * 1 * 2 = 480
# combinations -- recount if any list is edited.
method: grid
parameters:
  # Datasets to sweep over.
  data_cfg.dataset_name:
    values:
      - 'MATH-500-v2'
      - 'AIMO-v2'
      - 'GPQA-v2'
      - 'MMLU-College-v2'
      - 'MMLU-Pro-v2'
      - 'BBH-v2'
      - 'ArenaHard'
      - 'AlpacaEval'
  # Policy model size (Llama 8B / 70B). Only 8B is enabled; re-enable the
  # commented entry to ablate over both sizes.
  data_cfg.model_size:
    values:
      - '8B'
      # - '70B'
  # Train split values swept by the grid.
  # NOTE(review): this was previously annotated "just use all data", but both
  # 1.0 and 0.8 are listed -- confirm that 0.8 is intended.
  data_cfg.train_split:
    values:
      - 1.0
      - 0.8
  # Train sample counts, largest first.
  data_cfg.train_samples:
    values:
      - 100
      - 64
      - 32
      - 16
      - 4
  # Three random seeds per configuration.
  data_cfg.random_seed:
    values:
      - 0
      - 1
      - 2
  # Whether MV is used as a verifier feature. Only `true` is listed, so this
  # axis is currently fixed rather than ablated.
  data_cfg.mv_as_verifier:
    values:
      - true
  # Verifier size (small vs. large verifiers). Only 8 is listed, so this axis
  # is currently fixed rather than ablated.
  verifier_cfg.verifier_size:
    values:
      - 8
  # Model type; a single value, so this axis is fixed.
  model_cfg.model_type:
    values:
      - 'logistic_regression'
  # Model class; both variants are swept.
  model_cfg.model_class:
    values:
      - 'per_problem'
      - 'per_dataset'

# Sweep identity and entry point.
project: verification
# NOTE(review): the name says "per_problem", but `model_cfg.model_class` in
# this file sweeps both per_problem and per_dataset -- confirm the name still
# describes the sweep before renaming (left unchanged here).
name: coverage_ablation_lr_per_problem
# Script substituted for ${program} in `command`. The path is relative --
# presumably to the directory the sweep agent is started from; TODO confirm.
program: ../../scripts/supervised_analysis/run.py
