# Command line launched for every sweep trial. The ${...} tokens are W&B sweep
# macros (see the W&B sweep-configuration docs): ${env} resolves to the system
# `env` launcher, ${program} to the script given by the top-level `program`
# key, and ${args_no_hyphens} to the swept parameters as `key=value` tokens
# (no leading `--`).
command:
  - ${env}
  - python
  - ${program}
  # Base config passed to the script; the flag/value pair looks like a Hydra
  # `--config-name` selection -- TODO confirm against the script.
  - '--config-name'
  - 'supervised_bert'
  - ${args_no_hyphens}

# Grid search: one trial per combination of the values listed below.
# With the current lists that is 8 datasets x 5 sample counts x 3 seeds
# x 2 verifier sizes x 2 learning rates = 480 combinations; every other
# parameter is pinned to a single value.
method: grid
parameters:
  # Datasets to sweep over.
  data_cfg.dataset_name:
    values:
      - 'MATH-500-v2'
      - 'AIMO-v2'
      - 'GPQA-v2'
      - 'MMLU-College-v2'
      - 'MMLU-Pro-v2'
      - 'BBH-v2'
      - 'ArenaHard'
      - 'AlpacaEval'
  # Policy model size (Llama). Only 8B is active; re-enable the 70B entry
  # to turn this back into an ablation over model size.
  data_cfg.model_size:
    values:
      - '8B'
      # - '70B'
  # Train split: 1.0 means all data is used; 0.8 is kept as a disabled
  # alternative.
  data_cfg.train_split:
    values:
      - 1.0
      # - 0.8
  # Values swept for train_samples.
  data_cfg.train_samples:
    values:
      - 100
      - 64
      - 32
      - 20
      - 16
  # Random seeds; each configuration is repeated once per seed.
  data_cfg.random_seed:
    values:
      - 0
      - 1
      - 2
  # Whether MV is used as a verifier feature (fixed to false here).
  data_cfg.mv_as_verifier:
    values:
      - false
  # Ablation over whether small or large verifiers are used.
  verifier_cfg.verifier_size:
    values:
      - 8
      - 80
  # Model type (single fixed value).
  model_cfg.model_type:
    values:
      - 'bert_classifier'
  # Learning rates to sweep over.
  fit_cfg.lr:
    values:
      - 0.0005
      - 0.0001
  # Early stopping is always enabled.
  fit_cfg.early_stopping:
    values:
      - true

# Script substituted for ${program} in `command`. The path is relative;
# presumably resolved from the directory the sweep agent is launched in --
# TODO confirm.
program: ../../scripts/supervised_analysis/run2.py
# Sweep name and the project it is filed under.
name: coverage_ablation_bert_per_problem
project: 'verification'
