# Command template W&B uses to launch each run of the sweep.
command:
  # W&B macro; expands to /usr/bin/env on Unix systems
  - ${env}
  - python
  # W&B macro; replaced by the value of the `program` key in this file
  - ${program}
  # selects the run config by name (presumably a Hydra flag; confirm)
  - '--config-name'
  - 'supervised_bert'
  # W&B macro; passes the swept parameters as `key=value`, without `--`
  - ${args_no_hyphens}

# Grid search: one run per combination of the values listed below.
# Current grid size: 8 * 2 * 1 * 3 * 2 * 3 * 3 = 864 runs.
method: grid
parameters:
  # ablation over dataset
  data_cfg.dataset_name:
    values:
      - 'AIMO-v2'
      - 'GPQA-v2'
      - 'MATH-500-v2'
      - 'MMLU-College-v2'
      - 'MMLU-Pro-v2'
      - 'BBH-v2'
      - 'ArenaHard'
      - 'AlpacaEval'
  # ablation over policy model size: Llama 8B and 70B
  data_cfg.model_size:
    values:
      - '8B'
      - '70B'
  # train split: a single fixed value, so the grid does not vary it
  # NOTE(review): this was described as "just use all data", but 0.8 is
  # not 1.0; confirm the intended split
  data_cfg.train_split:
    values:
      - 0.8
  # repeat every configuration over three random seeds
  data_cfg.random_seed:
    values:
      - 0
      - 1
      - 2
  # ablation over whether we use small or large verifiers
  # NOTE(review): policy sizes above are 8B/70B; confirm that 80 (not 70)
  # is the intended large-verifier size
  verifier_cfg.verifier_size:
    values:
      - 8
      - 80
  # ablation over the hybrid sampler's alpha
  # TODO: document what alpha = 0.0 and alpha = 1.0 select
  data_cfg.sampler_hybrid_alpha:
    values:
      - 1.0
      - 0.0
      - 0.5
  # ablation over pos_weight (presumably the positive-class weight of the
  # BCE loss; confirm against the training script)
  loss_params.bce.pos_weight:
    values:
      - 1.0
      - 10.0
      - 0.5

# Sweep identity and entry point.
project: 'verification'
name: 'supervised_all_data_w_bert_embed'
# script substituted for ${program} in `command`; relative path
# (presumably resolved from the directory the agent runs in; confirm)
program: '../../scripts/supervised_analysis/run2.py'
