# Command line the sweep agent executes for every run. The ${...} entries
# are sweep macros that the agent expands before launching the process.
command:
  - ${env}
  - python
  - ${program}
  # Fixed flag/value pair passed ahead of the per-run parameter overrides.
  - '--config-name'
  - 'weak_supervision_clustering'
  # Swept parameters are appended as `key=value` (no leading `--`).
  - ${args_no_hyphens}

# Exhaustive grid search: one run per combination of the values below.
# Grid size: 8 datasets x 2 model sizes x 2 mv_as_verifier x 2 use_deps
#            x 2 drop_imbalanced_verifiers x 10 n_clusters x 2 cluster_type
#            = 2560 runs. Single-valued parameters do not enlarge the grid.
method: grid
parameters:
  # ablation over dataset
  data_cfg.dataset_name:
    values:
      - "MATH-500-v2"
      - "AIMO-v2"
      - "MMLU-Pro-v2"
      - "GPQA-v2"
      - "MMLU-College-v2"
      - "BBH-v2"
      - "ArenaHard"
      - "AlpacaEval"
  # ablation over policy model size: Llama 8B and 70B
  data_cfg.model_size:
    values:
      - "8B"
      - "70B"
  # ablation over the mv_as_verifier flag (on/off).
  # NOTE(review): presumably "majority vote as a verifier" - confirm against
  # the program's config. No train-split parameter is swept in this section.
  data_cfg.mv_as_verifier:
    values:
      - true
      - false
  # ablation over the use_deps setting of the weak-supervision model
  model_params.weak_supervision.use_deps:
    values:
      - "none"
      - "drop"
  # fixed training hyperparameters (single value each, not ablated)
  model_params.weak_supervision.n_epochs:
    values:
      - 5000
  model_params.weak_supervision.mu_epochs:
    values:
      - 10000
  model_params.weak_supervision.lr:
    values:
      - 0.0001
  # ablation over the drop_imbalanced_verifiers strategy
  model_params.weak_supervision.drop_imbalanced_verifiers:
    values:
      - "all"
      - "adaptive"
  # fixed model class for this sweep
  model_cfg.model_class:
    values:
      - "cluster"
  # ablation over the number of clusters (1 through 10)
  # NOTE(review): with n_clusters = 1 both cluster_type values may yield the
  # same partition, making those runs redundant - confirm before launching.
  model_cfg.cluster_cfg.n_clusters:
    values:
      - 1
      - 2
      - 3
      - 4
      - 5
      - 6
      - 7
      - 8
      - 9
      - 10
  # ablation over how clusters are formed
  model_cfg.cluster_cfg.cluster_type:
    values:
      - "by_difficulty"
      - "random"

    
# Script substituted for the ${program} macro in `command`.
program: ../scripts/supervised_analysis/run.py
# Sweep display name, followed by the project and entity it is filed under.
name: ws_cluster_random_or_by_difficulty
project: verification
entity: anonymous-research

