# Verifier selection settings (also referenced from data_cfg.verifier_cfg).
verifier_cfg:
  # One of: "reward_models", "judges", "all".
  verifier_type: "all"
  # One of: "all", "small", "medium", "large", or an int
  # (verifiers less than or equal to this number).
  verifier_size: "all"
  # List of verifier names to use; explicit null means no list is given.
  verifier_subset: null

# Dataset selection, train/test splitting, and normalization settings.
data_cfg:
  # Documented options: "MATH-500", "AIMO", "MMLU-Pro", "GPQA",
  # "MMLU-College", "AlpacaEval", "BBH".
  # NOTE(review): "MATH-500-v2" is not in the documented list above —
  # confirm the data loader accepts this name.
  dataset_name: "MATH-500-v2"
  # Fraction of data used for training; 1.0 means no test set.
  train_split: 1.0
  # Fraction/number of total queries in train_split used for training:
  #   <= 1: fraction of total queries (determined by 'train_split')
  #   >  1: number of train queries
  train_queries: 1
  # Fraction of total samples in train_split used for training.
  train_samples: 1
  # Fraction of total samples in test_split used for testing.
  test_samples: 1
  # Either "mean" or 0.
  nan_replacement: 0
  random_seed: 0
  model_size: "70B"
  # null means no threshold.
  reward_threshold: null
  # One of: per_problem, all_problems.
  normalize_type: "all_problems"
  # Documented choices: minmax, quantile. The top-level
  # normalize_method_params mapping also defines winsorize and
  # quantile_minmax entries.
  normalize_method: "minmax"
  # Resolves to the normalize_method_params entry named by normalize_method.
  normalize_params: ${normalize_method_params.${data_cfg.normalize_method}}
  # One of: "SBERT", "mean_verifier_distance", "cov_verifier_distance".
  closest_train_problem_method: "mean_verifier_distance"
  # One of: "cosine", "euclidean", "frobenius".
  closest_train_problem_metric_type: "euclidean"
  # Interpolates the top-level verifier_cfg mapping into this section.
  verifier_cfg: ${verifier_cfg}
  # true or false.
  mv_as_verifier: true
  # Test set is fixed independently of train_split; null leaves it unset.
  fixed_test_split: null
  same_train_test: false
  # How to slice the data for the train/test split.
  train_split_bins: 10
  # Shuffle the samples in the train and test split.
  shuffle_samples: true
  # Unset here; "all" is the alternative noted in the original comment.
  drop_imbalanced_verifiers: null
  # Applies when using drop_imbalanced_verifiers.
  max_num_independent_verifiers: null


# Debug switch (off here).
# NOTE(review): its effect is defined by the consuming code, which is not
# visible in this file.
debug: false

# Model selection and clustering settings.
model_cfg:
  # One of: naive_bayes, logistic_regression.
  model_type: "logistic_regression"
  # One of: per_problem, per_dataset, cluster.
  model_class: "cluster"
  # Resolves to the model_params entry named by model_type.
  model_params: ${model_params.${model_cfg.model_type}}
  cluster_cfg:
    # Number of clusters.
    n_clusters: 2
    # One of: "by_difficulty", "random", "unique_extracted_answers".
    cluster_type: "unique_extracted_answers"
    # Preserve ties between questions with the same number of unique answers.
    preserve_ties: true
    # Whether to try to maintain uniform bucket sizes while preserving ties.
    uniform_with_ties: false
    cluster_on_all: true


# Fitting strategy.
fit_cfg:
  # One of: wclosest_to_train, search_weights.
  fit_type: "wclosest_to_train"


# Logging backend; "wandb" is the only value shown in this file.
logging: "wandb"
# Weights & Biases settings.
wandb_cfg:
  project: "verification"
  # Taken from the WANDB_ENTITY environment variable. The `null` default makes
  # the value resolve to null when the variable is unset, instead of raising
  # during interpolation; behavior is unchanged when the variable is set.
  entity: "${oc.env:WANDB_ENTITY,null}"

# Per-model parameter sets; model_cfg.model_params selects one entry by
# model_cfg.model_type.
model_params:
  # NOTE(review): these keys look like scikit-learn LogisticRegression
  # arguments — confirm how the consumer forwards them.
  logistic_regression:
    random_state: 42
    max_iter: 1000
    penalty: "l2"
    class_weight: "balanced"
    solver: "newton-cholesky"

  naive_bayes:
    binarize_threshold: 0.5
    # Alternative noted in the original comment: 0.01.
    clip_min: 0.0
    # Alternative noted in the original comment: 0.99.
    clip_max: 1.0
    # One of: "drop", "model".
    use_deps: "drop"
    # One of: null, 'all', 'small', 'large'.
    drop_imbalanced_verifiers: "all"
    # Number of verifiers to drop if using drop_imbalanced_verifiers.
    drop_k: 3

# Per-method normalization parameters; data_cfg.normalize_params selects one
# entry by data_cfg.normalize_method.
normalize_method_params:
  minmax:
    # Placeholder key: minmax does not take any parameters.
    tmp: null
  quantile:
    output_distribution: "uniform"
    # Min number of quantiles.
    n_quantiles: 100
  # Map each observation to quantile by clipping extreme values.
  winsorize:
    # Alternative noted in the original comment: 0.01.
    lower_quantile: 0.0
    # Alternative noted in the original comment: 0.99.
    upper_quantile: 1.0
  quantile_minmax:
    output_distribution: "normal"
    # Min number of quantiles.
    n_quantiles: 100

