{
  "aggregate": {
    "baselines": {
      "always_cma": {
        "confusion": {
          "accuracy": 0.6311111111111111,
          "n_non_ties": 450,
          "pred_cma_label_cma": 284,
          "pred_cma_label_berw": 166,
          "pred_berw_label_cma": 0,
          "pred_berw_label_berw": 0,
          "pred_berw_rate": 0.0
        },
        "regret": {
          "mean": 0.09225604346835509,
          "median": 0.0,
          "q90": 0.28614881689556854
        }
      },
      "always_berw": {
        "confusion": {
          "accuracy": 0.3688888888888889,
          "n_non_ties": 450,
          "pred_cma_label_cma": 0,
          "pred_cma_label_berw": 0,
          "pred_berw_label_cma": 284,
          "pred_berw_label_berw": 166,
          "pred_berw_rate": 1.0
        },
        "regret": {
          "mean": 0.6404413971596298,
          "median": 0.03869670932211533,
          "q90": 2.5423083539765896
        }
      }
    },
    "cv": {
      "confusion": {
        "accuracy": 0.5733333333333334,
        "n_non_ties": 450,
        "pred_cma_label_cma": 162,
        "pred_cma_label_berw": 70,
        "pred_berw_label_cma": 122,
        "pred_berw_label_berw": 96,
        "pred_berw_rate": 0.48444444444444446
      },
      "regret": {
        "mean": 0.10908026287842792,
        "median": 0.0,
        "q90": 0.38415408647437094
      }
    },
    "fixed_threshold": {
      "confusion": {
        "accuracy": 0.5888888888888889,
        "n": 450,
        "pred_cma_label_cma": 147,
        "pred_cma_label_berw": 48,
        "pred_berw_label_cma": 137,
        "pred_berw_label_berw": 118,
        "pred_berw_rate": 0.5666666666666667
      },
      "regret": {
        "mean": 0.10355946394168393,
        "median": 0.0,
        "q90": 0.3482856306381626
      },
      "threshold": 0.12
    },
    "thresholds": {
      "max": 0.3055555555555555,
      "mean": 0.20555555555555555,
      "median": 0.18055555555555555,
      "min": 0.15277777777777776,
      "std": 0.05299662230094142
    }
  },
  "folds": [
    {
      "fold": 0,
      "test_groups": [
        1,
        6,
        11
      ],
      "train_groups": [
        2,
        3,
        4,
        5,
        7,
        8,
        9,
        10,
        12,
        13,
        14,
        15
      ]
    },
    {
      "fold": 1,
      "test_groups": [
        2,
        7,
        12
      ],
      "train_groups": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        11,
        13,
        14,
        15
      ]
    },
    {
      "fold": 2,
      "test_groups": [
        3,
        8,
        13
      ],
      "train_groups": [
        1,
        2,
        4,
        5,
        6,
        7,
        9,
        10,
        11,
        12,
        14,
        15
      ]
    },
    {
      "fold": 3,
      "test_groups": [
        4,
        9,
        14
      ],
      "train_groups": [
        1,
        2,
        3,
        5,
        6,
        7,
        8,
        10,
        11,
        12,
        13,
        15
      ]
    },
    {
      "fold": 4,
      "test_groups": [
        5,
        10,
        15
      ],
      "train_groups": [
        1,
        2,
        3,
        4,
        6,
        7,
        8,
        9,
        11,
        12,
        13,
        14
      ]
    }
  ],
  "group_by": "instance",
  "input": "evidence/bbob_noisy_probe_decision_accuracy_noisefree_i1-15_B200_d20/decision_points.csv",
  "k": 5,
  "loss": {
    "eps": 1e-12,
    "name": "log10"
  },
  "per_fold": [
    {
      "fold": 0,
      "group_by": "instance",
      "selected_threshold": 0.18055555555555555,
      "selection_criterion": "minimize train_regret_mean, tie-break by smaller threshold",
      "test_accuracy": 0.5666666666666667,
      "test_groups": [
        1,
        6,
        11
      ],
      "test_regret_mean": 0.0930225491647936,
      "test_regret_median": 0.0,
      "test_regret_q90": 0.3876198013357014,
      "train_accuracy": 0.6027777777777777,
      "train_groups": [
        2,
        3,
        4,
        5,
        7,
        8,
        9,
        10,
        12,
        13,
        14,
        15
      ],
      "train_label_berw_rate": 0.38333333333333336,
      "train_regret_mean": 0.08517837633512775
    },
    {
      "fold": 1,
      "group_by": "instance",
      "selected_threshold": 0.18055555555555555,
      "selection_criterion": "minimize train_regret_mean, tie-break by smaller threshold",
      "test_accuracy": 0.6,
      "test_groups": [
        2,
        7,
        12
      ],
      "test_regret_mean": 0.07868432365547663,
      "test_regret_median": 0.0,
      "test_regret_q90": 0.2129154529199674,
      "train_accuracy": 0.5944444444444444,
      "train_groups": [
        1,
        3,
        4,
        5,
        6,
        8,
        9,
        10,
        11,
        13,
        14,
        15
      ],
      "train_label_berw_rate": 0.38055555555555554,
      "train_regret_mean": 0.08876293271245699
    },
    {
      "fold": 2,
      "group_by": "instance",
      "selected_threshold": 0.3055555555555555,
      "selection_criterion": "minimize train_regret_mean, tie-break by smaller threshold",
      "test_accuracy": 0.5,
      "test_groups": [
        3,
        8,
        13
      ],
      "test_regret_mean": 0.1277675542404004,
      "test_regret_median": 0.002915573715001729,
      "test_regret_q90": 0.4035911469586971,
      "train_accuracy": 0.6111111111111112,
      "train_groups": [
        1,
        2,
        4,
        5,
        6,
        7,
        9,
        10,
        11,
        12,
        14,
        15
      ],
      "train_label_berw_rate": 0.34444444444444444,
      "train_regret_mean": 0.08673327559825417
    },
    {
      "fold": 3,
      "group_by": "instance",
      "selected_threshold": 0.15277777777777776,
      "selection_criterion": "minimize train_regret_mean, tie-break by smaller threshold",
      "test_accuracy": 0.6333333333333333,
      "test_groups": [
        4,
        9,
        14
      ],
      "test_regret_mean": 0.13659403809917262,
      "test_regret_median": 0.0,
      "test_regret_q90": 0.35201773869398667,
      "train_accuracy": 0.5916666666666667,
      "train_groups": [
        1,
        2,
        3,
        5,
        6,
        7,
        8,
        10,
        11,
        12,
        13,
        15
      ],
      "train_label_berw_rate": 0.35555555555555557,
      "train_regret_mean": 0.0841070627042575
    },
    {
      "fold": 4,
      "group_by": "instance",
      "selected_threshold": 0.20833333333333334,
      "selection_criterion": "minimize train_regret_mean, tie-break by smaller threshold",
      "test_accuracy": 0.5666666666666667,
      "test_groups": [
        5,
        10,
        15
      ],
      "test_regret_mean": 0.10933284923229633,
      "test_regret_median": 0.0,
      "test_regret_q90": 0.373893184481738,
      "train_accuracy": 0.6055555555555555,
      "train_groups": [
        1,
        2,
        3,
        4,
        6,
        7,
        8,
        9,
        11,
        12,
        13,
        14
      ],
      "train_label_berw_rate": 0.38055555555555554,
      "train_regret_mean": 0.08351500586426842
    }
  ],
  "probe_key": "misranking_rd",
  "selection": "regret_mean_then_threshold"
}