{
  "artifact": "prm_field_diagnostic_summary",
  "status": "non_headline_appendix_diagnostic",
  "purpose": "Sanitized aggregate summary for a related diagnostic comparing scalar process-score features with structural typed-field features.",
  "task": "classify locally verified typed-field traces against operation-shuffled negatives",
  "n_pos": 300,
  "n_neg": 298,
  "n_total": 598,
  "n_unique_problems": 107,
  "cross_validation": "problem-disjoint 5-fold",
  "prm_model": "Qwen/Qwen2.5-Math-PRM-7B",
  "single_feature_auc": {
    "prm_mean": 0.5099608501118569,
    "prm_min": 0.5213870246085011,
    "prm_last": 0.5204809843400447
  },
  "cv5_auc_mean": {
    "prm_4features": 0.5183479284369115,
    "structural_field_12features": 0.9454566854990585,
    "combined_16features": 0.9452669491525423
  },
  "cv5_auc_sd": {
    "prm_4features": 0.01417996845752588,
    "structural_field_12features": 0.025286483366073184,
    "combined_16features": 0.028581622136555816
  },
  "cv5_auc_per_fold": {
    "prm_4features": [
      0.5356944444444445,
      0.5175000000000001,
      0.5258333333333334,
      0.519774011299435,
      0.4929378531073446
    ],
    "structural_field_12features": [
      0.9384722222222222,
      0.9676388888888888,
      0.9175,
      0.9218926553672316,
      0.9817796610169492
    ],
    "combined_16features": [
      0.9454166666666667,
      0.9777777777777777,
      0.9147222222222222,
      0.9118644067796611,
      0.9765536723163841
    ]
  },
  "delta_structural_minus_prm_cv5": 0.427108757062147,
  "interpretation_boundary": "This is a separate classifier diagnostic, not a final-answer benchmark or proof-validity result.",
  "sanitization": "Raw per-row records, problem statements, gold answers, prompts, and local paths are excluded from the supplement.",
  "source_sha256": "e023f82a4ce8336185f29f9ddba7af11da02083de0a9b5f7ce445105d60fabed"
}
