{
  "number_of_domains": 4,
  "total_queries_across_domains": 160,
  "queries_per_domain": {
    "mean": 40,
    "median": 40.0,
    "values": [
      40,
      40,
      40,
      40
    ]
  },
  "averaged_scores": {
    "user_profile_accuracy": {
      "mean": 0.45090974388623917,
      "median": 0.4227547837346126,
      "std_dev": 0.07680451041557226,
      "min": 0.3967689348689542,
      "max": 0.5613604732067772,
      "count": 4
    },
    "intent_capture_accuracy": {
      "mean": 0.49,
      "median": 0.485,
      "std_dev": 0.025495097567963945,
      "min": 0.465,
      "max": 0.525,
      "count": 4
    },
    "intent_macro_f1": {
      "mean": 0.49,
      "median": 0.485,
      "std_dev": 0.025495097567963945,
      "min": 0.465,
      "max": 0.525,
      "count": 4
    },
    "context_retrieval_accuracy": {
      "mean": 0.1238478619642568,
      "median": 0.11544159997134829,
      "std_dev": 0.03872548978699328,
      "min": 0.08865352365352366,
      "max": 0.175854724260807,
      "count": 4
    },
    "citation_accuracy": {
      "mean": 0.12359444560413525,
      "median": 0.12240724723090285,
      "std_dev": 0.02315025519180603,
      "min": 0.09928579497987394,
      "max": 0.15027749297486137,
      "count": 4
    },
    "document_quality_score": {
      "mean": 4.5375,
      "median": 4.517875,
      "std_dev": 0.10730156103244708,
      "min": 4.432499999999998,
      "max": 4.681749999999998,
      "count": 4
    },
    "overall_score": {
      "mean": 1.1451704102909264,
      "median": 1.144611564489375,
      "std_dev": 0.023689662444819938,
      "min": 1.1167601165963679,
      "max": 1.1746983955885875,
      "count": 4
    }
  },
  "context_retrieval_detailed_metrics": {
    "precision": {
      "mean": 0.15578914234287836,
      "median": 0.0,
      "std_dev": 0.25445972107093257,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    },
    "recall": {
      "mean": 0.11394315470344601,
      "median": 0.0,
      "std_dev": 0.17994474754628906,
      "min": 0.0,
      "max": 0.8,
      "count": 160
    },
    "f1_score": {
      "mean": 0.12384786196425679,
      "median": 0.0,
      "std_dev": 0.19331203205901765,
      "min": 0.0,
      "max": 0.888888888888889,
      "count": 160
    }
  },
  "intent_detailed_averages": {
    "per_field_precision": {
      "target_audience": {
        "mean": 0.61875,
        "median": 0.6375,
        "std_dev": 0.04732423621500227,
        "min": 0.55,
        "max": 0.65,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.35,
        "median": 0.3625,
        "std_dev": 0.05400617248673217,
        "min": 0.275,
        "max": 0.4,
        "count": 4
      },
      "detail_level": {
        "mean": 0.42500000000000004,
        "median": 0.4125,
        "std_dev": 0.09789450103725611,
        "min": 0.325,
        "max": 0.55,
        "count": 4
      },
      "tone_preference": {
        "mean": 0.11875,
        "median": 0.1125,
        "std_dev": 0.042695628191498324,
        "min": 0.075,
        "max": 0.175,
        "count": 4
      },
      "document_type": {
        "mean": 0.9375,
        "median": 0.95,
        "std_dev": 0.024999999999999967,
        "min": 0.9,
        "max": 0.95,
        "count": 4
      }
    },
    "per_field_recall": {
      "target_audience": {
        "mean": 0.61875,
        "median": 0.6375,
        "std_dev": 0.04732423621500227,
        "min": 0.55,
        "max": 0.65,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.35,
        "median": 0.3625,
        "std_dev": 0.05400617248673217,
        "min": 0.275,
        "max": 0.4,
        "count": 4
      },
      "detail_level": {
        "mean": 0.42500000000000004,
        "median": 0.4125,
        "std_dev": 0.09789450103725611,
        "min": 0.325,
        "max": 0.55,
        "count": 4
      },
      "tone_preference": {
        "mean": 0.11875,
        "median": 0.1125,
        "std_dev": 0.042695628191498324,
        "min": 0.075,
        "max": 0.175,
        "count": 4
      },
      "document_type": {
        "mean": 0.9375,
        "median": 0.95,
        "std_dev": 0.024999999999999967,
        "min": 0.9,
        "max": 0.95,
        "count": 4
      }
    },
    "per_field_f1": {
      "target_audience": {
        "mean": 0.61875,
        "median": 0.6375,
        "std_dev": 0.04732423621500227,
        "min": 0.55,
        "max": 0.65,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.35,
        "median": 0.3625,
        "std_dev": 0.05400617248673217,
        "min": 0.275,
        "max": 0.4,
        "count": 4
      },
      "detail_level": {
        "mean": 0.42500000000000004,
        "median": 0.4125,
        "std_dev": 0.09789450103725611,
        "min": 0.325,
        "max": 0.55,
        "count": 4
      },
      "tone_preference": {
        "mean": 0.11875,
        "median": 0.1125,
        "std_dev": 0.042695628191498324,
        "min": 0.075,
        "max": 0.175,
        "count": 4
      },
      "document_type": {
        "mean": 0.9375,
        "median": 0.95,
        "std_dev": 0.024999999999999967,
        "min": 0.9,
        "max": 0.95,
        "count": 4
      }
    }
  },
  "quality_dimensions_averages": {
    "personalization_fidelity": {
      "mean": 4.415094339622642,
      "median": 5.0,
      "std_dev": 1.080915498005518,
      "min": 0.0,
      "max": 5.0,
      "count": 159
    },
    "factuality": {
      "mean": 4.2,
      "median": 4.0,
      "std_dev": 0.9097653301675344,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "citation_quality": {
      "mean": 4.207539877300613,
      "median": 4.0,
      "std_dev": 0.794497568118156,
      "min": 2.0,
      "max": 5.0,
      "count": 163
    },
    "fluency": {
      "mean": 4.925,
      "median": 5.0,
      "std_dev": 0.4422640436173051,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "structure": {
      "mean": 4.8875,
      "median": 5.0,
      "std_dev": 0.4887089911055672,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "temporal_task_accuracy": {
      "mean": 4.647798742138365,
      "median": 5.0,
      "std_dev": 0.6948122848206185,
      "min": 1.0,
      "max": 5.0,
      "count": 159
    },
    "overall_score": {
      "mean": 4.5375,
      "median": 4.7,
      "std_dev": 0.5908548332393195,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "temporal_accuracy": {
      "mean": 0.0,
      "median": 0.0,
      "std_dev": 0.0,
      "min": 0.0,
      "max": 0.0,
      "count": 1
    },
    "task_accuracy": {
      "mean": 0.0,
      "median": 0.0,
      "std_dev": 0.0,
      "min": 0.0,
      "max": 0.0,
      "count": 1
    }
  },
  "aggregated_score_distribution": {
    "excellent": 0,
    "good": 0,
    "fair": 0,
    "poor": 160
  },
  "overall_distribution_percentage": {
    "excellent": 0.0,
    "good": 0.0,
    "fair": 0.0,
    "poor": 100.0
  },
  "per_domain_results": {
    "o4_mini_benchmark_results_finance": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.3967689348689542,
        "intent_capture_accuracy": 0.49000000000000005,
        "intent_macro_f1": 0.49000000000000005,
        "context_retrieval_accuracy": 0.129834393676499,
        "citation_accuracy": 0.1346972544363849,
        "document_quality_score": 4.432499999999998,
        "overall_score": 1.1167601165963679
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.17545077838827838,
          "count": 40
        },
        "recall": {
          "mean": 0.11719160689671526,
          "count": 40
        },
        "f1_score": {
          "mean": 0.12983439367649893,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "target_audience": 0.65,
          "temporal_scope": 0.35,
          "detail_level": 0.325,
          "tone_preference": 0.175,
          "document_type": 0.95
        },
        "per_field_recall": {
          "target_audience": 0.65,
          "temporal_scope": 0.35,
          "detail_level": 0.325,
          "tone_preference": 0.175,
          "document_type": 0.95
        },
        "per_field_f1": {
          "target_audience": 0.65,
          "temporal_scope": 0.35,
          "detail_level": 0.325,
          "tone_preference": 0.175,
          "document_type": 0.95
        },
        "average_macro_f1": 0.49000000000000005,
        "evaluated_fields": [
          "target_audience",
          "temporal_scope",
          "detail_level",
          "tone_preference",
          "document_type"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.225,
          "median": 5.0,
          "std_dev": 1.1432634351063984,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.05,
          "median": 4.0,
          "std_dev": 0.9044051735526052,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 3.95,
          "median": 4.0,
          "std_dev": 0.8458041235325207,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.95,
          "median": 5.0,
          "std_dev": 0.22072142786315224,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.85,
          "median": 5.0,
          "std_dev": 0.4266746794119403,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.6,
          "median": 5.0,
          "std_dev": 0.5453768398418634,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.4325,
          "median": 4.6,
          "std_dev": 0.48165712882048134,
          "min": 3.3,
          "max": 5.0,
          "count": 40
        },
        "temporal_accuracy": {
          "mean": 0.0,
          "median": 0.0,
          "std_dev": 0.0,
          "min": 0.0,
          "max": 0.0,
          "count": 1
        },
        "task_accuracy": {
          "mean": 0.0,
          "median": 0.0,
          "std_dev": 0.0,
          "min": 0.0,
          "max": 0.0,
          "count": 1
        }
      }
    },
    "o4_mini_benchmark_results_healthcare": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.4006097607072707,
        "intent_capture_accuracy": 0.465,
        "intent_macro_f1": 0.465,
        "context_retrieval_accuracy": 0.175854724260807,
        "citation_accuracy": 0.15027749297486137,
        "document_quality_score": 4.681749999999998,
        "overall_score": 1.1746983955885875
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.22073942401686808,
          "count": 40
        },
        "recall": {
          "mean": 0.1651177637628094,
          "count": 40
        },
        "f1_score": {
          "mean": 0.175854724260807,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.1,
          "target_audience": 0.55,
          "temporal_scope": 0.4,
          "document_type": 0.9,
          "detail_level": 0.375
        },
        "per_field_recall": {
          "tone_preference": 0.1,
          "target_audience": 0.55,
          "temporal_scope": 0.4,
          "document_type": 0.9,
          "detail_level": 0.375
        },
        "per_field_f1": {
          "tone_preference": 0.1,
          "target_audience": 0.55,
          "temporal_scope": 0.4,
          "document_type": 0.9,
          "detail_level": 0.375
        },
        "average_macro_f1": 0.465,
        "evaluated_fields": [
          "tone_preference",
          "target_audience",
          "temporal_scope",
          "document_type",
          "detail_level"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.6,
          "median": 5.0,
          "std_dev": 0.7778998683173851,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.425,
          "median": 5.0,
          "std_dev": 0.7472170590486631,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.35,
          "median": 5.0,
          "std_dev": 0.8335897041541449,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.95,
          "median": 5.0,
          "std_dev": 0.22072142786315224,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.975,
          "median": 5.0,
          "std_dev": 0.15811388300841897,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.725,
          "median": 5.0,
          "std_dev": 0.5986094998689324,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.68175,
          "median": 4.75,
          "std_dev": 0.40565165996303926,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "o4_mini_benchmark_results_manufacturing": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5613604732067772,
        "intent_capture_accuracy": 0.47999999999999987,
        "intent_macro_f1": 0.47999999999999987,
        "context_retrieval_accuracy": 0.08865352365352366,
        "citation_accuracy": 0.09928579497987394,
        "document_quality_score": 4.48675,
        "overall_score": 1.1432099583680355
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.10153679653679654,
          "count": 40
        },
        "recall": {
          "mean": 0.08463827682928807,
          "count": 40
        },
        "f1_score": {
          "mean": 0.08865352365352365,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "target_audience": 0.65,
          "temporal_scope": 0.275,
          "tone_preference": 0.075,
          "document_type": 0.95,
          "detail_level": 0.45
        },
        "per_field_recall": {
          "target_audience": 0.65,
          "temporal_scope": 0.275,
          "tone_preference": 0.075,
          "document_type": 0.95,
          "detail_level": 0.45
        },
        "per_field_f1": {
          "target_audience": 0.65,
          "temporal_scope": 0.275,
          "tone_preference": 0.075,
          "document_type": 0.95,
          "detail_level": 0.45
        },
        "average_macro_f1": 0.47999999999999987,
        "evaluated_fields": [
          "target_audience",
          "temporal_scope",
          "tone_preference",
          "document_type",
          "detail_level"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.525,
          "median": 5.0,
          "std_dev": 1.061868213369297,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.025,
          "median": 4.0,
          "std_dev": 1.1432634351063984,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.2,
          "median": 4.0,
          "std_dev": 0.8227533512074423,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.875,
          "median": 5.0,
          "std_dev": 0.7905694150420949,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.825,
          "median": 5.0,
          "std_dev": 0.8129559864454139,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.775,
          "median": 5.0,
          "std_dev": 0.47971679679598994,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.48675,
          "median": 4.7,
          "std_dev": 0.8174330914358429,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "o4_mini_benchmark_results_technology": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.4448998067619545,
        "intent_capture_accuracy": 0.525,
        "intent_macro_f1": 0.525,
        "context_retrieval_accuracy": 0.10104880626619757,
        "citation_accuracy": 0.11011724002542081,
        "document_quality_score": 4.549,
        "overall_score": 1.1460131706107148
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.12542957042957043,
          "count": 40
        },
        "recall": {
          "mean": 0.08882497132497132,
          "count": 40
        },
        "f1_score": {
          "mean": 0.10104880626619757,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "target_audience": 0.625,
          "temporal_scope": 0.375,
          "detail_level": 0.55,
          "document_type": 0.95,
          "tone_preference": 0.125
        },
        "per_field_recall": {
          "target_audience": 0.625,
          "temporal_scope": 0.375,
          "detail_level": 0.55,
          "document_type": 0.95,
          "tone_preference": 0.125
        },
        "per_field_f1": {
          "target_audience": 0.625,
          "temporal_scope": 0.375,
          "detail_level": 0.55,
          "document_type": 0.95,
          "tone_preference": 0.125
        },
        "average_macro_f1": 0.525,
        "evaluated_fields": [
          "target_audience",
          "temporal_scope",
          "detail_level",
          "document_type",
          "tone_preference"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.3076923076923075,
          "median": 5.0,
          "std_dev": 1.2804983847550624,
          "min": 1.0,
          "max": 5.0,
          "count": 39
        },
        "factuality": {
          "mean": 4.3,
          "median": 4.0,
          "std_dev": 0.7578647467450734,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.333333333333333,
          "median": 4.0,
          "std_dev": 0.6622661785325219,
          "min": 3.0,
          "max": 5.0,
          "count": 39
        },
        "fluency": {
          "mean": 4.925,
          "median": 5.0,
          "std_dev": 0.2667467828369185,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.9,
          "median": 5.0,
          "std_dev": 0.3038218101251,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.487179487179487,
          "median": 5.0,
          "std_dev": 1.0226846731132635,
          "min": 1.0,
          "max": 5.0,
          "count": 39
        },
        "overall_score": {
          "mean": 4.549,
          "median": 4.7,
          "std_dev": 0.5684382563498442,
          "min": 2.7,
          "max": 5.0,
          "count": 40
        }
      }
    }
  }
}