{
  "number_of_domains": 4,
  "total_queries_across_domains": 160,
  "queries_per_domain": {
    "mean": 40,
    "median": 40.0,
    "values": [
      40,
      40,
      40,
      40
    ]
  },
  "averaged_scores": {
    "user_profile_accuracy": {
      "mean": 0.48247548486563185,
      "median": 0.471640709861044,
      "std_dev": 0.09377174036437491,
      "min": 0.39159689750562166,
      "max": 0.5950236222348176,
      "count": 4
    },
    "intent_capture_accuracy": {
      "mean": 0.46374999999999994,
      "median": 0.45499999999999996,
      "std_dev": 0.03614208073700245,
      "min": 0.4299999999999999,
      "max": 0.515,
      "count": 4
    },
    "intent_macro_f1": {
      "mean": 0.46374999999999994,
      "median": 0.45499999999999996,
      "std_dev": 0.03614208073700245,
      "min": 0.4299999999999999,
      "max": 0.515,
      "count": 4
    },
    "context_retrieval_accuracy": {
      "mean": 0.2525037106336917,
      "median": 0.24697216325442373,
      "std_dev": 0.025057187348959155,
      "min": 0.23025533510470608,
      "max": 0.2858151809212134,
      "count": 4
    },
    "citation_accuracy": {
      "mean": 0.2194268069273721,
      "median": 0.21710731697205182,
      "std_dev": 0.018475155369745137,
      "min": 0.20071063940734346,
      "max": 0.2427819543580413,
      "count": 4
    },
    "document_quality_score": {
      "mean": 4.20575,
      "median": 4.207,
      "std_dev": 0.04831968198018995,
      "min": 4.15475,
      "max": 4.254250000000001,
      "count": 4
    },
    "overall_score": {
      "mean": 1.1247812004853392,
      "median": 1.1292806374744642,
      "std_dev": 0.02652154602584754,
      "min": 1.0905125744035344,
      "max": 1.1500509525888942,
      "count": 4
    }
  },
  "context_retrieval_detailed_metrics": {
    "precision": {
      "mean": 0.25299116760452156,
      "median": 0.16228070175438597,
      "std_dev": 0.25811274834950015,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    },
    "recall": {
      "mean": 0.2520512430151367,
      "median": 0.16228070175438597,
      "std_dev": 0.25688922852312995,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    },
    "f1_score": {
      "mean": 0.25250371063369176,
      "median": 0.16228070175438597,
      "std_dev": 0.2574622567341271,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    }
  },
  "intent_detailed_averages": {
    "per_field_precision": {
      "tone_preference": {
        "mean": 0.08125,
        "median": 0.075,
        "std_dev": 0.03145764348029479,
        "min": 0.05,
        "max": 0.125,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.3625,
        "median": 0.375,
        "std_dev": 0.06614378277661476,
        "min": 0.275,
        "max": 0.425,
        "count": 4
      },
      "target_audience": {
        "mean": 0.6,
        "median": 0.6375,
        "std_dev": 0.10206207261596577,
        "min": 0.45,
        "max": 0.675,
        "count": 4
      },
      "detail_level": {
        "mean": 0.29375,
        "median": 0.275,
        "std_dev": 0.0746519702798705,
        "min": 0.225,
        "max": 0.4,
        "count": 4
      },
      "document_type": {
        "mean": 0.98125,
        "median": 0.9875,
        "std_dev": 0.023935677693908475,
        "min": 0.95,
        "max": 1.0,
        "count": 4
      }
    },
    "per_field_recall": {
      "tone_preference": {
        "mean": 0.08125,
        "median": 0.075,
        "std_dev": 0.03145764348029479,
        "min": 0.05,
        "max": 0.125,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.3625,
        "median": 0.375,
        "std_dev": 0.06614378277661476,
        "min": 0.275,
        "max": 0.425,
        "count": 4
      },
      "target_audience": {
        "mean": 0.6,
        "median": 0.6375,
        "std_dev": 0.10206207261596577,
        "min": 0.45,
        "max": 0.675,
        "count": 4
      },
      "detail_level": {
        "mean": 0.29375,
        "median": 0.275,
        "std_dev": 0.0746519702798705,
        "min": 0.225,
        "max": 0.4,
        "count": 4
      },
      "document_type": {
        "mean": 0.98125,
        "median": 0.9875,
        "std_dev": 0.023935677693908475,
        "min": 0.95,
        "max": 1.0,
        "count": 4
      }
    },
    "per_field_f1": {
      "tone_preference": {
        "mean": 0.08125,
        "median": 0.075,
        "std_dev": 0.03145764348029479,
        "min": 0.05,
        "max": 0.125,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.3625,
        "median": 0.375,
        "std_dev": 0.06614378277661476,
        "min": 0.275,
        "max": 0.425,
        "count": 4
      },
      "target_audience": {
        "mean": 0.6,
        "median": 0.6375,
        "std_dev": 0.10206207261596577,
        "min": 0.45,
        "max": 0.675,
        "count": 4
      },
      "detail_level": {
        "mean": 0.29375,
        "median": 0.275,
        "std_dev": 0.0746519702798705,
        "min": 0.225,
        "max": 0.4,
        "count": 4
      },
      "document_type": {
        "mean": 0.98125,
        "median": 0.9875,
        "std_dev": 0.023935677693908475,
        "min": 0.95,
        "max": 1.0,
        "count": 4
      }
    }
  },
  "quality_dimensions_averages": {
    "personalization_fidelity": {
      "mean": 4.1875,
      "median": 4.0,
      "std_dev": 0.7284643967679482,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "factuality": {
      "mean": 3.475,
      "median": 3.0,
      "std_dev": 0.5929353481148052,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "citation_quality": {
      "mean": 3.7924601226993864,
      "median": 4.0,
      "std_dev": 0.49792733672619754,
      "min": 3.0,
      "max": 5.0,
      "count": 163
    },
    "fluency": {
      "mean": 4.93125,
      "median": 5.0,
      "std_dev": 0.43613331006808387,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "structure": {
      "mean": 4.71875,
      "median": 5.0,
      "std_dev": 0.5846234610370752,
      "min": 0.0,
      "max": 5.0,
      "count": 160
    },
    "temporal_task_accuracy": {
      "mean": 4.182389937106918,
      "median": 4.0,
      "std_dev": 0.5830446702380822,
      "min": 3.0,
      "max": 5.0,
      "count": 159
    },
    "overall_score": {
      "mean": 4.20575,
      "median": 4.25,
      "std_dev": 0.43617195558949834,
      "min": 0.0,
      "max": 4.8,
      "count": 160
    },
    "temporal_accuracy": {
      "mean": 0.0,
      "median": 0.0,
      "std_dev": 0.0,
      "min": 0.0,
      "max": 0.0,
      "count": 1
    },
    "task_accuracy": {
      "mean": 0.0,
      "median": 0.0,
      "std_dev": 0.0,
      "min": 0.0,
      "max": 0.0,
      "count": 1
    }
  },
  "aggregated_score_distribution": {
    "excellent": 0,
    "good": 0,
    "fair": 0,
    "poor": 160
  },
  "overall_distribution_percentage": {
    "excellent": 0.0,
    "good": 0.0,
    "fair": 0.0,
    "poor": 100.0
  },
  "per_domain_results": {
    "gpt_5_benchmark_results_finance": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.39159689750562166,
        "intent_capture_accuracy": 0.4549999999999999,
        "intent_macro_f1": 0.4549999999999999,
        "context_retrieval_accuracy": 0.23025533510470608,
        "citation_accuracy": 0.20071063940734346,
        "document_quality_score": 4.175,
        "overall_score": 1.0905125744035344
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.23033062174675512,
          "count": 40
        },
        "recall": {
          "mean": 0.23018558173254963,
          "count": 40
        },
        "f1_score": {
          "mean": 0.2302553351047061,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.075,
          "temporal_scope": 0.35,
          "target_audience": 0.65,
          "detail_level": 0.225,
          "document_type": 0.975
        },
        "per_field_recall": {
          "tone_preference": 0.075,
          "temporal_scope": 0.35,
          "target_audience": 0.65,
          "detail_level": 0.225,
          "document_type": 0.975
        },
        "per_field_f1": {
          "tone_preference": 0.075,
          "temporal_scope": 0.35,
          "target_audience": 0.65,
          "detail_level": 0.225,
          "document_type": 0.975
        },
        "average_macro_f1": 0.4549999999999999,
        "evaluated_fields": [
          "tone_preference",
          "temporal_scope",
          "target_audience",
          "detail_level",
          "document_type"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.25,
          "median": 4.0,
          "std_dev": 0.6304251719561152,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.375,
          "median": 3.0,
          "std_dev": 0.49029033784546006,
          "min": 3.0,
          "max": 4.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 3.725,
          "median": 4.0,
          "std_dev": 0.5057363253408151,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.95,
          "median": 5.0,
          "std_dev": 0.22072142786315224,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.75,
          "median": 5.0,
          "std_dev": 0.4385290096535146,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.025,
          "median": 4.0,
          "std_dev": 0.6196566460342041,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.175,
          "median": 4.2,
          "std_dev": 0.26363338958213184,
          "min": 3.7,
          "max": 4.5,
          "count": 40
        },
        "temporal_accuracy": {
          "mean": 0.0,
          "median": 0.0,
          "std_dev": 0.0,
          "min": 0.0,
          "max": 0.0,
          "count": 1
        },
        "task_accuracy": {
          "mean": 0.0,
          "median": 0.0,
          "std_dev": 0.0,
          "min": 0.0,
          "max": 0.0,
          "count": 1
        }
      }
    },
    "gpt_5_benchmark_results_healthcare": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.42055924400371464,
        "intent_capture_accuracy": 0.4299999999999999,
        "intent_macro_f1": 0.4299999999999999,
        "context_retrieval_accuracy": 0.2574064244719019,
        "citation_accuracy": 0.2427819543580413,
        "document_quality_score": 4.239,
        "overall_score": 1.1179495245667317
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.25742447445446004,
          "count": 40
        },
        "recall": {
          "mean": 0.2573885710873376,
          "count": 40
        },
        "f1_score": {
          "mean": 0.2574064244719019,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "temporal_scope": 0.4,
          "target_audience": 0.45,
          "document_type": 0.95,
          "tone_preference": 0.075,
          "detail_level": 0.275
        },
        "per_field_recall": {
          "temporal_scope": 0.4,
          "target_audience": 0.45,
          "document_type": 0.95,
          "tone_preference": 0.075,
          "detail_level": 0.275
        },
        "per_field_f1": {
          "temporal_scope": 0.4,
          "target_audience": 0.45,
          "document_type": 0.95,
          "tone_preference": 0.075,
          "detail_level": 0.275
        },
        "average_macro_f1": 0.4299999999999999,
        "evaluated_fields": [
          "temporal_scope",
          "target_audience",
          "document_type",
          "tone_preference",
          "detail_level"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.35,
          "median": 4.0,
          "std_dev": 0.6222374845503019,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.5,
          "median": 4.0,
          "std_dev": 0.5547001962252291,
          "min": 2.0,
          "max": 4.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 3.75,
          "median": 4.0,
          "std_dev": 0.5883484054145521,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.975,
          "median": 5.0,
          "std_dev": 0.15811388300841897,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.75,
          "median": 5.0,
          "std_dev": 0.49354811679282456,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.125,
          "median": 4.0,
          "std_dev": 0.6071159226949525,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.239,
          "median": 4.3,
          "std_dev": 0.33130511152573083,
          "min": 3.3,
          "max": 4.8,
          "count": 40
        }
      }
    },
    "gpt_5_benchmark_results_manufacturing": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5950236222348176,
        "intent_capture_accuracy": 0.45499999999999996,
        "intent_macro_f1": 0.45499999999999996,
        "context_retrieval_accuracy": 0.23653790203694558,
        "citation_accuracy": 0.20944323867270837,
        "document_quality_score": 4.254250000000001,
        "overall_score": 1.1500509525888942
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.2366877619342367,
          "count": 40
        },
        "recall": {
          "mean": 0.23639145714070706,
          "count": 40
        },
        "f1_score": {
          "mean": 0.23653790203694564,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.05,
          "document_type": 1.0,
          "temporal_scope": 0.275,
          "target_audience": 0.675,
          "detail_level": 0.275
        },
        "per_field_recall": {
          "tone_preference": 0.05,
          "document_type": 1.0,
          "temporal_scope": 0.275,
          "target_audience": 0.675,
          "detail_level": 0.275
        },
        "per_field_f1": {
          "tone_preference": 0.05,
          "document_type": 1.0,
          "temporal_scope": 0.275,
          "target_audience": 0.675,
          "detail_level": 0.275
        },
        "average_macro_f1": 0.45499999999999996,
        "evaluated_fields": [
          "tone_preference",
          "document_type",
          "temporal_scope",
          "target_audience",
          "detail_level"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.075,
          "median": 4.0,
          "std_dev": 0.7970297423913428,
          "min": 2.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.575,
          "median": 4.0,
          "std_dev": 0.5006406152531231,
          "min": 3.0,
          "max": 4.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 3.875,
          "median": 4.0,
          "std_dev": 0.40430377003131995,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.95,
          "median": 5.0,
          "std_dev": 0.22072142786315224,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.725,
          "median": 5.0,
          "std_dev": 0.4522025867763026,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.325,
          "median": 4.0,
          "std_dev": 0.6155047959570695,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.25425,
          "median": 4.3,
          "std_dev": 0.2996141749753404,
          "min": 3.7,
          "max": 4.7,
          "count": 40
        }
      }
    },
    "gpt_5_benchmark_results_technology": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5227221757183734,
        "intent_capture_accuracy": 0.515,
        "intent_macro_f1": 0.515,
        "context_retrieval_accuracy": 0.2858151809212134,
        "citation_accuracy": 0.22477139527139528,
        "document_quality_score": 4.15475,
        "overall_score": 1.1406117503821966
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.28752181228263446,
          "count": 40
        },
        "recall": {
          "mean": 0.28423936209995243,
          "count": 40
        },
        "f1_score": {
          "mean": 0.2858151809212134,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.125,
          "target_audience": 0.625,
          "detail_level": 0.4,
          "document_type": 1.0,
          "temporal_scope": 0.425
        },
        "per_field_recall": {
          "tone_preference": 0.125,
          "target_audience": 0.625,
          "detail_level": 0.4,
          "document_type": 1.0,
          "temporal_scope": 0.425
        },
        "per_field_f1": {
          "tone_preference": 0.125,
          "target_audience": 0.625,
          "detail_level": 0.4,
          "document_type": 1.0,
          "temporal_scope": 0.425
        },
        "average_macro_f1": 0.515,
        "evaluated_fields": [
          "tone_preference",
          "target_audience",
          "detail_level",
          "document_type",
          "temporal_scope"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.075,
          "median": 4.0,
          "std_dev": 0.8285761652005575,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.45,
          "median": 3.5,
          "std_dev": 0.7828285334736547,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 3.8205128205128207,
          "median": 4.0,
          "std_dev": 0.5063696835418333,
          "min": 3.0,
          "max": 5.0,
          "count": 39
        },
        "fluency": {
          "mean": 4.85,
          "median": 5.0,
          "std_dev": 0.8022404524765275,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.65,
          "median": 5.0,
          "std_dev": 0.8638019716079873,
          "min": 0.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.256410256410256,
          "median": 4.0,
          "std_dev": 0.44235903005475674,
          "min": 4.0,
          "max": 5.0,
          "count": 39
        },
        "overall_score": {
          "mean": 4.15475,
          "median": 4.2,
          "std_dev": 0.70668234440761,
          "min": 0.0,
          "max": 4.8,
          "count": 40
        }
      }
    }
  }
}