{
  "number_of_domains": 4,
  "total_queries_across_domains": 160,
  "queries_per_domain": {
    "mean": 40,
    "median": 40.0,
    "values": [
      40,
      40,
      40,
      40
    ]
  },
  "averaged_scores": {
    "user_profile_accuracy": {
      "mean": 0.4795637900241193,
      "median": 0.4828584240565057,
      "std_dev": 0.09593942703724077,
      "min": 0.37441328972552135,
      "max": 0.5781250222579445,
      "count": 4
    },
    "intent_capture_accuracy": {
      "mean": 0.53,
      "median": 0.5274999999999999,
      "std_dev": 0.014719601443879859,
      "min": 0.5149999999999999,
      "max": 0.5500000000000002,
      "count": 4
    },
    "intent_macro_f1": {
      "mean": 0.53,
      "median": 0.5274999999999999,
      "std_dev": 0.014719601443879859,
      "min": 0.5149999999999999,
      "max": 0.5500000000000002,
      "count": 4
    },
    "context_retrieval_accuracy": {
      "mean": 0.1634412134888092,
      "median": 0.16793208099383466,
      "std_dev": 0.01854614898421943,
      "min": 0.1375014775852325,
      "max": 0.18039921438233483,
      "count": 4
    },
    "citation_accuracy": {
      "mean": 0.10268585963270226,
      "median": 0.10636474077103705,
      "std_dev": 0.014212154604331733,
      "min": 0.08326711665527455,
      "max": 0.11474684033346035,
      "count": 4
    },
    "document_quality_score": {
      "mean": 4.464874999999999,
      "median": 4.4628749999999995,
      "std_dev": 0.02691847259163628,
      "min": 4.435249999999999,
      "max": 4.498499999999999,
      "count": 4
    },
    "overall_score": {
      "mean": 1.1481131726291263,
      "median": 1.1465971049624903,
      "std_dev": 0.02685985714085113,
      "min": 1.1212240079897147,
      "max": 1.1780344726018097,
      "count": 4
    }
  },
  "context_retrieval_detailed_metrics": {
    "precision": {
      "mean": 0.2687994614201577,
      "median": 0.1111111111111111,
      "std_dev": 0.3347502575998129,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    },
    "recall": {
      "mean": 0.14096061517925096,
      "median": 0.0510752688172043,
      "std_dev": 0.19268992042039373,
      "min": 0.0,
      "max": 0.86,
      "count": 160
    },
    "f1_score": {
      "mean": 0.16344121348880922,
      "median": 0.06639247943595769,
      "std_dev": 0.21482380052279787,
      "min": 0.0,
      "max": 0.888888888888889,
      "count": 160
    }
  },
  "intent_detailed_averages": {
    "per_field_precision": {
      "tone_preference": {
        "mean": 0.1625,
        "median": 0.1625,
        "std_dev": 0.05951190357119042,
        "min": 0.1,
        "max": 0.225,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.3,
        "median": 0.2875,
        "std_dev": 0.06123724356957946,
        "min": 0.25,
        "max": 0.375,
        "count": 4
      },
      "target_audience": {
        "mean": 0.7,
        "median": 0.6875,
        "std_dev": 0.0790569415042095,
        "min": 0.625,
        "max": 0.8,
        "count": 4
      },
      "document_type": {
        "mean": 0.9875,
        "median": 0.9875,
        "std_dev": 0.014433756729740658,
        "min": 0.975,
        "max": 1.0,
        "count": 4
      },
      "detail_level": {
        "mean": 0.5,
        "median": 0.4875,
        "std_dev": 0.03535533905932741,
        "min": 0.475,
        "max": 0.55,
        "count": 4
      }
    },
    "per_field_recall": {
      "tone_preference": {
        "mean": 0.1625,
        "median": 0.1625,
        "std_dev": 0.05951190357119042,
        "min": 0.1,
        "max": 0.225,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.3,
        "median": 0.2875,
        "std_dev": 0.06123724356957946,
        "min": 0.25,
        "max": 0.375,
        "count": 4
      },
      "target_audience": {
        "mean": 0.7,
        "median": 0.6875,
        "std_dev": 0.0790569415042095,
        "min": 0.625,
        "max": 0.8,
        "count": 4
      },
      "document_type": {
        "mean": 0.9875,
        "median": 0.9875,
        "std_dev": 0.014433756729740658,
        "min": 0.975,
        "max": 1.0,
        "count": 4
      },
      "detail_level": {
        "mean": 0.5,
        "median": 0.4875,
        "std_dev": 0.03535533905932741,
        "min": 0.475,
        "max": 0.55,
        "count": 4
      }
    },
    "per_field_f1": {
      "tone_preference": {
        "mean": 0.1625,
        "median": 0.1625,
        "std_dev": 0.05951190357119042,
        "min": 0.1,
        "max": 0.225,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.3,
        "median": 0.2875,
        "std_dev": 0.06123724356957946,
        "min": 0.25,
        "max": 0.375,
        "count": 4
      },
      "target_audience": {
        "mean": 0.7,
        "median": 0.6875,
        "std_dev": 0.0790569415042095,
        "min": 0.625,
        "max": 0.8,
        "count": 4
      },
      "document_type": {
        "mean": 0.9875,
        "median": 0.9875,
        "std_dev": 0.014433756729740658,
        "min": 0.975,
        "max": 1.0,
        "count": 4
      },
      "detail_level": {
        "mean": 0.5,
        "median": 0.4875,
        "std_dev": 0.03535533905932741,
        "min": 0.475,
        "max": 0.55,
        "count": 4
      }
    }
  },
  "quality_dimensions_averages": {
    "personalization_fidelity": {
      "mean": 4.3875,
      "median": 4.0,
      "std_dev": 0.5138031828354355,
      "min": 3.0,
      "max": 5.0,
      "count": 160
    },
    "factuality": {
      "mean": 4.30625,
      "median": 4.0,
      "std_dev": 0.5139179265409272,
      "min": 3.0,
      "max": 5.0,
      "count": 160
    },
    "citation_quality": {
      "mean": 4.23125,
      "median": 4.0,
      "std_dev": 0.44619535976941155,
      "min": 3.0,
      "max": 5.0,
      "count": 160
    },
    "fluency": {
      "mean": 4.9875,
      "median": 5.0,
      "std_dev": 0.11145126131093691,
      "min": 4.0,
      "max": 5.0,
      "count": 160
    },
    "structure": {
      "mean": 4.50625,
      "median": 5.0,
      "std_dev": 0.5015306758992161,
      "min": 4.0,
      "max": 5.0,
      "count": 160
    },
    "temporal_task_accuracy": {
      "mean": 4.29375,
      "median": 4.0,
      "std_dev": 0.4964892469399957,
      "min": 3.0,
      "max": 5.0,
      "count": 160
    },
    "overall_score": {
      "mean": 4.464875,
      "median": 4.3,
      "std_dev": 0.34931737114859246,
      "min": 3.33,
      "max": 5.0,
      "count": 160
    }
  },
  "aggregated_score_distribution": {
    "excellent": 0,
    "good": 0,
    "fair": 0,
    "poor": 160
  },
  "overall_distribution_percentage": {
    "excellent": 0.0,
    "good": 0.0,
    "fair": 0.0,
    "poor": 100.0
  },
  "per_domain_results": {
    "gpt_4o_benchmark_results_finance": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.37441328972552135,
        "intent_capture_accuracy": 0.5149999999999999,
        "intent_macro_f1": 0.5149999999999999,
        "context_retrieval_accuracy": 0.18039921438233483,
        "citation_accuracy": 0.10105753584071664,
        "document_quality_score": 4.435249999999999,
        "overall_score": 1.1212240079897147
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.30069518100535847,
          "count": 40
        },
        "recall": {
          "mean": 0.15395767545261976,
          "count": 40
        },
        "f1_score": {
          "mean": 0.18039921438233483,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.125,
          "temporal_scope": 0.25,
          "target_audience": 0.65,
          "document_type": 1.0,
          "detail_level": 0.55
        },
        "per_field_recall": {
          "tone_preference": 0.125,
          "temporal_scope": 0.25,
          "target_audience": 0.65,
          "document_type": 1.0,
          "detail_level": 0.55
        },
        "per_field_f1": {
          "tone_preference": 0.125,
          "temporal_scope": 0.25,
          "target_audience": 0.65,
          "document_type": 1.0,
          "detail_level": 0.55
        },
        "average_macro_f1": 0.5149999999999999,
        "evaluated_fields": [
          "tone_preference",
          "temporal_scope",
          "target_audience",
          "document_type",
          "detail_level"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.4,
          "median": 4.0,
          "std_dev": 0.5453768398418634,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.225,
          "median": 4.0,
          "std_dev": 0.5304811555656397,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.175,
          "median": 4.0,
          "std_dev": 0.4464963318538847,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.975,
          "median": 5.0,
          "std_dev": 0.15811388300841897,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.5,
          "median": 4.5,
          "std_dev": 0.5063696835418333,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.275,
          "median": 4.0,
          "std_dev": 0.5541220822628188,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.43525,
          "median": 4.4,
          "std_dev": 0.3572256881968209,
          "min": 3.5,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "gpt_4o_benchmark_results_healthcare": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.42446121718240964,
        "intent_capture_accuracy": 0.5249999999999999,
        "intent_macro_f1": 0.5249999999999999,
        "context_retrieval_accuracy": 0.16398876693792427,
        "citation_accuracy": 0.08326711665527455,
        "document_quality_score": 4.453999999999999,
        "overall_score": 1.1301434201551217
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.24803748059655767,
          "count": 40
        },
        "recall": {
          "mean": 0.14526316318803986,
          "count": 40
        },
        "f1_score": {
          "mean": 0.16398876693792433,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.225,
          "detail_level": 0.475,
          "target_audience": 0.625,
          "document_type": 0.975,
          "temporal_scope": 0.325
        },
        "per_field_recall": {
          "tone_preference": 0.225,
          "detail_level": 0.475,
          "target_audience": 0.625,
          "document_type": 0.975,
          "temporal_scope": 0.325
        },
        "per_field_f1": {
          "tone_preference": 0.225,
          "detail_level": 0.475,
          "target_audience": 0.625,
          "document_type": 0.975,
          "temporal_scope": 0.325
        },
        "average_macro_f1": 0.5249999999999999,
        "evaluated_fields": [
          "tone_preference",
          "detail_level",
          "target_audience",
          "document_type",
          "temporal_scope"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.375,
          "median": 4.0,
          "std_dev": 0.5400617248673217,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.3,
          "median": 4.0,
          "std_dev": 0.563869415688347,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.225,
          "median": 4.0,
          "std_dev": 0.47971679679598994,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 4.975,
          "median": 5.0,
          "std_dev": 0.15811388300841897,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.525,
          "median": 5.0,
          "std_dev": 0.5057363253408151,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.25,
          "median": 4.0,
          "std_dev": 0.49354811679282456,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.454,
          "median": 4.3,
          "std_dev": 0.3840793187310702,
          "min": 3.33,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "gpt_4o_benchmark_results_manufacturing": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5781250222579445,
        "intent_capture_accuracy": 0.5299999999999999,
        "intent_macro_f1": 0.5299999999999999,
        "context_retrieval_accuracy": 0.17187539504974508,
        "citation_accuracy": 0.11167194570135748,
        "document_quality_score": 4.498499999999999,
        "overall_score": 1.1780344726018097
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.31855394388885055,
          "count": 40
        },
        "recall": {
          "mean": 0.1435244254226296,
          "count": 40
        },
        "f1_score": {
          "mean": 0.1718753950497451,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "document_type": 1.0,
          "temporal_scope": 0.25,
          "detail_level": 0.5,
          "tone_preference": 0.1,
          "target_audience": 0.8
        },
        "per_field_recall": {
          "document_type": 1.0,
          "temporal_scope": 0.25,
          "detail_level": 0.5,
          "tone_preference": 0.1,
          "target_audience": 0.8
        },
        "per_field_f1": {
          "document_type": 1.0,
          "temporal_scope": 0.25,
          "detail_level": 0.5,
          "tone_preference": 0.1,
          "target_audience": 0.8
        },
        "average_macro_f1": 0.5299999999999999,
        "evaluated_fields": [
          "document_type",
          "temporal_scope",
          "detail_level",
          "tone_preference",
          "target_audience"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.425,
          "median": 4.0,
          "std_dev": 0.5006406152531231,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.35,
          "median": 4.0,
          "std_dev": 0.48304589153964794,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.3,
          "median": 4.0,
          "std_dev": 0.4640954808922571,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 5.0,
          "median": 5.0,
          "std_dev": 0.0,
          "min": 5.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.5,
          "median": 4.5,
          "std_dev": 0.5063696835418333,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.325,
          "median": 4.0,
          "std_dev": 0.4743416490252569,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.4985,
          "median": 4.4,
          "std_dev": 0.34185429494794844,
          "min": 4.2,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "gpt_4o_benchmark_results_technology": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5412556309306018,
        "intent_capture_accuracy": 0.5500000000000002,
        "intent_macro_f1": 0.5500000000000002,
        "context_retrieval_accuracy": 0.1375014775852325,
        "citation_accuracy": 0.11474684033346035,
        "document_quality_score": 4.47175,
        "overall_score": 1.163050789769859
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.20791124018986418,
          "count": 40
        },
        "recall": {
          "mean": 0.12109719665371457,
          "count": 40
        },
        "f1_score": {
          "mean": 0.13750147758523254,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "tone_preference": 0.2,
          "document_type": 0.975,
          "detail_level": 0.475,
          "temporal_scope": 0.375,
          "target_audience": 0.725
        },
        "per_field_recall": {
          "tone_preference": 0.2,
          "document_type": 0.975,
          "detail_level": 0.475,
          "temporal_scope": 0.375,
          "target_audience": 0.725
        },
        "per_field_f1": {
          "tone_preference": 0.2,
          "document_type": 0.975,
          "detail_level": 0.475,
          "temporal_scope": 0.375,
          "target_audience": 0.725
        },
        "average_macro_f1": 0.5500000000000002,
        "evaluated_fields": [
          "tone_preference",
          "document_type",
          "detail_level",
          "temporal_scope",
          "target_audience"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.35,
          "median": 4.0,
          "std_dev": 0.48304589153964794,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 4.35,
          "median": 4.0,
          "std_dev": 0.48304589153964794,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.225,
          "median": 4.0,
          "std_dev": 0.42290206176626033,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 5.0,
          "median": 5.0,
          "std_dev": 0.0,
          "min": 5.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.5,
          "median": 4.5,
          "std_dev": 0.5063696835418333,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.325,
          "median": 4.0,
          "std_dev": 0.4743416490252569,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.47175,
          "median": 4.3,
          "std_dev": 0.32133445712113307,
          "min": 4.2,
          "max": 5.0,
          "count": 40
        }
      }
    }
  }
}