{
  "number_of_domains": 4,
  "total_queries_across_domains": 160,
  "queries_per_domain": {
    "mean": 40,
    "median": 40.0,
    "values": [
      40,
      40,
      40,
      40
    ]
  },
  "averaged_scores": {
    "user_profile_accuracy": {
      "mean": 0.495035596714303,
      "median": 0.4920271356901378,
      "std_dev": 0.09845010723772495,
      "min": 0.3963720281329078,
      "max": 0.5997160873440286,
      "count": 4
    },
    "intent_capture_accuracy": {
      "mean": 0.5025,
      "median": 0.4899999999999999,
      "std_dev": 0.03947573094109023,
      "min": 0.46999999999999986,
      "max": 0.5600000000000003,
      "count": 4
    },
    "intent_macro_f1": {
      "mean": 0.5025,
      "median": 0.4899999999999999,
      "std_dev": 0.03947573094109023,
      "min": 0.46999999999999986,
      "max": 0.5600000000000003,
      "count": 4
    },
    "context_retrieval_accuracy": {
      "mean": 0.16713641965143963,
      "median": 0.16052880732055555,
      "std_dev": 0.02240069642401486,
      "min": 0.14955937714834425,
      "max": 0.19792868681630316,
      "count": 4
    },
    "citation_accuracy": {
      "mean": 0.13038981758530652,
      "median": 0.13386441354543216,
      "std_dev": 0.016834608230848733,
      "min": 0.10747122145363996,
      "max": 0.14635922179672178,
      "count": 4
    },
    "document_quality_score": {
      "mean": 4.2630625,
      "median": 4.284750000000001,
      "std_dev": 0.04507973260420545,
      "min": 4.1955,
      "max": 4.287249999999999,
      "count": 4
    },
    "overall_score": {
      "mean": 1.1116248667902098,
      "median": 1.106467174469853,
      "std_dev": 0.023060003123654537,
      "min": 1.0896022672278551,
      "max": 1.1439628509932784,
      "count": 4
    }
  },
  "context_retrieval_detailed_metrics": {
    "precision": {
      "mean": 0.19247971755877727,
      "median": 0.08452380952380953,
      "std_dev": 0.24611978275166657,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    },
    "recall": {
      "mean": 0.16053073215049055,
      "median": 0.05785296574770259,
      "std_dev": 0.22112306453155464,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    },
    "f1_score": {
      "mean": 0.16713641965143963,
      "median": 0.06704119850187265,
      "std_dev": 0.225518170384443,
      "min": 0.0,
      "max": 1.0,
      "count": 160
    }
  },
  "intent_detailed_averages": {
    "per_field_precision": {
      "target_audience": {
        "mean": 0.65625,
        "median": 0.6875,
        "std_dev": 0.07180703308172533,
        "min": 0.55,
        "max": 0.7,
        "count": 4
      },
      "tone_preference": {
        "mean": 0.16875,
        "median": 0.1875,
        "std_dev": 0.07180703308172537,
        "min": 0.075,
        "max": 0.225,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.325,
        "median": 0.30000000000000004,
        "std_dev": 0.08897565210026093,
        "min": 0.25,
        "max": 0.45,
        "count": 4
      },
      "detail_level": {
        "mean": 0.3625,
        "median": 0.3875,
        "std_dev": 0.07772815877574013,
        "min": 0.25,
        "max": 0.425,
        "count": 4
      },
      "document_type": {
        "mean": 1.0,
        "median": 1.0,
        "std_dev": 0.0,
        "min": 1.0,
        "max": 1.0,
        "count": 4
      }
    },
    "per_field_recall": {
      "target_audience": {
        "mean": 0.65625,
        "median": 0.6875,
        "std_dev": 0.07180703308172533,
        "min": 0.55,
        "max": 0.7,
        "count": 4
      },
      "tone_preference": {
        "mean": 0.16875,
        "median": 0.1875,
        "std_dev": 0.07180703308172537,
        "min": 0.075,
        "max": 0.225,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.325,
        "median": 0.30000000000000004,
        "std_dev": 0.08897565210026093,
        "min": 0.25,
        "max": 0.45,
        "count": 4
      },
      "detail_level": {
        "mean": 0.3625,
        "median": 0.3875,
        "std_dev": 0.07772815877574013,
        "min": 0.25,
        "max": 0.425,
        "count": 4
      },
      "document_type": {
        "mean": 1.0,
        "median": 1.0,
        "std_dev": 0.0,
        "min": 1.0,
        "max": 1.0,
        "count": 4
      }
    },
    "per_field_f1": {
      "target_audience": {
        "mean": 0.65625,
        "median": 0.6875,
        "std_dev": 0.07180703308172533,
        "min": 0.55,
        "max": 0.7,
        "count": 4
      },
      "tone_preference": {
        "mean": 0.16875,
        "median": 0.1875,
        "std_dev": 0.07180703308172537,
        "min": 0.075,
        "max": 0.225,
        "count": 4
      },
      "temporal_scope": {
        "mean": 0.325,
        "median": 0.30000000000000004,
        "std_dev": 0.08897565210026093,
        "min": 0.25,
        "max": 0.45,
        "count": 4
      },
      "detail_level": {
        "mean": 0.3625,
        "median": 0.3875,
        "std_dev": 0.07772815877574013,
        "min": 0.25,
        "max": 0.425,
        "count": 4
      },
      "document_type": {
        "mean": 1.0,
        "median": 1.0,
        "std_dev": 0.0,
        "min": 1.0,
        "max": 1.0,
        "count": 4
      }
    }
  },
  "quality_dimensions_averages": {
    "personalization_fidelity": {
      "mean": 4.13125,
      "median": 4.0,
      "std_dev": 0.356818136296261,
      "min": 3.0,
      "max": 5.0,
      "count": 160
    },
    "factuality": {
      "mean": 3.68125,
      "median": 4.0,
      "std_dev": 0.6575337473702909,
      "min": 3.0,
      "max": 5.0,
      "count": 160
    },
    "citation_quality": {
      "mean": 4.11875,
      "median": 4.0,
      "std_dev": 0.3205556788633353,
      "min": 4.0,
      "max": 5.0,
      "count": 160
    },
    "fluency": {
      "mean": 5.0,
      "median": 5.0,
      "std_dev": 0.0,
      "min": 5.0,
      "max": 5.0,
      "count": 160
    },
    "structure": {
      "mean": 4.45625,
      "median": 4.0,
      "std_dev": 0.49964610117071295,
      "min": 4.0,
      "max": 5.0,
      "count": 160
    },
    "temporal_task_accuracy": {
      "mean": 4.19375,
      "median": 4.0,
      "std_dev": 0.3964762240367196,
      "min": 4.0,
      "max": 5.0,
      "count": 160
    },
    "overall_score": {
      "mean": 4.2630625,
      "median": 4.17,
      "std_dev": 0.31718295590356566,
      "min": 4.0,
      "max": 5.0,
      "count": 160
    }
  },
  "aggregated_score_distribution": {
    "excellent": 0,
    "good": 0,
    "fair": 0,
    "poor": 160
  },
  "overall_distribution_percentage": {
    "excellent": 0.0,
    "good": 0.0,
    "fair": 0.0,
    "poor": 100.0
  },
  "per_domain_results": {
    "gpt_41_benchmark_results_finance": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.3963720281329078,
        "intent_capture_accuracy": 0.48999999999999994,
        "intent_macro_f1": 0.48999999999999994,
        "context_retrieval_accuracy": 0.14955937714834425,
        "citation_accuracy": 0.12907993085802338,
        "document_quality_score": 4.283,
        "overall_score": 1.0896022672278551
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.16485666751049985,
          "count": 40
        },
        "recall": {
          "mean": 0.14536368827380416,
          "count": 40
        },
        "f1_score": {
          "mean": 0.14955937714834427,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "target_audience": 0.675,
          "tone_preference": 0.15,
          "temporal_scope": 0.25,
          "detail_level": 0.375,
          "document_type": 1.0
        },
        "per_field_recall": {
          "target_audience": 0.675,
          "tone_preference": 0.15,
          "temporal_scope": 0.25,
          "detail_level": 0.375,
          "document_type": 1.0
        },
        "per_field_f1": {
          "target_audience": 0.675,
          "tone_preference": 0.15,
          "temporal_scope": 0.25,
          "detail_level": 0.375,
          "document_type": 1.0
        },
        "average_macro_f1": 0.48999999999999994,
        "evaluated_fields": [
          "target_audience",
          "tone_preference",
          "temporal_scope",
          "detail_level",
          "document_type"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.1,
          "median": 4.0,
          "std_dev": 0.3789323733725367,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.8,
          "median": 4.0,
          "std_dev": 0.6076436202502,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.1,
          "median": 4.0,
          "std_dev": 0.3038218101251,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 5.0,
          "median": 5.0,
          "std_dev": 0.0,
          "min": 5.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.5,
          "median": 4.5,
          "std_dev": 0.5063696835418333,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.2,
          "median": 4.0,
          "std_dev": 0.40509574683346666,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.283,
          "median": 4.25,
          "std_dev": 0.29976229899277845,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "gpt_41_benchmark_results_healthcare": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.4273484518515073,
        "intent_capture_accuracy": 0.46999999999999986,
        "intent_macro_f1": 0.46999999999999986,
        "context_retrieval_accuracy": 0.19792868681630316,
        "citation_accuracy": 0.1386488962328409,
        "document_quality_score": 4.2865,
        "overall_score": 1.10408520698013
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.21092485021884513,
          "count": 40
        },
        "recall": {
          "mean": 0.1916533890105192,
          "count": 40
        },
        "f1_score": {
          "mean": 0.19792868681630316,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "target_audience": 0.55,
          "document_type": 1.0,
          "temporal_scope": 0.325,
          "detail_level": 0.25,
          "tone_preference": 0.225
        },
        "per_field_recall": {
          "target_audience": 0.55,
          "document_type": 1.0,
          "temporal_scope": 0.325,
          "detail_level": 0.25,
          "tone_preference": 0.225
        },
        "per_field_f1": {
          "target_audience": 0.55,
          "document_type": 1.0,
          "temporal_scope": 0.325,
          "detail_level": 0.25,
          "tone_preference": 0.225
        },
        "average_macro_f1": 0.46999999999999986,
        "evaluated_fields": [
          "target_audience",
          "document_type",
          "temporal_scope",
          "detail_level",
          "tone_preference"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.125,
          "median": 4.0,
          "std_dev": 0.3349320635285418,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.75,
          "median": 4.0,
          "std_dev": 0.6304251719561152,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.125,
          "median": 4.0,
          "std_dev": 0.3349320635285418,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 5.0,
          "median": 5.0,
          "std_dev": 0.0,
          "min": 5.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.55,
          "median": 5.0,
          "std_dev": 0.503831473655779,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.175,
          "median": 4.0,
          "std_dev": 0.3848076442547927,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.2865,
          "median": 4.33,
          "std_dev": 0.30612172876245297,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "gpt_41_benchmark_results_manufacturing": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5997160873440286,
        "intent_capture_accuracy": 0.4899999999999999,
        "intent_macro_f1": 0.4899999999999999,
        "context_retrieval_accuracy": 0.15155840100021087,
        "citation_accuracy": 0.10747122145363996,
        "document_quality_score": 4.1955,
        "overall_score": 1.108849141959576
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.16950293637817818,
          "count": 40
        },
        "recall": {
          "mean": 0.14350735425454578,
          "count": 40
        },
        "f1_score": {
          "mean": 0.15155840100021087,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "temporal_scope": 0.275,
          "detail_level": 0.4,
          "tone_preference": 0.075,
          "target_audience": 0.7,
          "document_type": 1.0
        },
        "per_field_recall": {
          "temporal_scope": 0.275,
          "detail_level": 0.4,
          "tone_preference": 0.075,
          "target_audience": 0.7,
          "document_type": 1.0
        },
        "per_field_f1": {
          "temporal_scope": 0.275,
          "detail_level": 0.4,
          "tone_preference": 0.075,
          "target_audience": 0.7,
          "document_type": 1.0
        },
        "average_macro_f1": 0.4899999999999999,
        "evaluated_fields": [
          "temporal_scope",
          "detail_level",
          "tone_preference",
          "target_audience",
          "document_type"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.1,
          "median": 4.0,
          "std_dev": 0.3038218101251,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.475,
          "median": 3.0,
          "std_dev": 0.5986094998689324,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.075,
          "median": 4.0,
          "std_dev": 0.2667467828369185,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 5.0,
          "median": 5.0,
          "std_dev": 0.0,
          "min": 5.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.35,
          "median": 4.0,
          "std_dev": 0.48304589153964794,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.175,
          "median": 4.0,
          "std_dev": 0.3848076442547927,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.1955,
          "median": 4.0,
          "std_dev": 0.2868033365324641,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        }
      }
    },
    "gpt_41_benchmark_results_technology": {
      "total_queries": 40,
      "average_scores": {
        "user_profile_accuracy": 0.5567058195287683,
        "intent_capture_accuracy": 0.5600000000000003,
        "intent_macro_f1": 0.5600000000000003,
        "context_retrieval_accuracy": 0.16949921364090023,
        "citation_accuracy": 0.14635922179672178,
        "document_quality_score": 4.287249999999999,
        "overall_score": 1.1439628509932784
      },
      "context_retrieval_metrics": {
        "precision": {
          "mean": 0.2246344161275859,
          "count": 40
        },
        "recall": {
          "mean": 0.161598497063093,
          "count": 40
        },
        "f1_score": {
          "mean": 0.16949921364090023,
          "count": 40
        }
      },
      "intent_detailed_metrics": {
        "per_field_precision": {
          "document_type": 1.0,
          "target_audience": 0.7,
          "temporal_scope": 0.45,
          "detail_level": 0.425,
          "tone_preference": 0.225
        },
        "per_field_recall": {
          "document_type": 1.0,
          "target_audience": 0.7,
          "temporal_scope": 0.45,
          "detail_level": 0.425,
          "tone_preference": 0.225
        },
        "per_field_f1": {
          "document_type": 1.0,
          "target_audience": 0.7,
          "temporal_scope": 0.45,
          "detail_level": 0.425,
          "tone_preference": 0.225
        },
        "average_macro_f1": 0.5600000000000003,
        "evaluated_fields": [
          "document_type",
          "target_audience",
          "temporal_scope",
          "detail_level",
          "tone_preference"
        ]
      },
      "score_distribution": {
        "excellent": 0,
        "good": 0,
        "fair": 0,
        "poor": 40
      },
      "quality_dimensions": {
        "personalization_fidelity": {
          "mean": 4.2,
          "median": 4.0,
          "std_dev": 0.40509574683346666,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "factuality": {
          "mean": 3.7,
          "median": 4.0,
          "std_dev": 0.7578647467450734,
          "min": 3.0,
          "max": 5.0,
          "count": 40
        },
        "citation_quality": {
          "mean": 4.175,
          "median": 4.0,
          "std_dev": 0.3848076442547927,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "fluency": {
          "mean": 5.0,
          "median": 5.0,
          "std_dev": 0.0,
          "min": 5.0,
          "max": 5.0,
          "count": 40
        },
        "structure": {
          "mean": 4.425,
          "median": 4.0,
          "std_dev": 0.5006406152531231,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "temporal_task_accuracy": {
          "mean": 4.225,
          "median": 4.0,
          "std_dev": 0.42290206176626033,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        },
        "overall_score": {
          "mean": 4.28725,
          "median": 4.17,
          "std_dev": 0.3715938470689298,
          "min": 4.0,
          "max": 5.0,
          "count": 40
        }
      }
    }
  }
}