{
  "hypotheses": [
    {
      "id": "H1_calibration",
      "type": "calibration",
      "question": "Does evidence grounding improve PTRS calibration versus a prompt-only baseline?",
      "method_a": "Evidence-grounded multi-agent system",
      "method_b": "Prompt-only LLM baseline",
      "metrics": [
        "brier_score",
        "log_loss",
        "calibration_slope",
        "pi_coverage_80"
      ],
      "expected_outcome": "Evidence grounding reduces calibration error by >20%",
      "confidence": 0.8,
      "reasoning": "Source-grounded claims should provide more reliable probability estimates"
    },
    {
      "id": "H2_architecture",
      "type": "architecture",
      "question": "Do specialized agents outperform a monolithic LLM?",
      "method_a": "Multi-agent pharmaceutical system",
      "method_b": "Single LLM with prompt engineering",
      "metrics": [
        "mape_peak_sales",
        "portfolio_rnpv",
        "decision_accuracy"
      ],
      "expected_outcome": "Multi-agent system achieves 15%+ better accuracy",
      "confidence": 0.85,
      "reasoning": "Task specialization should reduce cognitive load and improve domain reasoning"
    },
    {
      "id": "H3_constraints",
      "type": "constraints",
      "question": "Do Bass constraints improve prediction intervals?",
      "method_a": "Bass diffusion with pharmaceutical constraints",
      "method_b": "Unconstrained LLM forecasts",
      "metrics": [
        "pi_coverage_80",
        "pi_coverage_90",
        "rmse",
        "constraint_violations"
      ],
      "expected_outcome": "Constraints improve PI coverage to >75% from baseline ~60%",
      "confidence": 0.75,
      "reasoning": "Domain constraints should prevent physically impossible forecasts"
    }
  ],
  "data_split": {
    "train": "\u22642018 pharmaceutical launches",
    "validation": "2019-2021 launches",
    "test": "2022-2024 launches (held-out)"
  },
  "baselines": [
    "analog_spreadsheet_method",
    "deterministic_epidemiological_funnel",
    "prompt_only_llm_no_tools"
  ],
  "metrics": [
    "mape",
    "brier_score",
    "pi_coverage",
    "portfolio_rnpv"
  ],
  "statistical_tests": [
    "paired_bootstrap_significance",
    "spiegelhalter_calibration_test",
    "portfolio_value_under_budget_constraint"
  ],
  "sample_size": 100,
  "random_seed": 42,
  "created_by": "Claude-3.5-Sonnet-20241022",
  "created_at": "2025-09-14T01:53:49.456417"
}