{
  "id": "2e9e8c13-2259-4908-92fb-a7b58d588abf",
  "generation": 2,
  "iteration": 23,
  "timestamp": 1767618428.986909,
  "parent_id": "2eada941-048f-4828-b81c-786434f63240",
  "metrics": {
    "success": true,
    "final_score": 2.9589006960980697,
    "combined_score": 2.9589006960980697,
    "performance_metrics": {
      "avg_decode_speed": 53.41525,
      "min_decode_speed": 46.023,
      "max_decode_speed": 63.108,
      "avg_prefill_speed": 244.14375,
      "avg_memory_gb": 1.39325,
      "max_memory_gb": 1.554,
      "num_successful_tests": 4,
      "decode_speed_std": 7.40971879948895
    },
    "correctness_score": 1.0,
    "benchmark_results": [
      {
        "name": "short_context_quick",
        "decode_tokens_per_sec": 63.108,
        "prefill_tokens_per_sec": 135.605,
        "peak_memory_gb": 1.35,
        "generated_tokens": 50,
        "total_time_sec": 4.579478166997433
      },
      {
        "name": "code_generation",
        "decode_tokens_per_sec": 58.112,
        "prefill_tokens_per_sec": 355.6,
        "peak_memory_gb": 1.291,
        "generated_tokens": 300,
        "total_time_sec": 9.802399624997634
      },
      {
        "name": "long_context_detailed",
        "decode_tokens_per_sec": 46.023,
        "prefill_tokens_per_sec": 303.484,
        "peak_memory_gb": 1.554,
        "generated_tokens": 500,
        "total_time_sec": 17.30625654100004
      },
      {
        "name": "long_generation",
        "decode_tokens_per_sec": 46.418,
        "prefill_tokens_per_sec": 181.886,
        "peak_memory_gb": 1.378,
        "generated_tokens": 1000,
        "total_time_sec": 27.308918209004332
      }
    ],
    "baseline_comparison": {
      "avg_decode_improvement_pct": -3.199533101300643,
      "avg_decode_improvement_absolute": -1.6237500000000011,
      "memory_change_gb": -0.005749999999999922,
      "target_achieved": false,
      "num_benchmarks_improved": 1,
      "total_benchmarks": 4,
      "safety_score": 100.0
    },
    "individual_comparisons": [
      {
        "benchmark_name": "short_context_quick",
        "baseline": {
          "name": "short_context_quick",
          "decode_tokens_per_sec": 59.061,
          "prefill_tokens_per_sec": 48.778,
          "peak_memory_gb": 1.416,
          "generated_tokens": 50,
          "total_time_sec": 4.753752667005756
        },
        "custom": {
          "name": "short_context_quick",
          "decode_tokens_per_sec": 63.108,
          "prefill_tokens_per_sec": 135.605,
          "peak_memory_gb": 1.35,
          "generated_tokens": 50,
          "total_time_sec": 4.579478166997433
        },
        "improvements": {
          "decode_speed_pct": 6.852237517143288,
          "prefill_speed_pct": 178.00442822583952,
          "total_speed_pct": 3.80555368216958,
          "memory_reduction_pct": 4.888888888888877,
          "time_reduction_pct": 3.80555368216959
        }
      },
      {
        "benchmark_name": "code_generation",
        "baseline": {
          "name": "code_generation",
          "decode_tokens_per_sec": 58.335,
          "prefill_tokens_per_sec": 214.398,
          "peak_memory_gb": 1.277,
          "generated_tokens": 300,
          "total_time_sec": 9.370090207994508
        },
        "custom": {
          "name": "code_generation",
          "decode_tokens_per_sec": 58.112,
          "prefill_tokens_per_sec": 355.6,
          "peak_memory_gb": 1.291,
          "generated_tokens": 300,
          "total_time_sec": 9.802399624997634
        },
        "improvements": {
          "decode_speed_pct": -0.382274792148794,
          "prefill_speed_pct": 65.85975615444175,
          "total_speed_pct": -4.41024069147997,
          "memory_reduction_pct": -1.0844306738962055,
          "time_reduction_pct": -4.4102406914799674
        }
      },
      {
        "benchmark_name": "long_context_detailed",
        "baseline": {
          "name": "long_context_detailed",
          "decode_tokens_per_sec": 54.724,
          "prefill_tokens_per_sec": 853.686,
          "peak_memory_gb": 1.543,
          "generated_tokens": 500,
          "total_time_sec": 13.029723457999353
        },
        "custom": {
          "name": "long_context_detailed",
          "decode_tokens_per_sec": 46.023,
          "prefill_tokens_per_sec": 303.484,
          "peak_memory_gb": 1.554,
          "generated_tokens": 500,
          "total_time_sec": 17.30625654100004
        },
        "improvements": {
          "decode_speed_pct": -15.899788027190983,
          "prefill_speed_pct": -64.45016083196866,
          "total_speed_pct": -24.710907716346416,
          "memory_reduction_pct": -0.7078507078507156,
          "time_reduction_pct": -24.710907716346423
        }
      },
      {
        "benchmark_name": "long_generation",
        "baseline": {
          "name": "long_generation",
          "decode_tokens_per_sec": 48.036,
          "prefill_tokens_per_sec": 164.979,
          "peak_memory_gb": 1.36,
          "generated_tokens": 1000,
          "total_time_sec": 26.132775457997923
        },
        "custom": {
          "name": "long_generation",
          "decode_tokens_per_sec": 46.418,
          "prefill_tokens_per_sec": 181.886,
          "peak_memory_gb": 1.378,
          "generated_tokens": 1000,
          "total_time_sec": 27.308918209004332
        },
        "improvements": {
          "decode_speed_pct": -3.368307103006083,
          "prefill_speed_pct": 10.247970953878967,
          "total_speed_pct": -4.306808281474169,
          "memory_reduction_pct": -1.3062409288824235,
          "time_reduction_pct": -4.306808281474182
        }
      }
    ],
    "summary": "Bulletproof Custom GQA Implementation Results:\n\u2022 Decode Speed: 53.4 tokens/sec (baseline: 55.0)\n\u2022 Improvement: -3.2%\n\u2022 Memory Usage: 1.39 GB\n\u2022 Correctness: 100.0%\n\u2022 Safety Score: 100.0/100\n\u2022 Tests Passed: 4/4\n\u2022 Benchmarks Improved: 1/4\n\u2022 Metal Errors Handled: 0\n\ud83d\udee1\ufe0f  PERFECT SAFETY: No Metal kernel errors\n\u26a0\ufe0f  NO IMPROVEMENT: Performance regression",
    "metal_safety_statistics": {
      "metal_command_buffer_errors": 0,
      "metal_memory_violations": 0,
      "metal_compilation_errors": 0,
      "gpu_resource_errors": 0,
      "total_metal_errors": 0,
      "successful_fallbacks": 0,
      "retry_attempts_used": 0,
      "safety_score": 100.0,
      "error_breakdown": {
        "command_buffer_pct": 0.0,
        "memory_violation_pct": 0.0,
        "compilation_error_pct": 0.0,
        "resource_error_pct": 0.0
      }
    },
    "safety_validation": {
      "success": true,
      "validated": true
    }
  },
  "language": "python",
  "saved_at": 1767619206.585282
}