{
  "summary": {
    "total_instances": 57,
    "avg_turns": 25.65,
    "stuck_in_loop_percentage": 1.75,
    "overall_tool_accuracy": 89.53,
    "total_prompt_tokens": 10766139,
    "total_completion_tokens": 163827,
    "avg_prompt_tokens": 188879.63,
    "avg_completion_tokens": 2874.16,
    "classification_counts": {
      "empty_patch": 57,
      "resolved": 0,
      "unresolved": 0,
      "other": 0
    }
  },
  "instance_ids": [
    "django__django-15695",
    "django__django-15738",
    "pydata__xarray-4493",
    "sympy__sympy-18087",
    "django__django-16873",
    "django__django-12747",
    "sympy__sympy-18621",
    "django__django-12908",
    "sphinx-doc__sphinx-8801",
    "django__django-14667",
    "sympy__sympy-16503",
    "django__django-15388",
    "sympy__sympy-21614",
    "sphinx-doc__sphinx-8435",
    "django__django-11422",
    "sphinx-doc__sphinx-8595",
    "astropy__astropy-14995",
    "sympy__sympy-21171",
    "pytest-dev__pytest-6116",
    "sphinx-doc__sphinx-10325",
    "pytest-dev__pytest-11148",
    "scikit-learn__scikit-learn-25638",
    "sympy__sympy-14396",
    "sympy__sympy-13915",
    "sympy__sympy-24102",
    "django__django-15213",
    "pytest-dev__pytest-5221",
    "sympy__sympy-13971",
    "django__django-16229",
    "django__django-11630",
    "astropy__astropy-14182",
    "pytest-dev__pytest-9359",
    "django__django-15320",
    "sympy__sympy-20212",
    "pytest-dev__pytest-7490",
    "psf__requests-863",
    "sphinx-doc__sphinx-10451",
    "django__django-12284",
    "sympy__sympy-18189",
    "sympy__sympy-20639",
    "django__django-16910",
    "matplotlib__matplotlib-26011",
    "sympy__sympy-13647",
    "django__django-14580",
    "matplotlib__matplotlib-23476",
    "django__django-12983",
    "django__django-14238",
    "sphinx-doc__sphinx-8474",
    "django__django-17087",
    "sphinx-doc__sphinx-8282",
    "pytest-dev__pytest-5495",
    "mwaskom__seaborn-2848",
    "scikit-learn__scikit-learn-13779",
    "django__django-12497",
    "django__django-13964",
    "django__django-12708",
    "sympy__sympy-13031"
  ],
  "turns_list": [
    29,
    21,
    28,
    18,
    28,
    28,
    29,
    31,
    28,
    24,
    29,
    30,
    29,
    6,
    30,
    30,
    30,
    25,
    26,
    29,
    20,
    27,
    3,
    30,
    28,
    3,
    30,
    28,
    28,
    28,
    28,
    29,
    29,
    15,
    29,
    17,
    27,
    30,
    28,
    27,
    30,
    29,
    30,
    29,
    21,
    2,
    30,
    29,
    26,
    29,
    27,
    28,
    31,
    26,
    30,
    27,
    26
  ],
  "stuck_in_loop_list": [
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    1,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0
  ],
  "tool_accuracy_list": [
    96.55,
    85.71,
    92.86,
    88.89,
    96.43,
    92.86,
    13.79,
    96.77,
    96.43,
    91.67,
    96.55,
    80.0,
    100.0,
    50.0,
    90.0,
    93.33,
    96.67,
    100.0,
    88.46,
    93.1,
    90.0,
    100.0,
    66.67,
    93.33,
    92.86,
    100.0,
    96.67,
    96.43,
    96.43,
    96.43,
    78.57,
    96.55,
    93.1,
    100.0,
    93.1,
    88.24,
    92.59,
    76.67,
    96.43,
    92.59,
    93.33,
    86.21,
    93.33,
    96.55,
    95.24,
    50.0,
    90.0,
    93.1,
    92.31,
    89.66,
    96.3,
    85.71,
    100.0,
    84.62,
    90.0,
    22.22,
    92.31
  ],
  "prompt_tokens_list": [
    182134,
    152778,
    166772,
    91130,
    160316,
    119355,
    369643,
    268947,
    170301,
    102624,
    269212,
    173650,
    183555,
    75577,
    136400,
    212648,
    218605,
    202418,
    239508,
    141808,
    115328,
    150446,
    36465,
    128001,
    129088,
    4739,
    216565,
    307317,
    174355,
    112786,
    252582,
    185682,
    289356,
    90570,
    281110,
    100061,
    119428,
    159940,
    126782,
    347195,
    175095,
    207096,
    287799,
    147062,
    220093,
    16785,
    187482,
    145978,
    360317,
    174766,
    222356,
    485083,
    139162,
    318472,
    291950,
    200208,
    221258
  ],
  "completion_tokens_list": [
    2508,
    1783,
    3617,
    2314,
    3997,
    3024,
    6894,
    3903,
    2166,
    2848,
    2763,
    1621,
    3819,
    939,
    2519,
    6485,
    1819,
    2705,
    1947,
    2247,
    1455,
    3510,
    490,
    3555,
    2216,
    549,
    2114,
    2412,
    2941,
    3149,
    2818,
    3211,
    3735,
    2239,
    3867,
    2701,
    2205,
    2907,
    2110,
    4012,
    3206,
    3121,
    3362,
    2841,
    3017,
    201,
    3923,
    1562,
    3910,
    2941,
    2889,
    4692,
    2254,
    4834,
    1773,
    2950,
    4237
  ],
  "category_breakdown": {
    "empty_patch": {
      "instance_ids": [
        "django__django-15695",
        "django__django-15738",
        "pydata__xarray-4493",
        "sympy__sympy-18087",
        "django__django-16873",
        "django__django-12747",
        "sympy__sympy-18621",
        "django__django-12908",
        "sphinx-doc__sphinx-8801",
        "django__django-14667",
        "sympy__sympy-16503",
        "django__django-15388",
        "sympy__sympy-21614",
        "sphinx-doc__sphinx-8435",
        "django__django-11422",
        "sphinx-doc__sphinx-8595",
        "astropy__astropy-14995",
        "sympy__sympy-21171",
        "pytest-dev__pytest-6116",
        "sphinx-doc__sphinx-10325",
        "pytest-dev__pytest-11148",
        "scikit-learn__scikit-learn-25638",
        "sympy__sympy-14396",
        "sympy__sympy-13915",
        "sympy__sympy-24102",
        "django__django-15213",
        "pytest-dev__pytest-5221",
        "sympy__sympy-13971",
        "django__django-16229",
        "django__django-11630",
        "astropy__astropy-14182",
        "pytest-dev__pytest-9359",
        "django__django-15320",
        "sympy__sympy-20212",
        "pytest-dev__pytest-7490",
        "psf__requests-863",
        "sphinx-doc__sphinx-10451",
        "django__django-12284",
        "sympy__sympy-18189",
        "sympy__sympy-20639",
        "django__django-16910",
        "matplotlib__matplotlib-26011",
        "sympy__sympy-13647",
        "django__django-14580",
        "matplotlib__matplotlib-23476",
        "django__django-12983",
        "django__django-14238",
        "sphinx-doc__sphinx-8474",
        "django__django-17087",
        "sphinx-doc__sphinx-8282",
        "pytest-dev__pytest-5495",
        "mwaskom__seaborn-2848",
        "scikit-learn__scikit-learn-13779",
        "django__django-12497",
        "django__django-13964",
        "django__django-12708",
        "sympy__sympy-13031"
      ],
      "turns_list": [
        29,
        21,
        28,
        18,
        28,
        28,
        29,
        31,
        28,
        24,
        29,
        30,
        29,
        6,
        30,
        30,
        30,
        25,
        26,
        29,
        20,
        27,
        3,
        30,
        28,
        3,
        30,
        28,
        28,
        28,
        28,
        29,
        29,
        15,
        29,
        17,
        27,
        30,
        28,
        27,
        30,
        29,
        30,
        29,
        21,
        2,
        30,
        29,
        26,
        29,
        27,
        28,
        31,
        26,
        30,
        27,
        26
      ],
      "stuck_in_loop_list": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        1,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "tool_accuracy_list": [
        96.55,
        85.71,
        92.86,
        88.89,
        96.43,
        92.86,
        13.79,
        96.77,
        96.43,
        91.67,
        96.55,
        80.0,
        100.0,
        50.0,
        90.0,
        93.33,
        96.67,
        100.0,
        88.46,
        93.1,
        90.0,
        100.0,
        66.67,
        93.33,
        92.86,
        100.0,
        96.67,
        96.43,
        96.43,
        96.43,
        78.57,
        96.55,
        93.1,
        100.0,
        93.1,
        88.24,
        92.59,
        76.67,
        96.43,
        92.59,
        93.33,
        86.21,
        93.33,
        96.55,
        95.24,
        50.0,
        90.0,
        93.1,
        92.31,
        89.66,
        96.3,
        85.71,
        100.0,
        84.62,
        90.0,
        22.22,
        92.31
      ],
      "prompt_tokens_list": [
        182134,
        152778,
        166772,
        91130,
        160316,
        119355,
        369643,
        268947,
        170301,
        102624,
        269212,
        173650,
        183555,
        75577,
        136400,
        212648,
        218605,
        202418,
        239508,
        141808,
        115328,
        150446,
        36465,
        128001,
        129088,
        4739,
        216565,
        307317,
        174355,
        112786,
        252582,
        185682,
        289356,
        90570,
        281110,
        100061,
        119428,
        159940,
        126782,
        347195,
        175095,
        207096,
        287799,
        147062,
        220093,
        16785,
        187482,
        145978,
        360317,
        174766,
        222356,
        485083,
        139162,
        318472,
        291950,
        200208,
        221258
      ],
      "completion_tokens_list": [
        2508,
        1783,
        3617,
        2314,
        3997,
        3024,
        6894,
        3903,
        2166,
        2848,
        2763,
        1621,
        3819,
        939,
        2519,
        6485,
        1819,
        2705,
        1947,
        2247,
        1455,
        3510,
        490,
        3555,
        2216,
        549,
        2114,
        2412,
        2941,
        3149,
        2818,
        3211,
        3735,
        2239,
        3867,
        2701,
        2205,
        2907,
        2110,
        4012,
        3206,
        3121,
        3362,
        2841,
        3017,
        201,
        3923,
        1562,
        3910,
        2941,
        2889,
        4692,
        2254,
        4834,
        1773,
        2950,
        4237
      ],
      "averages": {
        "avg_turns": 25.65,
        "avg_stuck_in_loop_rate": 1.75,
        "avg_tool_accuracy": 88.2,
        "avg_prompt_tokens": 188879.63,
        "avg_completion_tokens": 2874.16
      }
    },
    "resolved": {
      "instance_ids": [],
      "turns_list": [],
      "stuck_in_loop_list": [],
      "tool_accuracy_list": [],
      "prompt_tokens_list": [],
      "completion_tokens_list": [],
      "averages": {
        "avg_turns": 0,
        "avg_stuck_in_loop_rate": 0,
        "avg_tool_accuracy": 0,
        "avg_prompt_tokens": 0,
        "avg_completion_tokens": 0
      }
    },
    "unresolved": {
      "instance_ids": [],
      "turns_list": [],
      "stuck_in_loop_list": [],
      "tool_accuracy_list": [],
      "prompt_tokens_list": [],
      "completion_tokens_list": [],
      "averages": {
        "avg_turns": 0,
        "avg_stuck_in_loop_rate": 0,
        "avg_tool_accuracy": 0,
        "avg_prompt_tokens": 0,
        "avg_completion_tokens": 0
      }
    },
    "other": {
      "instance_ids": [],
      "turns_list": [],
      "stuck_in_loop_list": [],
      "tool_accuracy_list": [],
      "prompt_tokens_list": [],
      "completion_tokens_list": [],
      "averages": {
        "avg_turns": 0,
        "avg_stuck_in_loop_rate": 0,
        "avg_tool_accuracy": 0,
        "avg_prompt_tokens": 0,
        "avg_completion_tokens": 0
      }
    }
  },
  "timestamp": "2025-07-29T14:42:42.065604",
  "intra_correctness_list": [
    44.58620689655172,
    18.666666666666668,
    32.035714285714285,
    37.5,
    25.642857142857142,
    35.642857142857146,
    26.620689655172413,
    24.580645161290324,
    21.785714285714285,
    38.291666666666664,
    22.724137931034484,
    27.5,
    26.551724137931036,
    44,
    34.733333333333334,
    39.53333333333333,
    33.13333333333333,
    37.72,
    32.57692307692308,
    39.44827586206897,
    22.65,
    28.444444444444443,
    52.333333333333336,
    29.133333333333333,
    39.857142857142854,
    46.666666666666664,
    29.4,
    50.035714285714285,
    17.392857142857142,
    29.285714285714285,
    28.035714285714285,
    30.79310344827586,
    27.137931034482758,
    43.06666666666667,
    43.241379310344826,
    27.823529411764707,
    23.11111111111111,
    34.2,
    27.357142857142858,
    36.55555555555556,
    37.36666666666667,
    25,
    46.7,
    28.82758620689655,
    8.333333333333334,
    42.5,
    37.4,
    26.379310344827587,
    16.615384615384617,
    41.793103448275865,
    37.111111111111114,
    28.857142857142858,
    30.032258064516128,
    33.5,
    33.56666666666667,
    34.074074074074076,
    32.46153846153846
  ],
  "inter_correctness_list": [
    83.96551724137932,
    51.42857142857143,
    85.71428571428571,
    65.27777777777777,
    80.71428571428571,
    67.85714285714286,
    86.55172413793103,
    85,
    77.32142857142857,
    69.79166666666667,
    87.24137931034483,
    58.833333333333336,
    54.310344827586206,
    75.83333333333333,
    81.16666666666667,
    83.5,
    91.5,
    77.6,
    58.65384615384615,
    80,
    48.5,
    91.29629629629629,
    43.333333333333336,
    83.66666666666667,
    66.42857142857143,
    50,
    82.66666666666667,
    88.75,
    68.57142857142857,
    55.714285714285715,
    53.92857142857143,
    72.93103448275862,
    87.41379310344827,
    92.66666666666667,
    85.51724137931035,
    80,
    81.11111111111111,
    77,
    77.85714285714286,
    85.18518518518519,
    85.5,
    75.51724137931035,
    87.66666666666667,
    79.82758620689656,
    84.04761904761905,
    42.5,
    57.833333333333336,
    81.89655172413794,
    84.03846153846153,
    72.75862068965517,
    69.81481481481481,
    66.60714285714286,
    82.58064516129032,
    78.46153846153847,
    79,
    80.37037037037037,
    73.84615384615384
  ],
  "informativeness_list": [
    37.06896551724138,
    35.23809523809524,
    28.214285714285715,
    51.388888888888886,
    41.07142857142857,
    31.25,
    41.89655172413793,
    30.483870967741936,
    64.64285714285714,
    45.833333333333336,
    45.51724137931034,
    48.333333333333336,
    35.172413793103445,
    63.333333333333336,
    41.333333333333336,
    36.833333333333336,
    38.166666666666664,
    25.4,
    48.46153846153846,
    48.10344827586207,
    54.5,
    45.55555555555556,
    53.333333333333336,
    41.333333333333336,
    55.892857142857146,
    38.333333333333336,
    58.166666666666664,
    34.107142857142854,
    35.535714285714285,
    47.857142857142854,
    56.785714285714285,
    44.6551724137931,
    25.517241379310345,
    39.666666666666664,
    38.44827586206897,
    43.8235294117647,
    47.592592592592595,
    59.166666666666664,
    30.535714285714285,
    28.14814814814815,
    70.33333333333333,
    40.3448275862069,
    39.333333333333336,
    34.13793103448276,
    36.19047619047619,
    37.5,
    48.833333333333336,
    50.3448275862069,
    40.76923076923077,
    54.13793103448276,
    52.592592592592595,
    35.892857142857146,
    34.67741935483871,
    40.38461538461539,
    38.06666666666667,
    53.148148148148145,
    40.76923076923077
  ]
}