{
  "summary": {
    "total_instances": 69,
    "avg_turns": 9.46,
    "stuck_in_loop_percentage": 0.0,
    "overall_tool_accuracy": 74.58,
    "total_prompt_tokens": 3234437,
    "total_completion_tokens": 55938,
    "avg_prompt_tokens": 46875.9,
    "avg_completion_tokens": 810.7,
    "classification_counts": {
      "empty_patch": 0,
      "resolved": 69,
      "unresolved": 0,
      "other": 0
    }
  },
  "instance_ids": [
    "matplotlib__matplotlib-23964",
    "scikit-learn__scikit-learn-15535",
    "django__django-16139",
    "scikit-learn__scikit-learn-13241",
    "django__django-11999",
    "django__django-13658",
    "matplotlib__matplotlib-23314",
    "matplotlib__matplotlib-24149",
    "django__django-10924",
    "scikit-learn__scikit-learn-13584",
    "scikit-learn__scikit-learn-15512",
    "django__django-10914",
    "django__django-13401",
    "django__django-11815",
    "sympy__sympy-18057",
    "django__django-14382",
    "sympy__sympy-15609",
    "pytest-dev__pytest-7373",
    "sphinx-doc__sphinx-8721",
    "django__django-12125",
    "sphinx-doc__sphinx-8713",
    "sympy__sympy-24152",
    "django__django-14787",
    "django__django-14016",
    "django__django-13230",
    "django__django-11049",
    "sympy__sympy-21055",
    "scikit-learn__scikit-learn-14894",
    "django__django-16595",
    "sympy__sympy-18532",
    "django__django-15851",
    "sympy__sympy-24213",
    "django__django-15814",
    "matplotlib__matplotlib-26020",
    "django__django-16527",
    "pytest-dev__pytest-5227",
    "psf__requests-3362",
    "matplotlib__matplotlib-25311",
    "django__django-12700",
    "sympy__sympy-23117",
    "pylint-dev__pylint-7993",
    "django__django-11133",
    "scikit-learn__scikit-learn-13439",
    "django__django-13158",
    "sympy__sympy-21847",
    "django__django-16255",
    "django__django-15789",
    "django__django-15347",
    "pytest-dev__pytest-7432",
    "django__django-13710",
    "scikit-learn__scikit-learn-13496",
    "django__django-11099",
    "django__django-16379",
    "django__django-16041",
    "matplotlib__matplotlib-25442",
    "django__django-12286",
    "scikit-learn__scikit-learn-13142",
    "django__django-11179",
    "django__django-12453",
    "pylint-dev__pylint-5859",
    "sympy__sympy-13480",
    "mwaskom__seaborn-3190",
    "sympy__sympy-13471",
    "sympy__sympy-14774",
    "mwaskom__seaborn-3010",
    "sympy__sympy-12419",
    "django__django-14855",
    "scikit-learn__scikit-learn-12471",
    "pydata__xarray-5131"
  ],
  "turns_list": [
    7,
    6,
    4,
    10,
    7,
    5,
    9,
    5,
    24,
    14,
    9,
    9,
    5,
    4,
    9,
    4,
    22,
    20,
    4,
    24,
    9,
    5,
    7,
    8,
    8,
    15,
    4,
    12,
    4,
    11,
    5,
    28,
    3,
    12,
    4,
    7,
    9,
    15,
    5,
    15,
    9,
    20,
    4,
    19,
    19,
    4,
    4,
    4,
    11,
    4,
    11,
    6,
    3,
    10,
    15,
    7,
    13,
    4,
    5,
    8,
    5,
    21,
    10,
    4,
    6,
    10,
    3,
    21,
    6
  ],
  "stuck_in_loop_list": [
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0
  ],
  "tool_accuracy_list": [
    85.71,
    83.33,
    100.0,
    90.0,
    100.0,
    60.0,
    100.0,
    40.0,
    95.83,
    85.71,
    88.89,
    77.78,
    80.0,
    75.0,
    55.56,
    25.0,
    100.0,
    85.0,
    75.0,
    91.67,
    100.0,
    80.0,
    85.71,
    50.0,
    87.5,
    0.0,
    100.0,
    66.67,
    100.0,
    63.64,
    100.0,
    7.14,
    100.0,
    75.0,
    100.0,
    71.43,
    77.78,
    86.67,
    20.0,
    40.0,
    100.0,
    75.0,
    100.0,
    94.74,
    84.21,
    0.0,
    75.0,
    75.0,
    54.55,
    75.0,
    90.91,
    100.0,
    0.0,
    70.0,
    66.67,
    14.29,
    92.31,
    0.0,
    80.0,
    87.5,
    100.0,
    76.19,
    70.0,
    100.0,
    100.0,
    90.0,
    100.0,
    80.95,
    100.0
  ],
  "prompt_tokens_list": [
    28953,
    19936,
    8672,
    19248,
    11452,
    8975,
    17453,
    8440,
    113366,
    36230,
    83488,
    70192,
    41796,
    20377,
    25289,
    3639,
    126114,
    92620,
    14446,
    156150,
    22932,
    46487,
    27512,
    13065,
    14650,
    270982,
    28570,
    36499,
    16435,
    89493,
    8495,
    169578,
    4088,
    122057,
    6035,
    33622,
    47983,
    133602,
    15604,
    97837,
    32121,
    67546,
    22347,
    69041,
    124236,
    20818,
    25194,
    10080,
    62751,
    28093,
    68390,
    13513,
    4279,
    21762,
    55902,
    14191,
    72329,
    3649,
    24677,
    22354,
    5744,
    118939,
    24744,
    4429,
    24658,
    22187,
    3395,
    127763,
    26943
  ],
  "completion_tokens_list": [
    411,
    386,
    379,
    406,
    603,
    180,
    660,
    289,
    1728,
    705,
    759,
    600,
    835,
    439,
    1543,
    200,
    1455,
    1133,
    293,
    2683,
    677,
    424,
    743,
    1018,
    515,
    784,
    582,
    1079,
    227,
    940,
    352,
    1503,
    206,
    755,
    372,
    243,
    764,
    1713,
    186,
    1329,
    764,
    1549,
    334,
    1342,
    2453,
    222,
    288,
    347,
    597,
    383,
    3090,
    628,
    159,
    578,
    793,
    1015,
    1289,
    289,
    203,
    697,
    394,
    845,
    865,
    416,
    296,
    4460,
    179,
    1053,
    311
  ],
  "category_breakdown": {
    "empty_patch": {
      "instance_ids": [],
      "turns_list": [],
      "stuck_in_loop_list": [],
      "tool_accuracy_list": [],
      "prompt_tokens_list": [],
      "completion_tokens_list": [],
      "averages": {
        "avg_turns": 0,
        "avg_stuck_in_loop_rate": 0,
        "avg_tool_accuracy": 0,
        "avg_prompt_tokens": 0,
        "avg_completion_tokens": 0
      }
    },
    "resolved": {
      "instance_ids": [
        "matplotlib__matplotlib-23964",
        "scikit-learn__scikit-learn-15535",
        "django__django-16139",
        "scikit-learn__scikit-learn-13241",
        "django__django-11999",
        "django__django-13658",
        "matplotlib__matplotlib-23314",
        "matplotlib__matplotlib-24149",
        "django__django-10924",
        "scikit-learn__scikit-learn-13584",
        "scikit-learn__scikit-learn-15512",
        "django__django-10914",
        "django__django-13401",
        "django__django-11815",
        "sympy__sympy-18057",
        "django__django-14382",
        "sympy__sympy-15609",
        "pytest-dev__pytest-7373",
        "sphinx-doc__sphinx-8721",
        "django__django-12125",
        "sphinx-doc__sphinx-8713",
        "sympy__sympy-24152",
        "django__django-14787",
        "django__django-14016",
        "django__django-13230",
        "django__django-11049",
        "sympy__sympy-21055",
        "scikit-learn__scikit-learn-14894",
        "django__django-16595",
        "sympy__sympy-18532",
        "django__django-15851",
        "sympy__sympy-24213",
        "django__django-15814",
        "matplotlib__matplotlib-26020",
        "django__django-16527",
        "pytest-dev__pytest-5227",
        "psf__requests-3362",
        "matplotlib__matplotlib-25311",
        "django__django-12700",
        "sympy__sympy-23117",
        "pylint-dev__pylint-7993",
        "django__django-11133",
        "scikit-learn__scikit-learn-13439",
        "django__django-13158",
        "sympy__sympy-21847",
        "django__django-16255",
        "django__django-15789",
        "django__django-15347",
        "pytest-dev__pytest-7432",
        "django__django-13710",
        "scikit-learn__scikit-learn-13496",
        "django__django-11099",
        "django__django-16379",
        "django__django-16041",
        "matplotlib__matplotlib-25442",
        "django__django-12286",
        "scikit-learn__scikit-learn-13142",
        "django__django-11179",
        "django__django-12453",
        "pylint-dev__pylint-5859",
        "sympy__sympy-13480",
        "mwaskom__seaborn-3190",
        "sympy__sympy-13471",
        "sympy__sympy-14774",
        "mwaskom__seaborn-3010",
        "sympy__sympy-12419",
        "django__django-14855",
        "scikit-learn__scikit-learn-12471",
        "pydata__xarray-5131"
      ],
      "turns_list": [
        7,
        6,
        4,
        10,
        7,
        5,
        9,
        5,
        24,
        14,
        9,
        9,
        5,
        4,
        9,
        4,
        22,
        20,
        4,
        24,
        9,
        5,
        7,
        8,
        8,
        15,
        4,
        12,
        4,
        11,
        5,
        28,
        3,
        12,
        4,
        7,
        9,
        15,
        5,
        15,
        9,
        20,
        4,
        19,
        19,
        4,
        4,
        4,
        11,
        4,
        11,
        6,
        3,
        10,
        15,
        7,
        13,
        4,
        5,
        8,
        5,
        21,
        10,
        4,
        6,
        10,
        3,
        21,
        6
      ],
      "stuck_in_loop_list": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0
      ],
      "tool_accuracy_list": [
        85.71,
        83.33,
        100.0,
        90.0,
        100.0,
        60.0,
        100.0,
        40.0,
        95.83,
        85.71,
        88.89,
        77.78,
        80.0,
        75.0,
        55.56,
        25.0,
        100.0,
        85.0,
        75.0,
        91.67,
        100.0,
        80.0,
        85.71,
        50.0,
        87.5,
        0.0,
        100.0,
        66.67,
        100.0,
        63.64,
        100.0,
        7.14,
        100.0,
        75.0,
        100.0,
        71.43,
        77.78,
        86.67,
        20.0,
        40.0,
        100.0,
        75.0,
        100.0,
        94.74,
        84.21,
        0.0,
        75.0,
        75.0,
        54.55,
        75.0,
        90.91,
        100.0,
        0.0,
        70.0,
        66.67,
        14.29,
        92.31,
        0.0,
        80.0,
        87.5,
        100.0,
        76.19,
        70.0,
        100.0,
        100.0,
        90.0,
        100.0,
        80.95,
        100.0
      ],
      "prompt_tokens_list": [
        28953,
        19936,
        8672,
        19248,
        11452,
        8975,
        17453,
        8440,
        113366,
        36230,
        83488,
        70192,
        41796,
        20377,
        25289,
        3639,
        126114,
        92620,
        14446,
        156150,
        22932,
        46487,
        27512,
        13065,
        14650,
        270982,
        28570,
        36499,
        16435,
        89493,
        8495,
        169578,
        4088,
        122057,
        6035,
        33622,
        47983,
        133602,
        15604,
        97837,
        32121,
        67546,
        22347,
        69041,
        124236,
        20818,
        25194,
        10080,
        62751,
        28093,
        68390,
        13513,
        4279,
        21762,
        55902,
        14191,
        72329,
        3649,
        24677,
        22354,
        5744,
        118939,
        24744,
        4429,
        24658,
        22187,
        3395,
        127763,
        26943
      ],
      "completion_tokens_list": [
        411,
        386,
        379,
        406,
        603,
        180,
        660,
        289,
        1728,
        705,
        759,
        600,
        835,
        439,
        1543,
        200,
        1455,
        1133,
        293,
        2683,
        677,
        424,
        743,
        1018,
        515,
        784,
        582,
        1079,
        227,
        940,
        352,
        1503,
        206,
        755,
        372,
        243,
        764,
        1713,
        186,
        1329,
        764,
        1549,
        334,
        1342,
        2453,
        222,
        288,
        347,
        597,
        383,
        3090,
        628,
        159,
        578,
        793,
        1015,
        1289,
        289,
        203,
        697,
        394,
        845,
        865,
        416,
        296,
        4460,
        179,
        1053,
        311
      ],
      "averages": {
        "avg_turns": 9.46,
        "avg_stuck_in_loop_rate": 0.0,
        "avg_tool_accuracy": 74.76,
        "avg_prompt_tokens": 46875.9,
        "avg_completion_tokens": 810.7
      }
    },
    "unresolved": {
      "instance_ids": [],
      "turns_list": [],
      "stuck_in_loop_list": [],
      "tool_accuracy_list": [],
      "prompt_tokens_list": [],
      "completion_tokens_list": [],
      "averages": {
        "avg_turns": 0,
        "avg_stuck_in_loop_rate": 0,
        "avg_tool_accuracy": 0,
        "avg_prompt_tokens": 0,
        "avg_completion_tokens": 0
      }
    },
    "other": {
      "instance_ids": [],
      "turns_list": [],
      "stuck_in_loop_list": [],
      "tool_accuracy_list": [],
      "prompt_tokens_list": [],
      "completion_tokens_list": [],
      "averages": {
        "avg_turns": 0,
        "avg_stuck_in_loop_rate": 0,
        "avg_tool_accuracy": 0,
        "avg_prompt_tokens": 0,
        "avg_completion_tokens": 0
      }
    }
  },
  "timestamp": "2025-07-29T14:42:42.088414",
  "intra_correctness_list": [
    37,
    37,
    31.75,
    57.3,
    42.142857142857146,
    41,
    47.55555555555556,
    35,
    32.541666666666664,
    40.642857142857146,
    55.888888888888886,
    49.888888888888886,
    51.8,
    48,
    44.666666666666664,
    46.75,
    42.68181818181818,
    41.1,
    55,
    37.916666666666664,
    48.111111111111114,
    49,
    38.42857142857143,
    45.75,
    44.25,
    31.733333333333334,
    61,
    38.333333333333336,
    44.25,
    51.09090909090909,
    44.4,
    43,
    52.333333333333336,
    28.416666666666668,
    38.5,
    35.285714285714285,
    29.444444444444443,
    31.6,
    15.4,
    57.4,
    45.77777777777778,
    33.45,
    26.75,
    44.473684210526315,
    37.26315789473684,
    25.5,
    65,
    39.25,
    48.27272727272727,
    18.75,
    32.45454545454545,
    50,
    59,
    44.5,
    40.733333333333334,
    57,
    41.69230769230769,
    48.5,
    45,
    24,
    40.4,
    25.428571428571427,
    32.7,
    80,
    40.833333333333336,
    35.2,
    49.666666666666664,
    33.904761904761905,
    37.666666666666664
  ],
  "inter_correctness_list": [
    56.42857142857143,
    54.166666666666664,
    77.5,
    63.5,
    51.42857142857143,
    87,
    47.22222222222222,
    35,
    75.625,
    60.714285714285715,
    78.33333333333333,
    76.11111111111111,
    79,
    67.5,
    55,
    72.5,
    76.81818181818181,
    81.5,
    67.5,
    64.375,
    81.11111111111111,
    50,
    45.714285714285715,
    55.625,
    68.125,
    83.66666666666667,
    92.5,
    69.58333333333333,
    58.75,
    63.18181818181818,
    66,
    83.39285714285714,
    78.33333333333333,
    59.166666666666664,
    82.5,
    67.14285714285714,
    58.888888888888886,
    69,
    59,
    72.66666666666667,
    75,
    55.5,
    80,
    65.26315789473684,
    66.57894736842105,
    80,
    82.5,
    80,
    72.72727272727273,
    58.75,
    73.63636363636364,
    86.66666666666667,
    91.66666666666667,
    82.5,
    70.33333333333333,
    65,
    73.46153846153847,
    80,
    81,
    63.75,
    75,
    63.80952380952381,
    44.5,
    90,
    65,
    42.5,
    60,
    67.14285714285714,
    53.333333333333336
  ],
  "informativeness_list": [
    69.28571428571429,
    53.333333333333336,
    45,
    58,
    70,
    68,
    52.22222222222222,
    89,
    57.916666666666664,
    61.785714285714285,
    59.44444444444444,
    68.33333333333333,
    60,
    89.25,
    72.77777777777777,
    80,
    51.59090909090909,
    58.75,
    85,
    65.83333333333333,
    49.44444444444444,
    70,
    65,
    80,
    48.125,
    51,
    63.75,
    67.5,
    87.5,
    53.63636363636363,
    64,
    36.785714285714285,
    91.66666666666667,
    52.916666666666664,
    46.25,
    62.857142857142854,
    62.77777777777778,
    57.666666666666664,
    66,
    49.666666666666664,
    68.88888888888889,
    69.5,
    62.5,
    55.26315789473684,
    45.26315789473684,
    45,
    68.75,
    87.5,
    70.45454545454545,
    45,
    63.63636363636363,
    51.666666666666664,
    63.333333333333336,
    67.5,
    42,
    72.85714285714286,
    47.69230769230769,
    87.5,
    68,
    46.25,
    78,
    50.476190476190474,
    64.5,
    87.5,
    60,
    65.5,
    60,
    48.57142857142857,
    67.5
  ]
}