dataset,action,llm,?A1=A2,?A1=A3+A4,?A1>A3,?A1>A4,?A3∅A4,?A4=A1|3,?A1=A1*,?A1=A1**,?A1*=A1**,J(A1-A2),J(A1-A34),J(A3-A4),J(A4-A1|3),J(A1-A1*),J(A1-A1**),J(A1*-A1**),?SC(A1=A2),?SC(A1>A3),?SC(A1>A4),?SC(A3∅A4),?SC(A4=A1|3),idk_A1,idk_A2,idk_A3,idk_A4,?A1=A1(ave),J_A1_ave,idk,?A1=A2(+),?A1=A2(-),J(1-2)+,J(1-2)-,?A1>A3(+),?A1>A3(-),?A1>A4(+),?A1>A4(-),?A3∅A4(+),?A3∅A4(-),J(3-4)+,J(3-4)-,?A1=A3+A4(+),?A1=A3+A4(-),J(1-34)+,J(1-34)-,p(A1=A2),p(A1=A3+A4),p(A1>A3),p(A1>A4),p(A3∅A4),p(A4=A1|3)
LC-QuAD,classification,deepseek-chat,0.9,0.84,1.0,0.9133,0.58,0.4733,0.44,0.4133,0.44,0.9437,0.9205,0.4136,0.5471,0.6532,0.6185,0.6179,0.8933,0.1,,,0.52,0.22,0.24,1.0,0.4333,0.4311,0.6299,0.473325,0.9280575539568345,0.5454545454545454,0.9597827338129498,0.740909090909091,1.0,1.0,,,,,,,0.8571428571428571,0.8391608391608392,0.9642857142857143,0.9183146853146854,0.0,0.0,0.0,0.0,0.0883,0.0
LC-QuAD,classification,deepseek-reasoner,0.6333,0.5933,1.0,0.6933,0.8133,0.4867,0.34,0.2533,0.2667,0.6676,0.7521,0.1663,0.5834,0.5584,0.5024,0.503,0.6533,0.92,,,0.54,0.1933,0.1,1.0,0.1733,0.2867,0.5213,0.36665,0.6546762589928058,0.36363636363636365,0.6765122302158274,0.5545454545454546,1.0,1.0,,,,,,,0.6056338028169014,0.375,0.7532640845070423,0.731975,0.0,0.0,0.0,0.0,0.8721,0.0
LC-QuAD,classification,gemini-2.0-flash,0.9267,0.8,1.0,0.9467,0.8267,0.6667,0.3067,0.3,0.3,0.9631,0.9319,0.1028,0.8089,0.6074,0.5533,0.5822,0.9067,0.44,,,0.4267,0.0867,0.08,1.0,0.1467,0.3022,0.581,0.32835000000000003,0.9370629370629371,0.7142857142857143,0.9695622377622378,0.8300428571428571,1.0,1.0,,,,,,,0.7291666666666666,0.8333333333333334,0.9050166666666667,0.9445382352941176,0.0,0.0,0.0,0.0,0.0001,0.0
LC-QuAD,classification,gemini-2.5-flash,0.9067,0.94,1.0,0.9533,0.9267,0.8933,0.2267,0.2333,0.2467,0.9517,0.9573,0.0706,0.8778,0.5995,0.5652,0.5749,0.9,0.9467,,,0.8933,0.0333,0.04,1.0,0.1,0.2356,0.5799,0.293325,0.9548872180451128,0.5294117647058824,0.972472932330827,0.7889176470588235,1.0,1.0,,,,,,,0.9583333333333334,0.5,0.9631645833333332,0.81655,0.0,0.0,0.0,0.0,0.0145,0.0
LC-QuAD,classification,gemini-2.5-pro,0.94,0.9,1.0,0.9667,0.9533,0.8667,0.3067,0.3,0.3333,0.9747,0.971,0.0467,0.9275,0.6358,0.653,0.6654,0.98,0.9867,,,0.8733,0.0533,0.0533,1.0,0.0733,0.3133,0.6514,0.294975,0.9859154929577465,0.125,0.9907246478873241,0.6906749999999999,1.0,1.0,,,,,,,0.9047619047619048,0.6666666666666666,0.9707931972789117,0.9833333333333334,0.0,0.0,0.0,0.0,0.0012,0.0
LC-QuAD,classification,gpt-4.1-2025-04-14,0.9267,0.88,1.0,0.98,0.9,0.7867,0.22,0.22,0.26,0.9728,0.9751,0.0941,0.8781,0.5262,0.5489,0.5743,0.92,0.9867,,,0.7867,0.06,0.06,1.0,0.1067,0.2333,0.5498,0.30667500000000003,0.9379310344827586,0.6,0.9755744827586208,0.89286,1.0,1.0,,,,,,,0.8835616438356164,0.75,0.9750664383561644,0.975,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,classification,gpt-4.1-mini-2025-04-14,0.88,0.8867,1.0,0.96,0.76,0.6467,0.3,0.2667,0.3,0.9439,0.9571,0.24,0.7256,0.5445,0.4839,0.4966,0.8933,0.3467,,,0.6533,0.1867,0.18,1.0,0.26,0.2889,0.5083,0.406675,0.9264705882352942,0.42857142857142855,0.9584014705882353,0.8028500000000002,1.0,1.0,,,,,,,0.9037037037037037,0.7333333333333333,0.960337037037037,0.9278800000000001,0.0,0.0,0.0,0.0,0.077,0.0
LC-QuAD,classification,gpt-4.1-nano-2025-04-14,0.9867,0.5133,1.0,0.5133,0.4267,0.0,0.9733,0.9467,0.9533,0.99,0.7011,0.5733,0.0,0.9767,0.9533,0.9567,0.72,0.1933,,,1.0,0.9867,0.9867,1.0,0.52,0.9578,0.9622,0.87335,0.9907407407407407,0.9761904761904762,0.9907407407407407,0.9880952380952381,1.0,1.0,,,,,,,,0.5133333333333333,,0.7011113333333333,0.0,0.0,0.0,0.0,1.0,1.0
LC-QuAD,classification,gpt-4o,0.9867,0.9733,1.0,0.9933,0.1933,0.1667,0.6867,0.7,0.7933,0.9867,0.9945,0.7964,0.1887,0.7525,0.7624,0.8349,0.96,0.3667,,,0.7533,0.6733,0.6867,1.0,0.78,0.7267,0.7833,0.7849999999999999,0.9863013698630136,1.0,0.9863013698630136,1.0,1.0,1.0,,,,,,,1.0,0.96875,1.0,0.9935546875000001,0.0,0.0,0.0,0.0,1.0,0.0
LC-QuAD,classification,gpt-5,0.9533,0.8533,1.0,0.94,0.72,0.5933,0.4867,0.4867,0.5133,0.9791,0.9101,0.28,0.6785,0.6941,0.647,0.7032,0.9333,0.9867,,,0.5933,0.28,0.2867,1.0,0.3467,0.4956,0.6814,0.47835,0.965034965034965,0.7142857142857143,0.9856643356643355,0.8452428571428571,1.0,1.0,,,,,,,0.8541666666666666,0.8333333333333334,0.9076854166666668,0.96855,0.0,0.0,0.0,0.0,0.9827,0.0
LC-QuAD,classification,gpt-5-mini,0.8867,0.68,1.0,0.78,0.5733,0.2667,0.6467,0.64,0.6333,0.9149,0.783,0.4267,0.3677,0.7471,0.7569,0.7631,0.8467,0.96,,,0.2733,0.5533,0.5067,1.0,0.5,0.64,0.7557,0.64,0.8873239436619719,0.875,0.9171619718309859,0.875,1.0,1.0,,,,,,,0.6689655172413793,1.0,0.775475172413793,1.0,0.0,0.0,0.0,0.0093,0.3555,0.0
LC-QuAD,classification,gpt-5-nano,0.7933,0.7067,1.0,0.78,0.18,0.0067,0.0733,0.64,0.0733,0.7933,0.7768,0.82,0.08,0.0778,0.64,0.0733,0.84,0.8533,,,0.0867,0.1667,0.1333,1.0,0.0267,0.2622,0.2637,0.331675,0.8368794326241135,0.1111111111111111,0.8368794326241135,0.1111111111111111,1.0,1.0,,,,,,,0.7391304347826086,0.3333333333333333,0.7971985507246376,0.5416666666666666,0.0004,0.0,0.0,0.0018,1.0,1.0
LC-QuAD,classification,gpt-oss:20b,0.6933,0.46,1.0,0.7467,0.86,0.3467,0.2067,0.2267,0.22,0.7865,0.7544,0.1127,0.6272,0.4128,0.4186,0.4039,0.6867,0.8933,,,0.3867,0.1933,0.1467,1.0,0.2333,0.2178,0.4118,0.393325,0.6938775510204082,0.6666666666666666,0.7825789115646258,0.9798,1.0,1.0,,,,,,,0.4513888888888889,0.6666666666666666,0.7493729166666667,0.875,0.0,0.0,0.0,0.0,0.4225,0.0
LC-QuAD,classification,grok-3-mini,0.9467,0.82,1.0,0.9067,0.8333,0.68,0.2933,0.2867,0.3133,0.9723,0.9395,0.1601,0.7737,0.5847,0.5748,0.5916,0.92,0.94,,,0.7,0.14,0.14,1.0,0.1867,0.2978,0.5837,0.36667500000000003,0.9642857142857143,0.7,0.9826571428571429,0.82667,1.0,1.0,,,,,,,0.8163265306122449,1.0,0.9383047619047621,1.0,0.0,0.0,0.0,0.0,0.6962,0.0
LC-QuAD,classification,llama3.1:70b,0.78,0.6267,1.0,0.8867,0.9467,0.6133,0.1667,0.1267,0.1267,0.899,0.897,0.0242,0.8313,0.4143,0.3793,0.3859,0.8,0.0467,,,0.5533,0.0133,0.0467,1.0,0.02,0.14,0.3932,0.27,0.8,0.2,0.917433103448276,0.36334,1.0,1.0,,,,,,,0.6355140186915887,0.6046511627906976,0.8988551401869159,0.8925069767441862,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,classification,llama3.1:8b,0.1067,0.0733,1.0,0.2667,0.6667,0.02,0.0133,0.0133,0.0133,0.5162,0.4449,0.1021,0.342,0.2071,0.1868,0.1681,0.8533,0.2,,,0.9067,0.0,0.0,1.0,0.0,0.0133,0.1873,0.25,0.2,0.1,0.7313599999999999,0.5008699999999999,1.0,1.0,,,,,,,0.23076923076923078,0.058394160583941604,0.5037,0.43929781021897807,0.0003,0.0005,0.0,0.0,1.0,0.125
LC-QuAD,classification,mistral-small:24b,0.8333,0.5267,1.0,0.7867,0.8067,0.38,0.2933,0.2067,0.24,0.9206,0.811,0.1662,0.6194,0.5354,0.4164,0.454,0.8733,0.0,,,0.6067,0.12,0.1267,1.0,0.08,0.2467,0.4686,0.331675,0.8785714285714286,0.2,0.9444364285714284,0.58762,,1.0,,,,,,,0.4166666666666667,0.5362318840579711,0.7351166666666668,0.8175681159420289,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,classification,o3,0.8,0.82,1.0,0.92,0.92,0.7533,0.24,0.2933,0.26,0.9221,0.9227,0.0736,0.8414,0.5203,0.6088,0.5457,0.8267,0.96,,,0.7533,0.1333,0.1067,1.0,0.0867,0.2644,0.5583,0.331675,0.8263888888888888,0.16666666666666666,0.9360666666666666,0.5858666666666666,1.0,1.0,,,,,,,0.8287671232876712,0.5,0.9293465753424657,0.681,0.0,0.0,0.0,0.0,0.4018,0.0
LC-QuAD,fixing,deepseek-chat,0.7533,0.6733,0.86,0.92,0.9133,0.5867,0.48,0.5,0.5067,0.8787,0.9023,0.0774,0.7921,0.7292,0.7391,0.7598,,,,,,0.0733,0.08,0.16,0.1067,0.4956,0.7427,0.10500000000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,deepseek-reasoner,0.7467,0.7333,0.8867,0.92,0.9533,0.7,0.2533,0.2133,0.2067,0.8629,0.8984,0.0357,0.8266,0.5087,0.4976,0.5011,,,,,,0.0667,0.0467,0.0933,0.1,0.2244,0.5025,0.076675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.002,0.0
LC-QuAD,fixing,gemini-2.0-flash,0.78,0.7467,0.8,0.9133,0.96,0.7267,0.74,0.7533,0.7067,0.8033,0.905,0.0207,0.8501,0.8415,0.8587,0.8225,,,,,,0.0133,0.0067,0.0267,0.0733,0.7333,0.8409,0.030000000000000002,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,gemini-2.5-flash,0.9333,0.8933,0.9333,0.92,0.9533,0.8667,0.6133,0.5867,0.6133,0.9594,0.9124,0.0422,0.8381,0.7125,0.7154,0.7205,,,,,,0.06,0.0333,0.08,0.1,0.6044,0.7161,0.068325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0012,0.0
LC-QuAD,fixing,gemini-2.5-pro,0.8467,0.8533,0.8733,0.96,0.98,0.84,0.46,0.5,0.4733,0.9129,0.9283,0.02,0.8819,0.7211,0.7348,0.7272,,,,,,0.06,0.02,0.02,0.08,0.4778,0.7277,0.045,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,gpt-4.1-2025-04-14,0.7467,0.7467,0.9067,0.9333,0.9467,0.72,0.2667,0.3,0.28,0.8879,0.912,0.0262,0.8538,0.5491,0.564,0.5545,,,,,,0.06,0.06,0.0867,0.0933,0.2822,0.5559,0.075,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,gpt-4.1-mini-2025-04-14,0.7133,0.6867,0.8733,0.9267,0.8733,0.6467,0.2,0.2067,0.2267,0.8546,0.8392,0.095,0.7683,0.4791,0.4949,0.4897,,,,,,0.0733,0.0667,0.1867,0.14,0.2111,0.4879,0.116675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,gpt-4.1-nano-2025-04-14,0.4,0.2467,0.5467,0.5733,0.66,0.2,0.1533,0.14,0.1733,0.5398,0.479,0.2178,0.3423,0.2979,0.2723,0.2998,,,,,,0.1733,0.2067,0.3933,0.2,0.1555,0.29,0.243325,,,,,,,,,,,,,,,,,0.0,0.0002,0.0001,0.0,0.6482,0.0
LC-QuAD,fixing,gpt-4o,0.8067,0.78,0.88,0.9333,0.6133,0.4267,0.5,0.5333,0.4933,0.8748,0.8765,0.3746,0.5192,0.6382,0.6597,0.6358,,,,,,0.4267,0.4133,0.54,0.4667,0.5089,0.6446,0.46167500000000006,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0519,0.0
LC-QuAD,fixing,gpt-5,0.9133,0.8133,0.9,0.9733,0.8333,0.6467,0.58,0.6333,0.58,0.9735,0.883,0.1667,0.7202,0.7525,0.7685,0.7207,,,,,,0.36,0.3533,0.3467,0.3933,0.5978,0.7472,0.363325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0758,0.0
LC-QuAD,fixing,gpt-5-mini,0.8933,0.74,0.9067,0.94,0.64,0.4067,0.7,0.7,0.7133,0.9251,0.839,0.36,0.4909,0.7767,0.7796,0.8016,,,,,,0.6,0.6,0.6067,0.5867,0.7044,0.786,0.5983499999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0266,0.0
LC-QuAD,fixing,gpt-5-nano,0.8533,0.76,0.82,0.96,0.6733,0.4467,0.68,0.6333,0.6467,0.9023,0.8544,0.3157,0.4926,0.767,0.7298,0.7442,,,,,,0.5533,0.5,0.4867,0.5933,0.6533,0.747,0.533325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0158,0.0
LC-QuAD,fixing,gpt-oss:20b,0.5,0.6133,0.7933,0.94,0.9133,0.5533,0.1733,0.22,0.18,0.6872,0.8242,0.0785,0.7085,0.3697,0.4278,0.3827,,,,,,0.1733,0.1267,0.2467,0.2867,0.1911,0.3934,0.20835000000000004,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.032,0.0
LC-QuAD,fixing,grok-3-mini,0.8867,0.8733,0.94,0.98,0.9333,0.8067,0.3333,0.3467,0.3533,0.9336,0.9399,0.0604,0.7685,0.5558,0.5593,0.588,,,,,,0.1533,0.14,0.1533,0.2267,0.3444,0.5677,0.168325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0096,0.0
LC-QuAD,fixing,llama3.1:70b,0.5933,0.44,0.6933,0.8133,0.9133,0.4133,0.1533,0.14,0.1533,0.7596,0.7712,0.0517,0.7427,0.3918,0.3958,0.3976,,,,,,0.02,0.04,0.12,0.0533,0.1489,0.3951,0.058325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,llama3.1:8b,0.0867,0.0067,0.0867,0.0733,0.6267,0.0,0.0,0.0067,0.0067,0.3944,0.2869,0.1198,0.2028,0.0965,0.0989,0.1014,,,,,,0.0,0.0,0.0133,0.0,0.0045,0.0989,0.003325,,,,,,,,,,,,,,,,,0.0005,0.5,0.0009,0.0005,1.0,1.0
LC-QuAD,fixing,mistral-small:24b,0.68,0.5333,0.7267,0.8933,0.9733,0.52,0.48,0.5,0.52,0.8081,0.7404,0.001,0.6622,0.7423,0.7325,0.7523,,,,,,0.2533,0.18,0.2533,0.1667,0.5,0.7424,0.21332500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
LC-QuAD,fixing,o3,0.6733,0.7733,0.9267,0.9867,0.96,0.7333,0.2933,0.26,0.3333,0.8884,0.929,0.04,0.8766,0.5632,0.5703,0.5964,,,,,,0.1467,0.12,0.1333,0.1533,0.2955,0.5766,0.138325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0384,0.0
LC-QuAD,wikidata,gpt-4o,0.9527,0.9189,0.9189,0.9662,0.0541,0.0,,,,0.9527,0.9386,0.9459,0.0473,,,,,,,,,0.973,0.9797,0.9459,0.9932,,,0.97295,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,1.0
LC-QuAD,wikidata,gpt-5,0.6689,0.4662,0.6419,0.7162,0.7027,0.1892,,,,0.7864,0.6843,0.2913,0.4566,,,,,,,,,0.4865,0.4797,0.3446,0.473,,,0.44594999999999996,,,,,,,,,,,,,,,,,0.0113,0.0,0.0218,0.0,0.9978,0.0113
LC-QuAD,wikidata,gpt-5-mini,0.7635,0.5811,0.7162,0.7432,0.4459,0.0946,,,,0.8284,0.6998,0.5466,0.2619,,,,,,,,,0.6959,0.7095,0.6014,0.7297,,,0.6841250000000001,,,,,,,,,,,,,,,,,0.008,0.0158,0.092,0.0631,0.9988,0.5982
LC-QuAD,wikidata,gpt-5-nano,0.7838,0.5338,0.6959,0.7095,0.5135,0.1014,,,,0.8244,0.6769,0.4664,0.2815,,,,,,,,,0.6959,0.6689,0.5541,0.6689,,,0.64695,,,,,,,,,,,,,,,,,0.0,0.0122,0.1055,0.0244,0.9622,0.5
LC-QuAD,wikidata,gpt-oss:20b,0.2533,0.0933,0.3267,0.3,0.84,0.0333,,,,0.4325,0.3526,0.0957,0.237,,,,,,,,,0.1733,0.18,0.1333,0.2333,,,0.179975,,,,,,,,,,,,,,,,,0.0147,0.8204,0.1744,0.4373,0.6612,0.9539
LC-QuAD,wikidata,grok-3-mini,0.2703,0.2027,0.5135,0.3514,0.8446,0.1486,,,,0.5197,0.4693,0.0742,0.3305,,,,,,,,,0.0676,0.0878,0.1149,0.1554,,,0.10642499999999999,,,,,,,,,,,,,,,,,0.6445,0.5,0.0607,0.5627,0.5775,0.7728
LC-QuAD,wikidata,llama3.1:70b,0.14,0.0533,0.26,0.1933,0.68,0.0067,,,,0.4122,0.3464,0.1159,0.1895,,,,,,,,,0.0133,0.02,0.02,0.0533,,,0.02665,,,,,,,,,,,,,,,,,0.2517,0.6047,0.4402,0.5,0.8642,0.75
LC-QuAD,wikidata,llama3.1:8b,0.02,0.0,0.0667,0.0467,0.7133,0.0,,,,0.1848,0.1558,0.0576,0.101,,,,,,,,,0.0067,0.0067,0.0,0.0067,,,0.005025,,,,,,,,,,,,,,,,,0.5,1.0,0.0059,0.0078,1.0,1.0
LC-QuAD,wikidata,mistral-small:24b,0.7027,0.5473,0.6622,0.6486,0.3041,0.0068,,,,0.7489,0.6474,0.6606,0.0947,,,,,,,,,0.6622,0.6892,0.7027,0.7635,,,0.7044,,,,,,,,,,,,,,,,,0.0,0.0,0.0001,0.0001,1.0,0.5
LC-QuAD,wikidata,o3,0.3699,0.2329,0.4863,0.3973,0.9384,0.1986,,,,0.6854,0.5514,0.0416,0.4875,,,,,,,,,0.1507,0.1644,0.1027,0.1575,,,0.143825,,,,,,,,,,,,,,,,,0.0494,0.0175,0.4357,0.0048,0.1509,0.0019
LC-QuAD,zero-shot,deepseek-chat,0.3,0.1533,0.4333,0.3133,0.4933,0.0267,0.2905,,,0.6026,0.4925,0.2264,0.2699,0.5753,,,0.3467,0.5333,0.6,0.48,0.1467,0.0933,0.0933,0.1267,0.1333,0.2905,0.5753,0.11165,0.30935251798561153,0.18181818181818182,0.620326618705036,0.3789,0.4489795918367347,0.42574257425742573,0.2903225806451613,0.31932773109243695,0.4864864864864865,1.0,0.22948648648648648,0.0,0.1590909090909091,0.1111111111111111,0.5118772727272728,0.3504833333333333,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,deepseek-reasoner,0.14,0.1067,0.38,0.2667,0.8533,0.0733,,,,0.4725,0.3936,0.0436,0.3008,,,,0.2133,0.4,0.2867,0.82,0.1533,0.0267,0.04,0.0467,0.0333,,,0.036675,0.1510791366906475,0.0,0.4915870503597123,0.2311,0.3877551020408163,0.0,0.272108843537415,0.0,0.8482758620689655,1.0,0.0451503448275862,0.0,0.11594202898550725,0.0,0.41740289855072454,0.11935833333333333,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gemini-2.0-flash,0.24,0.08,0.3533,0.28,0.64,0.0333,0.7297,,,0.522,0.392,0.1275,0.2705,0.8421,,,0.26,0.4733,0.6733,0.6133,0.2067,0.0067,0.0,0.0067,0.0067,0.7297,0.8421,0.005025,0.23404255319148937,0.3333333333333333,0.5195716312056738,0.5596222222222222,0.37735849056603776,0.29545454545454547,0.40540540540540543,0.23893805309734514,0.6357142857142857,0.7,0.12822285714285714,0.11789000000000001,0.08333333333333333,0.06666666666666667,0.40512750000000003,0.3393666666666667,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gemini-2.5-flash,0.2733,0.1667,0.4667,0.28,0.8467,0.1533,0.6014,,,0.5536,0.4671,0.039,0.3603,0.7389,,,0.34,0.48,0.3,0.82,0.18,0.0467,0.02,0.0667,0.0267,0.6014,0.7389,0.040025,0.2835820895522388,0.1875,0.5590022388059701,0.5081875,0.4722222222222222,0.3333333333333333,0.2827586206896552,0.2,0.8472222222222222,0.8333333333333334,0.03368541666666666,0.16666666666666666,0.17123287671232876,0.0,0.4730486301369863,0.2501,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gemini-2.5-pro,0.24,0.1733,0.4067,0.34,0.8467,0.14,0.4459,,,0.5941,0.5123,0.0378,0.4193,0.7187,,,0.2867,0.4133,0.32,0.84,0.1733,0.0333,0.0133,0.0533,0.0333,0.4459,0.7187,0.0333,0.2517482517482518,0.0,0.6035272727272727,0.40135714285714286,0.40939597315436244,0.0,0.32413793103448274,0.8,0.8456375838926175,1.0,0.03809328859060403,0.0,0.1793103448275862,0.0,0.5270910344827586,0.08316,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-4.1-2025-04-14,0.2333,0.0867,0.3867,0.2467,0.6733,0.0533,0.2905,,,0.5444,0.4429,0.1049,0.294,0.5729,,,0.2667,0.3933,0.42,0.68,0.0533,0.0467,0.0533,0.06,0.0533,0.2905,0.5729,0.053325,0.2413793103448276,0.0,0.5543896551724139,0.25576,0.3877551020408163,0.3333333333333333,0.2807017543859649,0.1388888888888889,0.6778523489932886,0.0,0.09888120805369129,1.0,0.08666666666666667,,0.44291533333333327,,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-4.1-mini-2025-04-14,0.1867,0.0733,0.3733,0.2467,0.6733,0.0267,0.1824,,,0.4461,0.3574,0.12,0.2014,0.4483,,,0.24,0.5467,0.6067,0.6533,0.08,0.0733,0.0667,0.1467,0.18,0.1824,0.4483,0.116675,0.18382352941176472,0.21428571428571427,0.45387647058823527,0.3701785714285714,0.38461538461538464,0.3673469387755102,0.21052631578947367,0.25892857142857145,0.6666666666666666,1.0,0.12248027210884352,0.0,0.07746478873239436,0.0,0.3691978873239437,0.1488,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-4.1-nano-2025-04-14,0.18,0.12,0.3533,0.2733,0.6733,0.04,0.2027,,,0.3369,0.2433,0.194,0.1084,0.3525,,,0.3533,0.54,0.6,0.44,0.96,0.16,0.1933,0.3467,0.3333,0.2027,0.3525,0.25832499999999997,0.16981132075471697,0.20454545454545456,0.35054150943396223,0.30393636363636367,0.34,0.36,0.3333333333333333,0.23655913978494625,0.6075949367088608,0.7464788732394366,0.23211898734177214,0.15164225352112676,,0.12,,0.24327933333333335,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-4o,0.4333,0.26,0.52,0.46,0.5333,0.0133,0.3581,,,0.6143,0.5001,0.3804,0.1962,0.5562,,,0.4267,0.4667,0.5467,0.5467,0.38,0.3667,0.3733,0.42,0.48,0.3581,0.5562,0.41,0.42758620689655175,0.6,0.6131179310344826,0.64722,0.47058823529411764,0.5344827586206896,0.5217391304347826,0.44881889763779526,0.5405405405405406,0.0,0.3720364864864865,1.0,0.22105263157894736,0.32727272727272727,0.48307894736842105,0.5295345454545455,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-5,0.5667,0.3,0.5533,0.54,0.78,0.1133,0.5811,,,0.7586,0.5924,0.2037,0.4188,0.7469,,,0.58,0.5467,0.5333,0.8,0.16,0.3467,0.3533,0.2267,0.3133,0.5811,0.7469,0.31,0.5774647887323944,0.375,0.7664936619718309,0.61785,0.5510204081632653,0.6666666666666666,0.5369127516778524,1.0,0.7959183673469388,0.0,0.18843129251700677,0.9523666666666667,0.2867132867132867,0.5714285714285714,0.5924419580419581,0.5918428571428571,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-5-mini,0.6467,0.4733,0.6467,0.6667,0.5533,0.0933,0.7027,,,0.7288,0.6537,0.4391,0.3288,0.7981,,,0.6267,0.6333,0.66,0.58,0.1533,0.5467,0.5533,0.5133,0.6067,0.7027,0.7981,0.555,0.6453900709219859,0.6666666666666666,0.723895744680851,0.8059666666666667,0.6458333333333334,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.5694444444444444,0.16666666666666666,0.4243826388888889,0.7916666666666666,0.45390070921985815,0.7777777777777778,0.6386602836879433,0.8888888888888888,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-5-nano,0.6133,0.4333,0.6267,0.6133,0.5733,0.0933,0.5608,,,0.7049,0.5967,0.4037,0.2769,0.686,,,0.6133,0.5867,0.6267,0.6,0.1733,0.5333,0.5733,0.5267,0.5667,0.5608,0.686,0.55,0.6197183098591549,0.5,0.7146478873239436,0.53125,0.626984126984127,0.625,0.6304347826086957,0.4166666666666667,0.5955882352941176,0.35714285714285715,0.38522720588235293,0.5833357142857143,0.42028985507246375,0.5833333333333334,0.5946760869565217,0.6203666666666666,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,gpt-oss:20b,0.1667,0.1133,0.28,0.2867,0.8467,0.0667,0.1824,,,0.3719,0.3289,0.088,0.2483,0.3995,,,0.18,0.3067,0.2933,0.8533,0.1333,0.12,0.1467,0.1267,0.1467,0.1824,0.3995,0.135025,0.1643835616438356,0.25,0.3709952054794521,0.40475,0.2753623188405797,0.3333333333333333,0.2765957446808511,0.4444444444444444,0.8832116788321168,0.46153846153846156,0.06192992700729926,0.36237692307692304,0.10714285714285714,0.2,0.32049285714285713,0.44603000000000004,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,grok-3-mini,0.2733,0.1933,0.44,0.3467,0.8467,0.16,,,,0.5756,0.4669,0.081,0.3644,,,,0.3,0.44,0.3667,0.8533,0.1933,0.0733,0.0933,0.0867,0.12,,,0.09332499999999999,0.2777777777777778,0.16666666666666666,0.5870770833333334,0.30058333333333337,0.43661971830985913,0.5,0.35172413793103446,0.2,0.8571428571428571,0.3333333333333333,0.07418299319727892,0.4166666666666667,0.2,0.0,0.4789234482758621,0.11882,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,llama3.1:70b,0.1133,0.0533,0.2467,0.1867,0.7267,0.0067,,,,0.3887,0.2973,0.095,0.1847,,,,0.1333,0.74,0.6933,0.7267,0.02,0.04,0.04,0.0467,0.0667,,,0.048350000000000004,0.1103448275862069,0.2,0.3916855172413793,0.30204,0.4,0.2357142857142857,0.2631578947368421,0.16071428571428573,0.7297297297297297,0.5,0.09355202702702703,0.2,0.0472972972972973,0.5,0.2945635135135135,0.5,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,llama3.1:8b,0.0133,0.0,0.0067,0.0,0.9067,0.0,0.0135,,,0.1069,0.0816,0.0142,0.0427,0.1361,,,0.9533,0.8667,0.9133,0.2533,0.9733,0.0133,0.0,0.0133,0.0267,0.0135,0.1361,0.013325,0.14285714285714285,0.006993006993006993,0.18367142857142857,0.10311888111888111,0.0,0.007633587786259542,0.0,0.0,0.9615384615384616,0.8951612903225806,0.02564230769230769,0.011784677419354838,0.0,0.0,0.15835,0.07952191780821917,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,mistral-small:24b,0.3986,0.2365,0.4662,0.4257,0.5135,0.0,0.5135,,,0.5949,0.5091,0.3764,0.169,0.6899,,,0.4122,0.5338,0.5743,0.5068,0.9324,0.2703,0.2568,0.4257,0.4324,0.5135,0.6899,0.34629999999999994,0.39855072463768115,0.4,0.5979521739130436,0.5527200000000001,,0.46621621621621623,0.5,0.4246575342465753,0.5102040816326531,1.0,0.3789877551020408,0.0,0.4,0.2246376811594203,0.6266700000000001,0.5005876811594202,1.0,1.0,1.0,1.0,1.0,1.0
LC-QuAD,zero-shot,o3,0.3041,0.1486,0.4797,0.277,0.9054,0.0946,0.25,,,0.5609,0.4804,0.0539,0.3565,0.5506,,,0.3176,0.4595,0.277,0.8851,0.1554,0.1284,0.1216,0.1014,0.0811,0.25,0.5506,0.108125,0.3055555555555556,0.25,0.5643520833333332,0.437025,0.46853146853146854,0.8,0.273972602739726,0.5,0.9090909090909091,0.8,0.04977692307692308,0.17142,0.1510791366906475,0.1111111111111111,0.4906640287769784,0.3214666666666667,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,classification,deepseek-chat,0.98,0.8467,0.9,0.9333,0.56,0.4533,0.5133,0.5133,0.5133,0.9925,0.9582,0.4117,0.5403,0.6638,0.6377,0.6588,,,,,,0.3667,0.3667,0.4,0.4333,0.5133,0.6534,0.391675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.8555,0.0
qawiki,classification,deepseek-reasoner,0.8933,0.8667,0.56,0.9133,0.4133,0.3067,0.52,0.4467,0.6,0.9267,0.9058,0.5867,0.3467,0.6656,0.531,0.6823,,,,,,0.2667,0.22,0.48,0.6267,0.5222,0.6263,0.39835,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0
qawiki,classification,gemini-2.0-flash,0.8667,0.8867,1.0,0.9533,0.3533,0.2933,0.4667,0.4333,0.3733,0.9471,0.9502,0.5642,0.3982,0.6483,0.6061,0.5276,0.8733,0.3733,,,0.2933,0.3067,0.3,1.0,0.5467,0.4244,0.594,0.53835,0.87248322147651,0.0,0.9492456375838925,0.625,1.0,1.0,,,,,,,0.8851351351351351,1.0,0.949497972972973,1.0,0.0,0.0,0.0,0.0,1.0,0.0
qawiki,classification,gemini-2.5-flash,0.8933,0.9467,1.0,0.98,0.8267,0.7733,0.26,0.3267,0.2467,0.9404,0.9787,0.1678,0.7438,0.5804,0.5794,0.5752,0.8333,0.9467,,,0.8,0.12,0.12,1.0,0.2467,0.2778,0.5783,0.371675,0.9504132231404959,0.6551724137931034,0.9550727272727273,0.8790793103448277,1.0,1.0,,,,,,,0.9652777777777778,0.5,0.9810659722222224,0.9222333333333333,0.0,0.0,0.0,0.0,0.6821,0.0
qawiki,classification,gemini-2.5-pro,0.9,0.72,1.0,0.8467,0.88,0.62,0.3,0.2333,0.28,0.9486,0.8374,0.11,0.7094,0.5939,0.499,0.5359,0.9533,0.9733,,,0.6733,0.1067,0.0933,1.0,0.22,0.2711,0.5429,0.355,0.9848484848484849,0.2777777777777778,0.9848484848484849,0.6829444444444444,1.0,1.0,,,,,,,0.7571428571428571,0.2,0.8624642857142858,0.48690999999999995,0.0,0.0,0.0,0.0,0.0144,0.0
qawiki,classification,gpt-4.1-2025-04-14,0.92,0.8867,0.4667,0.98,0.7933,0.68,0.2667,0.2667,0.2867,0.9874,0.9681,0.2003,0.7671,0.5831,0.569,0.5555,0.9133,0.4667,,,0.6867,0.0933,0.0933,0.1533,0.2067,0.2734,0.5692,0.13665,0.9432624113475178,0.5555555555555556,0.9937127659574467,0.8890333333333333,0.46621621621621623,0.5,,,,,,,0.8843537414965986,1.0,0.9674442176870747,1.0,0.0,0.0,0.0871,0.0,0.0015,0.0
qawiki,classification,gpt-4.1-mini-2025-04-14,0.9067,0.8733,0.4267,0.9533,0.6667,0.5467,0.3267,0.3733,0.3333,0.9635,0.9346,0.3269,0.6176,0.5754,0.5618,0.5564,0.9333,0.56,,,0.52,0.2333,0.2333,0.2133,0.3533,0.3444,0.5645,0.2583,0.9565217391304348,0.3333333333333333,0.9821123188405797,0.749025,0.4666666666666667,0.4166666666666667,,,,,,,0.8785714285714286,0.8,0.9374614285714286,0.8949999999999999,0.0,0.0,0.7159,0.0,0.0465,0.0
qawiki,classification,gpt-4.1-nano-2025-04-14,0.9667,0.54,0.68,0.54,0.3867,0.0,0.94,0.9533,0.9333,0.9667,0.6912,0.6075,0.0,0.94,0.9533,0.9333,0.62,0.3733,,,1.0,0.98,0.9867,0.6867,0.54,0.9422,0.9422,0.79835,0.9489795918367347,1.0,0.9489795918367347,1.0,0.6052631578947368,0.7053571428571429,,,,,,,,0.54,,0.691204,0.0,0.0,0.0001,0.0003,1.0,1.0
qawiki,classification,gpt-4o,0.98,0.9333,1.0,0.9867,0.2733,0.2133,0.74,0.0933,0.14,0.2828,0.9743,0.7203,0.2614,0.1983,0.1749,0.2079,0.9867,0.72,,,0.7467,0.7067,0.7133,1.0,0.7333,0.3244,0.1937,0.7883249999999999,0.9931972789115646,0.3333333333333333,0.2834469387755102,0.25,1.0,1.0,,,,,,,0.9166666666666666,0.9411764705882353,0.9787041666666667,0.9722803921568628,0.0,0.0,0.0,0.0,1.0,0.0
qawiki,classification,gpt-5,0.9133,0.9267,1.0,0.9733,0.52,0.4467,0.5267,0.6067,0.6067,0.9636,0.9644,0.48,0.4733,0.7286,0.7324,0.746,0.88,0.98,,,0.48,0.4133,0.4067,1.0,0.5333,0.58,0.7357,0.588325,0.9473684210526315,0.6470588235294118,0.9752368421052632,0.8727058823529412,1.0,1.0,,,,,,,0.9300699300699301,0.8571428571428571,0.9633384615384617,0.9863999999999999,0.0,0.0,0.0,0.0,0.9739,0.0
qawiki,classification,gpt-5-mini,0.86,0.66,1.0,0.78,0.6267,0.32,0.52,0.5267,0.5133,0.9295,0.7907,0.3733,0.4344,0.6695,0.6549,0.6581,0.8267,0.9733,,,0.3733,0.42,0.4267,1.0,0.48,0.52,0.6608,0.5816749999999999,0.8872180451127819,0.6470588235294118,0.9485684210526316,0.7803941176470588,1.0,1.0,,,,,,,0.6619718309859155,0.625,0.7933239436619718,0.74375,0.0003,0.1302,0.0,0.3359,0.0113,0.0
qawiki,classification,gpt-5-nano,0.7867,0.7067,1.0,0.7733,0.16,0.0067,0.08,0.5467,0.0667,0.7893,0.7687,0.8356,0.0694,0.08,0.5467,0.0667,0.7667,0.9267,,,0.06,0.1133,0.1,1.0,0.0333,0.2311,0.2311,0.31165000000000004,0.8120300751879699,0.5882352941176471,0.8120300751879699,0.611764705882353,1.0,1.0,,,,,,,0.7323943661971831,0.25,0.792019014084507,0.3541625,0.0,0.0,0.0,0.0775,1.0,1.0
qawiki,classification,gpt-oss:20b,0.7133,0.5467,1.0,0.7267,0.7867,0.3667,0.2933,0.2333,0.2667,0.8163,0.7847,0.1779,0.5467,0.4965,0.4492,0.4766,0.66,0.8733,,,0.4133,0.22,0.1667,1.0,0.28,0.2644,0.4741,0.416675,0.7058823529411765,0.7857142857142857,0.8075352941176471,0.9017857142857143,1.0,1.0,,,,,,,0.5524475524475524,0.42857142857142855,0.7840706293706294,0.7976285714285714,0.0,0.0,0.0,0.0,0.9283,0.0
qawiki,classification,grok-3-mini,0.8867,0.7667,1.0,0.8867,0.8,0.58,0.3467,0.38,0.34,0.966,0.9123,0.1934,0.7055,0.5385,0.5576,0.5358,0.9333,0.9533,,,0.6,0.1667,0.1667,1.0,0.2867,0.3556,0.544,0.40502499999999997,0.9694656488549618,0.3157894736842105,0.9939519083969465,0.7730263157894737,1.0,1.0,,,,,,,0.7724137931034483,0.6,0.9116372413793103,0.93286,0.0,0.0,0.0,0.0,0.9995,0.0
qawiki,classification,llama3.1:70b,0.8067,0.4733,1.0,0.8267,0.9333,0.4667,0.1467,0.0933,0.1333,0.9067,0.7929,0.0377,0.7731,0.4186,0.3589,0.395,0.8133,0.0467,,,0.46,0.0267,0.0333,1.0,0.0533,0.1244,0.3908,0.278325,0.8163265306122449,0.3333333333333333,0.9091789115646259,0.7852,1.0,1.0,,,,,,,0.4608695652173913,0.5142857142857142,0.7775495652173913,0.8435228571428572,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,classification,llama3.1:8b,0.1133,0.1,1.0,0.3533,0.68,0.06,0.0133,0.0067,0.0133,0.4828,0.4791,0.1024,0.385,0.2243,0.1549,0.1793,0.88,0.1067,,,0.82,0.0,0.0133,1.0,0.0,0.0111,0.1862,0.253325,0.0,0.11409395973154363,0.8,0.48069395973154366,1.0,1.0,,,,,,,0.15,0.09230769230769231,0.5286299999999999,0.47152692307692307,0.0001,0.0003,0.0,0.0,1.0,0.002
qawiki,classification,mistral-small:24b,0.8533,0.66,1.0,0.8667,0.6933,0.4067,0.3733,0.2333,0.24,0.9396,0.8434,0.2837,0.5736,0.5789,0.4543,0.4658,0.9067,0.0,,,0.5667,0.1867,0.2,1.0,0.1267,0.2822,0.4997,0.37835,0.9130434782608695,0.16666666666666666,0.9563217391304347,0.7469583333333333,,1.0,,,,,,,0.6,0.6642857142857143,0.90105,0.8392478571428571,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,classification,o3,0.7667,0.74,1.0,0.8733,0.86,0.6133,0.2867,0.2733,0.3067,0.9033,0.8855,0.1241,0.7496,0.5253,0.5431,0.5611,0.8267,0.9733,,,0.6667,0.14,0.1067,1.0,0.1933,0.2889,0.5432,0.36000000000000004,0.8248175182481752,0.15384615384615385,0.9318890510948904,0.6015384615384616,1.0,1.0,,,,,,,0.7676056338028169,0.25,0.8951415492957747,0.7148249999999999,0.0,0.0,0.0,0.0,0.0378,0.0
qawiki,fixing,deepseek-chat,0.7933,0.7133,0.8867,0.94,0.9467,0.6933,0.38,0.42,0.38,0.8925,0.8845,0.0312,0.7526,0.5727,0.5964,0.5797,,,,,,0.1133,0.1,0.1867,0.1867,0.3933,0.5829,0.146675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,deepseek-reasoner,0.5867,0.52,0.68,0.8133,0.9067,0.48,0.1,0.1133,0.0933,0.7372,0.7351,0.0802,0.7272,0.3148,0.2896,0.2818,,,,,,0.0667,0.0533,0.14,0.1267,0.1022,0.2954,0.09667500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0205,0.0
qawiki,fixing,gemini-2.0-flash,0.7533,0.6467,0.8067,0.8867,0.9467,0.6333,0.7333,0.7533,0.7267,0.803,0.8483,0.0193,0.8033,0.8361,0.8383,0.8408,,,,,,0.02,0.0067,0.0067,0.0867,0.7378,0.8384,0.030025,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,gemini-2.5-flash,0.86,0.8867,0.8867,0.9667,0.9267,0.8133,0.68,0.6533,0.6133,0.8998,0.9033,0.073,0.7792,0.7605,0.7503,0.7192,,,,,,0.1333,0.0867,0.1133,0.1933,0.6489,0.7433,0.13165,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0121,0.0
qawiki,fixing,gemini-2.5-pro,0.76,0.5933,0.74,0.8667,0.98,0.58,0.4467,0.4533,0.42,0.8519,0.757,0.02,0.7066,0.6166,0.623,0.5838,,,,,,0.1,0.06,0.06,0.1467,0.44,0.6078,0.091675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,gpt-4.1-2025-04-14,0.76,0.7467,0.8667,0.9533,0.8933,0.6667,0.5,0.4933,0.4867,0.9013,0.9098,0.0874,0.7795,0.7944,0.789,0.7863,,,,,,0.0733,0.0733,0.1067,0.12,0.4933,0.7899,0.093325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,gpt-4.1-mini-2025-04-14,0.7267,0.68,0.8333,0.98,0.8867,0.6,0.3867,0.3733,0.4133,0.869,0.8699,0.0686,0.7808,0.729,0.7245,0.721,,,,,,0.0867,0.1133,0.2133,0.1333,0.3911,0.7248,0.13665,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,gpt-4.1-nano-2025-04-14,0.4733,0.1733,0.56,0.4267,0.6267,0.1267,0.4867,0.5,0.54,0.5857,0.3823,0.2356,0.2938,0.6356,0.6437,0.6641,,,,,,0.2333,0.3267,0.38,0.04,0.5089,0.6478,0.24500000000000002,,,,,,,,,,,,,,,,,0.0,0.0178,0.0043,0.0809,0.6482,0.0002
qawiki,fixing,gpt-4o,0.84,0.7867,0.92,0.9333,0.92,0.74,0.5067,0.5333,0.5133,0.5993,0.5796,0.0572,0.507,0.3598,0.3719,0.3537,,,,,,0.38,0.4067,0.4533,0.3867,0.5178,0.3618,0.406675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,gpt-5,0.8933,0.8933,0.9533,0.9867,0.62,0.5133,0.7067,0.7067,0.6867,0.9418,0.9191,0.38,0.4667,0.822,0.8343,0.8067,,,,,,0.5133,0.5267,0.5067,0.58,0.7,0.821,0.531675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.1958,0.0
qawiki,fixing,gpt-5-mini,0.86,0.84,0.9533,0.9733,0.7067,0.56,0.7333,0.7667,0.7267,0.8897,0.9187,0.2933,0.4163,0.8109,0.839,0.8203,,,,,,0.6533,0.6133,0.64,0.6333,0.7422,0.8234,0.6349750000000001,,,,,,,,,,,,,,,,,0.0004,0.0,0.0,0.0,0.0,0.0
qawiki,fixing,gpt-5-nano,0.7533,0.7067,0.8067,0.96,0.7267,0.44,0.6333,0.6533,0.5933,0.8314,0.8437,0.2669,0.4701,0.7281,0.7279,0.7132,,,,,,0.56,0.48,0.4467,0.5467,0.6266,0.7231,0.50835,,,,,,,,,,,,,,,,,0.0,0.0,0.0003,0.0,0.0002,0.0
qawiki,fixing,grok-3-mini,0.8533,0.8,0.92,0.9867,0.9333,0.74,0.3867,0.32,0.38,0.9231,0.934,0.0635,0.7428,0.5878,0.5636,0.5961,,,,,,0.1267,0.1333,0.2267,0.2467,0.3622,0.5825,0.18335,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.1917,0.0
qawiki,fixing,llama3.1:70b,0.5733,0.46,0.68,0.7733,0.8933,0.4133,0.1467,0.1733,0.1467,0.7004,0.7224,0.0639,0.708,0.3565,0.3712,0.365,,,,,,0.02,0.06,0.14,0.04,0.1556,0.3642,0.065,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0001,0.0
qawiki,fixing,llama3.1:8b,0.0933,0.02,0.1467,0.12,0.66,0.0067,0.0067,0.0,0.0267,0.427,0.2836,0.0856,0.2307,0.126,0.1202,0.1375,,,,,,0.0,0.0067,0.0,0.0,0.0111,0.1279,0.001675,,,,,,,,,,,,,,,,,0.0005,0.3125,0.0013,0.0,1.0,0.5
qawiki,fixing,mistral-small:24b,0.66,0.4867,0.6933,0.88,0.96,0.4733,0.58,0.5333,0.5467,0.7737,0.7488,0.0141,0.6267,0.7513,0.7381,0.7446,,,,,,0.26,0.1667,0.2533,0.24,0.5533,0.7447,0.22999999999999998,,,,,,,,,,,,,,,,,0.0012,0.0001,0.0001,0.0,0.0,0.0
qawiki,fixing,o3,0.7067,0.7333,0.8533,0.9,0.9667,0.7,0.38,0.4133,0.4067,0.6446,0.7051,0.0333,0.6443,0.4113,0.4086,0.4298,,,,,,0.3067,0.2733,0.1933,0.26,0.4,0.4166,0.258325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
qawiki,wikidata,deepseek-chat,0.277,0.223,0.5878,0.5135,0.6419,0.1554,,,,0.3761,0.3353,0.1248,0.1649,,,,,,,,,0.1892,0.1824,0.3514,0.3851,,,0.277025,,,,,,,,,,,,,,,,,0.0354,0.0214,0.0001,0.0027,0.3136,0.0007
qawiki,wikidata,gemini-2.0-flash,0.1892,0.1081,0.4122,0.3986,0.6081,0.0608,,,,0.4271,0.3339,0.098,0.2263,,,,,,,,,0.0811,0.1081,0.1757,0.1824,,,0.136825,,,,,,,,,,,,,,,,,0.939,0.7597,0.0998,0.3258,0.8389,0.1938
qawiki,wikidata,gemini-2.5-flash,0.2973,0.1622,0.5676,0.4865,0.8784,0.1622,,,,0.3593,0.3268,0.0191,0.2641,,,,,,,,,0.277,0.3176,0.2973,0.3176,,,0.302375,,,,,,,,,,,,,,,,,0.28,0.9646,0.0111,0.0027,0.1635,0.779
qawiki,wikidata,gemini-2.5-pro,0.2162,0.1757,0.4122,0.3378,0.7635,0.1622,,,,0.5105,0.4907,0.0379,0.3979,,,,,,,,,0.027,0.0405,0.0743,0.0676,,,0.05235,,,,,,,,,,,,,,,,,0.7878,0.2517,0.0192,0.1077,0.7443,0.3238
qawiki,wikidata,gpt-4.1-2025-04-14,0.25,0.1216,0.4257,0.3851,0.6284,0.0743,,,,0.651,0.5123,0.1003,0.3573,,,,,,,,,0.027,0.027,0.0743,0.0608,,,0.047275,,,,,,,,,,,,,,,,,0.8365,0.1509,0.2122,0.1002,0.5679,0.2539
qawiki,wikidata,gpt-4.1-mini-2025-04-14,0.2162,0.0608,0.2905,0.2905,0.8649,0.027,,,,0.3732,0.2293,0.0392,0.1782,,,,,,,,,0.0946,0.1014,0.0878,0.2095,,,0.12332499999999999,,,,,,,,,,,,,,,,,0.8923,0.9888,0.9998,0.9155,0.0,0.8125
qawiki,wikidata,gpt-4.1-nano-2025-04-14,0.2973,0.0608,0.3378,0.5,0.8919,0.0405,,,,0.4626,0.338,0.0369,0.3131,,,,,,,,,0.1149,0.0946,0.75,0.1622,,,0.28042500000000004,,,,,,,,,,,,,,,,,0.0551,0.8949,0.962,0.0002,0.0,0.2266
qawiki,wikidata,gpt-4o,0.9189,0.7905,0.8311,0.8716,0.1622,0.0,,,,0.9189,0.8295,0.8328,0.0811,,,,,,,,,0.9189,0.9257,0.8514,0.9392,,,0.9088,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,1.0
qawiki,wikidata,gpt-5,0.8176,0.6351,0.7838,0.7635,0.5,0.1486,,,,0.8892,0.7796,0.4891,0.306,,,,,,,,,0.6622,0.6486,0.5405,0.5946,,,0.611475,,,,,,,,,,,,,,,,,0.0001,0.0662,0.1279,0.3038,0.9967,0.9408
qawiki,wikidata,gpt-5-mini,0.8446,0.6959,0.7973,0.8041,0.3784,0.1014,,,,0.8914,0.8215,0.6117,0.2384,,,,,,,,,0.7162,0.7568,0.6622,0.75,,,0.7212999999999999,,,,,,,,,,,,,,,,,0.0004,0.0035,0.0401,0.0925,1.0,0.8281
qawiki,wikidata,gpt-5-nano,0.7703,0.5946,0.7297,0.7973,0.4865,0.1081,,,,0.8297,0.7449,0.5045,0.3213,,,,,,,,,0.7162,0.6689,0.5608,0.6892,,,0.658775,,,,,,,,,,,,,,,,,0.0,0.0007,0.0586,0.0098,0.9992,0.6875
qawiki,wikidata,gpt-oss:20b,0.28,0.1867,0.42,0.3733,0.8267,0.1067,,,,0.4299,0.3768,0.1137,0.2707,,,,,,,,,0.2467,0.24,0.24,0.2867,,,0.25335,,,,,,,,,,,,,,,,,0.0121,0.3238,0.2664,0.3258,0.7214,0.1938
qawiki,wikidata,grok-3-mini,0.2703,0.2027,0.5,0.4054,0.8919,0.1824,,,,0.4757,0.461,0.0465,0.3278,,,,,,,,,0.0878,0.1081,0.1351,0.1757,,,0.126675,,,,,,,,,,,,,,,,,0.8998,0.8365,0.3179,0.3776,0.6682,0.5841
qawiki,wikidata,llama3.1:70b,0.14,0.0467,0.1867,0.16,0.6467,0.0133,,,,0.3889,0.3251,0.1026,0.1818,,,,,,,,,0.0067,0.0133,0.04,0.04,,,0.025,,,,,,,,,,,,,,,,,0.9331,0.7461,0.5,0.9157,0.9733,0.75
qawiki,wikidata,llama3.1:8b,0.02,0.0,0.08,0.0533,0.78,0.0,,,,0.18,0.1406,0.0613,0.0632,,,,,,,,,0.0,0.0,0.0267,0.0133,,,0.01,,,,,,,,,,,,,,,,,0.3125,1.0,0.1334,0.0195,0.9965,1.0
qawiki,wikidata,mistral-small:24b,0.777,0.6284,0.7162,0.723,0.2703,0.0068,,,,0.8561,0.7402,0.7019,0.1199,,,,,,,,,0.6824,0.6757,0.7838,0.8243,,,0.7415499999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.9954,0.5
qawiki,wikidata,o3,0.3514,0.2162,0.527,0.3378,0.8581,0.1419,,,,0.5997,0.5264,0.0895,0.3936,,,,,,,,,0.1824,0.1892,0.1622,0.1757,,,0.177375,,,,,,,,,,,,,,,,,0.0401,0.0669,0.0266,0.1744,0.0436,0.1938
qawiki,zero-shot,deepseek-chat,0.2067,0.1467,0.4133,0.3667,0.62,0.06,0.1892,,,0.4229,0.374,0.1295,0.2266,0.3911,,,0.2267,0.5133,0.5467,0.62,0.1333,0.0867,0.1067,0.2267,0.2133,0.1892,0.3911,0.15835,0.2108843537414966,0.0,0.4231775510204082,0.41076666666666667,0.34285714285714286,0.43478260869565216,0.2903225806451613,0.3865546218487395,0.62,,0.12948333333333334,,0.1366906474820144,0.2727272727272727,0.3640539568345324,0.5001363636363636,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,deepseek-reasoner,0.1267,0.08,0.2867,0.2,0.82,0.0733,0.1284,,,0.3114,0.2858,0.0324,0.2096,0.3168,,,0.2333,0.3067,0.2067,0.8067,0.12,0.0467,0.0733,0.0533,0.08,0.1284,0.3168,0.063325,0.125,0.13636363636363635,0.3130390625,0.3015818181818182,0.2896551724137931,0.2,0.19310344827586207,0.4,0.8263888888888888,0.6666666666666666,0.030047916666666667,0.08888333333333333,0.08391608391608392,0.0,0.28695594405594405,0.2624714285714286,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gemini-2.0-flash,0.24,0.1267,0.3533,0.3733,0.6533,0.04,0.3378,,,0.4918,0.4092,0.1247,0.2673,0.5812,,,0.26,0.4067,0.6467,0.6467,0.1867,0.0,0.0133,0.0,0.02,0.3378,0.5812,0.008324999999999999,0.23776223776223776,0.2857142857142857,0.5012944055944056,0.2976142857142857,0.359375,0.3181818181818182,0.5652173913043478,0.33858267716535434,0.6618705035971223,0.5454545454545454,0.12981942446043165,0.060036363636363645,0.10483870967741936,0.23076923076923078,0.4076370967741935,0.4164423076923077,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gemini-2.5-flash,0.2667,0.22,0.4467,0.3467,0.84,0.18,0.2635,,,0.5316,0.4667,0.0481,0.3643,0.5294,,,0.3933,0.4533,0.3533,0.8267,0.2133,0.0667,0.0733,0.1067,0.1267,0.2635,0.5294,0.09335000000000002,0.2857142857142857,0.1935483870967742,0.5463605042016806,0.4749870967741935,0.44755244755244755,0.42857142857142855,0.3448275862068966,0.4,0.8424657534246576,0.75,0.04912397260273973,0.00925,0.2206896551724138,0.2,0.4744262068965517,0.24247999999999997,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gemini-2.5-pro,0.2333,0.1467,0.3133,0.2867,0.7867,0.14,0.25,,,0.5551,0.4852,0.0282,0.405,0.5869,,,0.3333,0.3067,0.3,0.8,0.1733,0.0333,0.0267,0.0467,0.0667,0.25,0.5869,0.04335,0.25190839694656486,0.10526315789473684,0.562512213740458,0.5036631578947369,0.30344827586206896,0.6,0.2785714285714286,0.4,0.7972972972972973,0.0,0.027546621621621623,0.0763,0.15172413793103448,0.0,0.4836668965517241,0.52896,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-4.1-2025-04-14,0.28,0.0933,0.4,0.3333,0.6267,0.06,0.1486,,,0.5868,0.4502,0.093,0.3232,0.4286,,,0.3267,0.4,0.5333,0.6267,0.06,0.02,0.02,0.0867,0.08,0.1486,0.4286,0.051675,0.2937062937062937,0.0,0.6051944055944056,0.21027142857142858,0.39864864864864863,0.5,0.39361702127659576,0.23214285714285715,0.6266666666666667,,0.09300266666666665,,0.09333333333333334,,0.4502473333333334,,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-4.1-mini-2025-04-14,0.26,0.1133,0.4467,0.3533,0.56,0.04,0.1419,,,0.5869,0.4461,0.1613,0.2639,0.4164,,,0.3467,0.4867,0.6867,0.56,0.06,0.02,0.02,0.0667,0.06,0.1419,0.4164,0.041675,0.2846715328467153,0.0,0.596556204379562,0.48513846153846163,0.40384615384615385,0.46938775510204084,0.59375,0.288135593220339,0.56,,0.1613446666666667,,0.09655172413793103,0.6,0.43762137931034484,0.6916,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-4.1-nano-2025-04-14,0.2333,0.0933,0.42,0.3467,0.64,0.02,0.1554,,,0.4494,0.3785,0.1699,0.2108,0.3468,,,0.4933,0.5267,0.4933,0.54,0.98,0.0667,0.1,0.1933,0.1333,0.1554,0.3468,0.12332499999999999,0.27472527472527475,0.1694915254237288,0.4892725274725274,0.38782711864406777,0.4444444444444444,0.3974358974358974,0.35365853658536583,0.3382352941176471,0.6391752577319587,0.6415094339622641,0.18549690721649487,0.14137169811320754,,0.09333333333333334,,0.37850066666666665,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-4o,0.42,0.32,0.4867,0.4733,0.6133,0.0533,0.3311,,,0.5844,0.5272,0.3016,0.1985,0.5283,,,0.4267,0.5067,0.5267,0.6133,0.3933,0.2867,0.3133,0.3533,0.48,0.3311,0.5283,0.358325,0.41843971631205673,0.4444444444444444,0.5899326241134751,0.49702222222222225,0.4888888888888889,0.4857142857142857,0.5,0.4696969696969697,0.6133333333333333,,0.30155933333333335,,0.3263157894736842,0.3090909090909091,0.5591484210526315,0.47192727272727264,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-5,0.68,0.5733,0.7333,0.74,0.58,0.1867,0.7162,,,0.7996,0.748,0.3975,0.3737,0.8468,,,0.6733,0.7133,0.7333,0.6067,0.24,0.5467,0.5267,0.4467,0.5267,0.7162,0.8468,0.5116999999999999,0.7022900763358778,0.5263157894736842,0.8083290076335877,0.7391157894736842,0.7278911564625851,1.0,0.7414965986394558,0.6666666666666666,0.5958904109589042,0.0,0.3867691780821918,0.7894749999999999,0.5704225352112676,0.625,0.7419345070422536,0.8558125,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-5-mini,0.72,0.5933,0.7267,0.7533,0.52,0.12,0.7297,,,0.7998,0.7254,0.4686,0.2931,0.8281,,,0.68,0.7267,0.7533,0.5467,0.1667,0.6333,0.6,0.54,0.6533,0.7297,0.8281,0.6066499999999999,0.734375,0.6363636363636364,0.8183835937499999,0.6915363636363636,0.7297297297297297,0.5,0.7602739726027398,0.5,0.5352112676056338,0.25,0.4527528169014084,0.75,0.5874125874125874,0.7142857142857143,0.7244447552447552,0.7455428571428572,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-5-nano,0.56,0.4733,0.6533,0.6933,0.5933,0.1133,0.5878,,,0.6779,0.6566,0.3823,0.3472,0.7523,,,0.5467,0.6467,0.6467,0.5333,0.1933,0.5333,0.5067,0.44,0.6067,0.5878,0.7523,0.521675,0.5606060606060606,0.5555555555555556,0.6825121212121212,0.6438111111111111,0.6642335766423357,0.5384615384615384,0.6834532374100719,0.8181818181818182,0.5725190839694656,0.7368421052631579,0.39954580152671754,0.2631578947368421,0.463768115942029,0.5833333333333334,0.6470021739130436,0.7668583333333333,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,gpt-oss:20b,0.1933,0.1667,0.3867,0.3467,0.84,0.08,,,,0.3969,0.3603,0.1059,0.2296,,,,0.24,0.3733,0.3333,0.8133,0.1267,0.16,0.1933,0.1733,0.2533,,,0.194975,0.19424460431654678,0.18181818181818182,0.4047374100719424,0.2979727272727273,0.37142857142857144,0.6,0.3333333333333333,0.6666666666666666,0.855072463768116,0.6666666666666666,0.09701739130434783,0.20833333333333334,0.17482517482517482,0.0,0.37452097902097903,0.07072857142857143,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,grok-3-mini,0.3133,0.2333,0.4733,0.3867,0.9,0.1867,,,,0.5919,0.5091,0.0644,0.3688,,,,0.3933,0.4867,0.3667,0.9133,0.2133,0.0933,0.1,0.1133,0.1933,,,0.124975,0.3333333333333333,0.16666666666666666,0.6150325757575759,0.42261111111111105,0.4791666666666667,0.3333333333333333,0.3724137931034483,0.8,0.9178082191780822,0.25,0.05896986301369863,0.260725,0.2328767123287671,0.25,0.5076445205479452,0.5617,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,llama3.1:70b,0.18,0.0533,0.18,0.2,0.7333,0.0133,,,,0.3871,0.2833,0.1062,0.1601,,,,0.1933,0.78,0.6,0.7333,0.0267,0.0267,0.0467,0.06,0.0867,,,0.055025,0.18243243243243243,0.0,0.38931013513513507,0.22725,0.25,0.17391304347826086,0.1590909090909091,0.2169811320754717,0.7333333333333333,,0.10615266666666666,,0.05405405405405406,0.0,0.2809716216216216,0.4524,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,llama3.1:8b,0.0067,0.0067,0.0467,0.0067,0.8733,0.0,,,,0.1259,0.0996,0.0318,0.0491,,,,0.9533,0.8533,0.8133,0.2933,0.9733,0.0133,0.0,0.0267,0.0467,,,0.021675,0.0,0.006944444444444444,0.126,0.12587500000000001,0.058823529411764705,0.045112781954887216,0.0,0.008130081300813009,0.9310344827586207,0.859504132231405,0.01005862068965517,0.03701570247933884,0.0,0.00684931506849315,0.041675,0.10114794520547947,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,mistral-small:24b,0.4662,0.2838,0.4797,0.5,0.4054,0.0,,,,0.6336,0.5209,0.4368,0.1897,,,,0.5203,0.5203,0.4932,0.4054,0.8514,0.3041,0.3446,0.4257,0.4189,,,0.373325,0.49264705882352944,0.16666666666666666,0.6545286764705882,0.397,,0.4797297297297297,0.3333333333333333,0.503448275862069,0.40540540540540543,,0.43675878378378374,,0.18181818181818182,0.30158730158730157,0.46777272727272723,0.5301222222222222,1.0,1.0,1.0,1.0,1.0,1.0
qawiki,zero-shot,o3,0.277,0.1622,0.4392,0.2905,0.7905,0.1149,0.2703,,,0.5359,0.4399,0.1039,0.3346,0.5218,,,0.3514,0.4257,0.3243,0.7973,0.1892,0.1622,0.1689,0.1757,0.1419,0.2703,0.5218,0.162175,0.2962962962962963,0.07692307692307693,0.5572992592592593,0.31313846153846153,0.4305555555555556,0.75,0.3006993006993007,0.0,0.8085106382978723,0.42857142857142855,0.09302198581560284,0.32209999999999994,0.16058394160583941,0.18181818181818182,0.43258540145985397,0.5307181818181818,1.0,1.0,1.0,1.0,1.0,1.0
spinach,classification,deepseek-chat,0.9667,0.84,0.58,0.8867,0.62,0.5,0.5,0.4933,0.5267,0.9946,0.9418,0.3543,0.5562,0.7006,0.6845,0.7188,,,,,,0.2,0.2,0.2533,0.42,0.5067,0.7013,0.268325,,,,,,,,,,,,,,,,,0.0,0.0,0.0124,0.0,0.2253,0.0
spinach,classification,deepseek-reasoner,0.8867,0.82,0.5267,0.8533,0.68,0.5,0.4533,0.4933,0.48,0.925,0.8933,0.315,0.4892,0.6603,0.6361,0.6065,,,,,,0.2333,0.18,0.3467,0.4067,0.4755,0.6343,0.291675,,,,,,,,,,,,,,,,,0.0,0.0,0.015,0.0,0.9998,0.0
spinach,classification,gemini-2.0-flash,0.8867,0.86,1.0,0.9667,0.6467,0.5533,0.4267,0.38,0.3467,0.9208,0.8928,0.261,0.5597,0.6547,0.593,0.5749,0.8867,0.58,,,0.48,0.1467,0.14,1.0,0.3867,0.3845,0.6075,0.41835,0.8866666666666667,,0.9208019999999999,,1.0,1.0,,,,,,,0.7272727272727273,0.8705035971223022,0.7234454545454546,0.9062539568345322,0.0,0.0,0.0,0.0,0.638,0.0
spinach,classification,gemini-2.5-flash,0.9267,0.9133,1.0,0.9667,0.9,0.82,0.3667,0.3733,0.3467,0.9461,0.9416,0.0934,0.8052,0.6205,0.6056,0.624,0.8733,0.94,,,0.8,0.0933,0.0733,1.0,0.1733,0.3622,0.6167,0.334975,0.9545454545454546,0.7222222222222222,0.9570704545454546,0.86605,1.0,1.0,,,,,,,0.927007299270073,0.7692307692307693,0.943807299270073,0.918123076923077,0.0,0.0,0.0,0.0,0.0288,0.0
spinach,classification,gemini-2.5-pro,0.8,0.7667,1.0,0.88,0.92,0.7067,0.38,0.3467,0.3067,0.8609,0.8735,0.0743,0.7962,0.6403,0.6053,0.6075,0.86,0.9267,,,0.7333,0.1,0.12,1.0,0.1533,0.3445,0.6177,0.343325,0.8837209302325582,0.2857142857142857,0.9027387596899225,0.6038714285714286,1.0,1.0,,,,,,,0.7898550724637681,0.5,0.8985391304347826,0.5849333333333333,0.0,0.0,0.0,0.0,0.01,0.0
spinach,classification,gpt-4.1-2025-04-14,0.9267,0.8533,0.6467,0.96,0.84,0.7067,0.2667,0.2667,0.3,0.9788,0.9758,0.1376,0.8143,0.6085,0.5578,0.6019,0.94,0.64,,,0.7133,0.0333,0.0333,0.0867,0.16,0.2778,0.5894,0.078325,0.9577464788732394,0.375,0.9904464788732396,0.7726625,0.6442953020134228,1.0,,,,,,,0.8551724137931035,0.8,0.976524827586207,0.95556,0.0,0.0,0.001,0.0,0.0,0.0
spinach,classification,gpt-4.1-mini-2025-04-14,0.9467,0.82,0.5733,0.9,0.7467,0.5733,0.3667,0.3867,0.4267,0.9793,0.9461,0.2379,0.6587,0.5982,0.6016,0.6552,0.9267,0.48,,,0.5867,0.1867,0.18,0.14,0.3067,0.3934,0.6183,0.20335,0.9645390070921985,0.6666666666666666,0.9811787234042554,0.9497777777777778,0.5769230769230769,0.5714285714285714,,,,,,,0.835820895522388,0.6875,0.9504776119402986,0.90921875,0.0,0.0,0.3899,0.0,0.0,0.0
spinach,classification,gpt-4.1-nano-2025-04-14,0.9733,0.5867,0.7467,0.5933,0.3733,0.0,0.9867,0.9933,0.9867,0.9733,0.7567,0.6267,0.0067,0.9867,0.9933,0.9867,0.7933,0.38,,,1.0,1.0,0.9733,0.74,0.5867,0.9889,0.9889,0.8250000000000001,0.967479674796748,1.0,0.967479674796748,1.0,0.7111111111111111,0.7619047619047619,,,,,,,,0.5866666666666667,,0.7566666666666667,0.0,0.0,0.0004,0.0022,0.9999,1.0
spinach,classification,gpt-4o,0.98,0.94,0.9733,0.9867,0.3933,0.3333,0.6867,0.2133,0.22,0.4551,0.9869,0.601,0.3765,0.3163,0.2912,0.3005,0.9667,0.8067,,,0.7533,0.54,0.54,0.56,0.62,0.3733,0.3027,0.5650000000000001,0.9863013698630136,0.75,0.46196849315068494,0.205875,0.975609756097561,0.9629629629629629,,,,,,,0.8918918918918919,0.9557522123893806,0.9816567567567568,0.9886159292035397,0.0,0.0,0.0,0.0,1.0,0.0
spinach,classification,gpt-5,0.9333,0.86,1.0,0.9733,0.7,0.5667,0.62,0.6067,0.6133,0.9625,0.9308,0.2946,0.6592,0.7761,0.787,0.7876,0.9267,0.9867,,,0.6267,0.3,0.2933,1.0,0.3867,0.6133,0.7836,0.495,0.9640287769784173,0.5454545454545454,0.977064748201439,0.7784272727272726,1.0,1.0,,,,,,,0.8920863309352518,0.45454545454545453,0.942163309352518,0.7873363636363636,0.0,0.0,0.0,0.0,0.7142,0.0
spinach,classification,gpt-5-mini,0.9,0.6267,1.0,0.76,0.6533,0.3,0.6467,0.6333,0.62,0.9204,0.7496,0.3467,0.4298,0.7631,0.7637,0.7533,0.8667,0.9533,,,0.3667,0.4667,0.4333,1.0,0.4467,0.6333,0.76,0.586675,0.9078014184397163,0.7777777777777778,0.9295539007092197,0.7777777777777778,1.0,1.0,,,,,,,0.6357142857142857,0.5,0.7527078571428572,0.7059599999999999,0.0,0.0063,0.0,0.151,0.1537,0.0002
spinach,classification,gpt-5-nano,0.7533,0.7,1.0,0.78,0.2067,0.0067,0.1133,0.5133,0.1,0.7567,0.7682,0.7933,0.0769,0.1208,0.5185,0.11,0.7667,0.8667,,,0.1133,0.12,0.1333,1.0,0.02,0.2422,0.2498,0.31832499999999997,0.7867647058823529,0.42857142857142855,0.7904411764705882,0.42857142857142855,1.0,1.0,,,,,,,0.746268656716418,0.3125,0.8014395522388059,0.48958124999999997,0.0102,0.0,0.0,0.007,1.0,1.0
spinach,classification,gpt-oss:20b,0.6267,0.4667,1.0,0.7,0.8467,0.3733,0.3133,0.26,0.2933,0.7483,0.7751,0.0876,0.58,0.4812,0.4713,0.4501,0.6467,0.8733,,,0.3733,0.1667,0.12,1.0,0.2533,0.2889,0.4675,0.385,0.6413793103448275,0.2,0.7509186206896552,0.67222,1.0,1.0,,,,,,,0.4652777777777778,0.5,0.7704819444444444,0.8852333333333333,0.0,0.0,0.0,0.0,0.5,0.0
spinach,classification,grok-3-mini,0.9,0.7467,1.0,0.8733,0.88,0.6333,0.4533,0.3867,0.4067,0.9583,0.9379,0.1106,0.77,0.6843,0.6474,0.6459,0.9067,0.9467,,,0.6733,0.1067,0.1067,1.0,0.18,0.4156,0.6592,0.34835,0.9416058394160584,0.46153846153846156,0.9814963503649635,0.7135461538461538,1.0,1.0,,,,,,,0.7569444444444444,0.5,0.9414145833333332,0.8539,0.0,0.0,0.0,0.0,0.5722,0.0
spinach,classification,llama3.1:70b,0.8067,0.5933,1.0,0.86,0.94,0.5667,0.2333,0.22,0.24,0.9024,0.8631,0.0235,0.7901,0.5097,0.4861,0.5062,0.8067,0.16,,,0.58,0.0067,0.02,1.0,0.0467,0.2311,0.5007,0.26835,0.8108108108108109,0.5,0.901745945945946,0.95455,1.0,1.0,,,,,,,0.6057692307692307,0.5652173913043478,0.8776028846153847,0.8301760869565217,0.0,0.0,0.0,0.0,0.0,0.0
spinach,classification,llama3.1:8b,0.2067,0.0533,1.0,0.3133,0.7,0.0333,0.0467,0.0333,0.0267,0.6022,0.5033,0.0949,0.3606,0.324,0.2737,0.2779,0.7533,0.1133,,,0.84,0.0,0.0,1.0,0.0,0.0356,0.2919,0.25,0.0,0.2152777777777778,0.5413,0.6047055555555556,1.0,1.0,,,,,,,0.0,0.061068702290076333,0.4970631578947369,0.5041839694656488,0.0,0.0039,0.0,0.0,0.9818,0.0312
spinach,classification,mistral-small:24b,0.84,0.5067,1.0,0.74,0.78,0.3733,0.36,0.2467,0.3067,0.9167,0.7588,0.1898,0.5804,0.5832,0.4896,0.5246,0.8867,0.0,,,0.6267,0.12,0.1267,1.0,0.1533,0.3045,0.5325,0.35,0.8865248226950354,0.1111111111111111,0.9291106382978722,0.7224888888888888,,1.0,,,,,,,0.5,0.5067567567567568,0.5,0.7623445945945946,0.0,0.0,0.0,0.0,0.0001,0.0
spinach,classification,o3,0.8,0.78,0.5333,0.88,0.94,0.7333,0.38,0.36,0.3867,0.9275,0.878,0.0537,0.8104,0.6588,0.5964,0.6194,0.8267,0.52,,,0.76,0.12,0.1,0.0933,0.1333,0.3756,0.6249,0.11165,0.821917808219178,0.0,0.9392719178082192,0.49702500000000005,0.528169014084507,0.625,,,,,,,0.8,0.5,0.8868642857142858,0.7538799999999999,0.0,0.0,0.889,0.0,0.2403,0.0
spinach,fixing,deepseek-chat,0.8267,0.7467,0.8867,0.9533,0.96,0.7333,0.4667,0.46,0.5067,0.9232,0.9161,0.0239,0.7704,0.7327,0.7319,0.752,,,,,,0.0533,0.0533,0.12,0.1733,0.4778,0.7389,0.09997500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,deepseek-reasoner,0.6467,0.5333,0.6667,0.8467,0.8933,0.5,0.1933,0.1733,0.1667,0.8036,0.8206,0.0845,0.7548,0.4319,0.4514,0.4066,,,,,,0.0467,0.06,0.0867,0.0467,0.1778,0.43,0.060024999999999995,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.1958,0.0
spinach,fixing,gemini-2.0-flash,0.7667,0.7733,0.8867,0.9133,0.9733,0.7667,0.7933,0.78,0.7467,0.8136,0.9152,0.014,0.8039,0.8713,0.8616,0.8323,,,,,,0.0267,0.0,0.0133,0.12,0.7733,0.8551,0.04,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,gemini-2.5-flash,0.8733,0.8667,0.8467,0.9667,0.9733,0.84,0.6533,0.7333,0.6667,0.9042,0.9076,0.0213,0.8107,0.7627,0.8087,0.799,,,,,,0.0867,0.04,0.0467,0.1733,0.6844,0.7901,0.086675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,gemini-2.5-pro,0.8067,0.8133,0.7733,0.9733,0.98,0.8067,0.5933,0.5533,0.54,0.9041,0.9203,0.0175,0.8446,0.7701,0.7532,0.7434,,,,,,0.06,0.02,0.02,0.14,0.5622,0.7556,0.060000000000000005,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,gpt-4.1-2025-04-14,0.9067,0.76,0.9467,0.9267,1.0,0.76,0.6333,0.6467,0.6333,0.9275,0.9078,0.0,0.8206,0.8455,0.8494,0.8417,,,,,,0.0333,0.0333,0.0733,0.1133,0.6378,0.8455,0.06330000000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,gpt-4.1-mini-2025-04-14,0.8533,0.7533,0.8867,0.98,0.9667,0.7467,0.5667,0.5733,0.5533,0.9138,0.8935,0.0165,0.7935,0.7759,0.7902,0.7916,,,,,,0.0267,0.02,0.0467,0.1267,0.5644,0.7859,0.055025000000000004,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,gpt-4.1-nano-2025-04-14,0.76,0.44,0.6467,0.7067,0.6133,0.3133,0.26,0.24,0.5333,0.6881,0.4969,0.1927,0.3241,0.3253,0.3042,0.5406,,,,,,0.2,0.1733,0.14,0.1867,0.3444,0.39,0.17500000000000002,,,,,,,,,,,,,,,,,0.0,0.0,0.0259,0.0,0.3073,0.0
spinach,fixing,gpt-4o,0.9533,0.8067,0.9267,0.9667,0.9533,0.78,0.6133,0.5933,0.5933,0.7168,0.7235,0.0362,0.5894,0.5886,0.579,0.5889,,,,,,0.3,0.2933,0.2533,0.3267,0.6,0.5855,0.293325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,gpt-5,0.94,0.84,0.94,0.9733,0.78,0.6267,0.7267,0.62,0.6533,0.9592,0.8977,0.22,0.6842,0.8527,0.7902,0.816,,,,,,0.3533,0.3533,0.3333,0.3867,0.6667,0.8196,0.35665,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0106,0.0
spinach,fixing,gpt-5-mini,0.8933,0.7867,0.92,0.9333,0.7467,0.5533,0.7133,0.74,0.7067,0.9262,0.8639,0.2516,0.5143,0.7875,0.8356,0.8049,,,,,,0.5267,0.4733,0.56,0.48,0.72,0.8093,0.51,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0003,0.0
spinach,fixing,gpt-5-nano,0.8533,0.7467,0.8533,0.9867,0.8067,0.5533,0.6333,0.6533,0.6133,0.9005,0.8363,0.1806,0.5846,0.7468,0.8044,0.7705,,,,,,0.4133,0.3533,0.3733,0.5133,0.6333,0.7739,0.4133,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0007,0.0
spinach,fixing,grok-3-mini,0.8933,0.8533,0.9133,0.98,0.9867,0.84,0.4067,0.4667,0.4267,0.9354,0.9374,0.0133,0.7327,0.6547,0.7053,0.6644,,,,,,0.08,0.0867,0.1067,0.26,0.4334,0.6748,0.13335000000000002,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0002,0.0
spinach,fixing,llama3.1:70b,0.5933,0.5467,0.7267,0.8267,0.9667,0.54,0.2533,0.2667,0.24,0.7508,0.8176,0.0107,0.7583,0.5101,0.5157,0.5142,,,,,,0.0,0.06,0.0733,0.06,0.2533,0.5133,0.048325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,llama3.1:8b,0.12,0.04,0.1733,0.1267,0.56,0.0267,0.02,0.0133,0.02,0.4342,0.3653,0.1351,0.2416,0.1657,0.1654,0.1914,,,,,,0.0,0.0,0.0067,0.0,0.0178,0.1742,0.001675,,,,,,,,,,,,,,,,,0.0001,0.0156,0.0001,0.0033,1.0,0.0625
spinach,fixing,mistral-small:24b,0.7867,0.5,0.7267,0.8733,0.9467,0.48,0.5,0.54,0.52,0.8546,0.718,0.0254,0.63,0.727,0.7243,0.7362,,,,,,0.12,0.06,0.16,0.16,0.52,0.7292,0.125,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
spinach,fixing,o3,0.8,0.7667,0.94,0.92,0.9667,0.7467,0.3733,0.38,0.3733,0.8361,0.8607,0.0242,0.7798,0.5516,0.5745,0.5787,,,,,,0.1333,0.1067,0.08,0.16,0.3755,0.5683,0.12,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0245,0.0
spinach,wikidata,deepseek-chat,0.3333,0.2153,0.5278,0.5625,0.625,0.1458,,,,0.5393,0.4885,0.1399,0.2152,,,,,,,,,0.0972,0.1042,0.1597,0.3333,,,0.17359999999999998,,,,,,,,,,,,,,,,,0.4278,0.0539,0.1163,0.0106,0.1553,0.0176
spinach,wikidata,gemini-2.0-flash,0.3472,0.1806,0.5347,0.4792,0.625,0.1111,,,,0.6181,0.4803,0.113,0.2705,,,,,,,,,0.0278,0.0417,0.1319,0.2222,,,0.1059,,,,,,,,,,,,,,,,,0.9449,0.0173,0.0098,0.0005,0.9061,0.0037
spinach,wikidata,gemini-2.5-flash,0.375,0.2847,0.5417,0.625,0.9028,0.2847,,,,0.4757,0.4383,0.0193,0.333,,,,,,,,,0.2292,0.2083,0.2639,0.2917,,,0.24827500000000002,,,,,,,,,,,,,,,,,0.1744,0.3714,0.1058,0.0072,0.0307,0.1553
spinach,wikidata,gemini-2.5-pro,0.3194,0.3264,0.5625,0.4792,0.8194,0.3056,,,,0.6635,0.6276,0.0251,0.5171,,,,,,,,,0.0069,0.0139,0.0625,0.0556,,,0.034725,,,,,,,,,,,,,,,,,0.4253,0.0035,0.0011,0.162,0.57,0.0008
spinach,wikidata,gpt-4.1-2025-04-14,0.5,0.2153,0.5625,0.4375,0.6389,0.1528,,,,0.7949,0.6168,0.0999,0.364,,,,,,,,,0.0,0.0,0.0417,0.0903,,,0.033,,,,,,,,,,,,,,,,,0.3679,0.6128,0.1405,0.5982,0.5,0.3036
spinach,wikidata,gpt-4.1-mini-2025-04-14,0.2431,0.1042,0.3611,0.2708,0.8056,0.0486,,,,0.4007,0.2877,0.0664,0.1367,,,,,,,,,0.0417,0.0903,0.0139,0.1875,,,0.08335000000000001,,,,,,,,,,,,,,,,,1.0,0.9996,1.0,0.9973,0.0,0.9648
spinach,wikidata,gpt-4.1-nano-2025-04-14,0.3333,0.0694,0.3194,0.5278,0.7708,0.0208,,,,0.6041,0.3473,0.0804,0.255,,,,,,,,,0.0278,0.0486,0.5903,0.1389,,,0.20140000000000002,,,,,,,,,,,,,,,,,0.6821,0.9978,1.0,0.0129,0.0,0.8555
spinach,wikidata,gpt-4o,0.8333,0.6736,0.7431,0.7917,0.2431,0.0208,,,,0.846,0.7486,0.7428,0.1278,,,,,,,,,0.7569,0.7778,0.7986,0.8889,,,0.80555,,,,,,,,,,,,,,,,,0.0,0.0,0.0084,0.0,1.0,1.0
spinach,wikidata,gpt-5,0.7639,0.5903,0.75,0.7569,0.6181,0.2569,,,,0.8263,0.7391,0.3349,0.4432,,,,,,,,,0.4583,0.5,0.375,0.4514,,,0.446175,,,,,,,,,,,,,,,,,0.0058,0.002,0.0401,0.0557,0.9992,0.2517
spinach,wikidata,gpt-5-mini,0.7986,0.6528,0.7639,0.8125,0.5,0.1944,,,,0.8449,0.7671,0.4756,0.3571,,,,,,,,,0.5833,0.5903,0.5278,0.6319,,,0.583325,,,,,,,,,,,,,,,,,0.0057,0.0,0.003,0.0057,0.9999,0.0592
spinach,wikidata,gpt-5-nano,0.7569,0.5208,0.6875,0.7639,0.5903,0.1806,,,,0.848,0.7201,0.3775,0.4106,,,,,,,,,0.5347,0.5208,0.4167,0.5625,,,0.508675,,,,,,,,,,,,,,,,,0.0032,0.0175,0.0719,0.0022,0.9988,0.7256
spinach,wikidata,gpt-oss:20b,0.3533,0.1667,0.4133,0.3667,0.86,0.1333,,,,0.5328,0.4225,0.0654,0.31,,,,,,,,,0.1733,0.2267,0.14,0.1667,,,0.176675,,,,,,,,,,,,,,,,,0.0048,0.0758,0.0814,0.0138,0.3388,0.0481
spinach,wikidata,grok-3-mini,0.3611,0.2361,0.5972,0.5208,0.8819,0.2222,,,,0.6113,0.5329,0.0418,0.3844,,,,,,,,,0.0764,0.1042,0.0972,0.1875,,,0.116325,,,,,,,,,,,,,,,,,0.3506,0.9075,0.2434,0.2088,0.5881,0.939
spinach,wikidata,llama3.1:70b,0.28,0.12,0.3533,0.2467,0.6533,0.02,,,,0.5186,0.4042,0.124,0.1725,,,,,,,,,0.0067,0.0133,0.0067,0.0133,,,0.01,,,,,,,,,,,,,,,,,0.0669,0.1662,0.28,0.7294,0.9782,1.0
spinach,wikidata,llama3.1:8b,0.06,0.0267,0.1333,0.08,0.6533,0.0,,,,0.2882,0.2309,0.1065,0.1032,,,,,,,,,0.0067,0.0133,0.0133,0.0067,,,0.009999999999999998,,,,,,,,,,,,,,,,,0.0547,0.0625,0.0004,0.0327,0.9991,1.0
spinach,wikidata,mistral-small:24b,0.7361,0.4653,0.6458,0.625,0.3681,0.0,,,,0.8099,0.6391,0.5941,0.1327,,,,,,,,,0.5625,0.5694,0.6042,0.7639,,,0.625,,,,,,,,,,,,,,,,,0.0,0.0002,0.0046,0.0011,1.0,1.0
spinach,wikidata,o3,0.4306,0.2986,0.5417,0.4861,0.9375,0.2708,,,,0.7116,0.6058,0.0163,0.5108,,,,,,,,,0.0764,0.0764,0.0347,0.0694,,,0.064225,,,,,,,,,,,,,,,,,0.43,0.9331,0.9157,0.9061,0.2272,0.9423
spinach,zero-shot,deepseek-chat,0.3267,0.1467,0.46,0.44,0.5667,0.08,0.2639,,,0.5802,0.4576,0.146,0.217,0.5272,,,0.3267,0.46,0.5133,0.5733,0.14,0.04,0.04,0.1,0.24,0.2639,0.5272,0.105,0.3219178082191781,0.5,0.5813835616438356,0.538575,0.40625,0.5,0.3793103448275862,0.45454545454545453,0.5704697986577181,0.0,0.1465724832214765,0.0625,0.13333333333333333,0.26666666666666666,0.44885037037037034,0.5367933333333333,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,deepseek-reasoner,0.1933,0.12,0.4133,0.2933,0.8533,0.1067,0.2153,,,0.4896,0.4069,0.0397,0.296,0.4904,,,0.2867,0.42,0.32,0.8067,0.18,0.0467,0.06,0.0333,0.0667,0.2153,0.4904,0.051675,0.208955223880597,0.0625,0.497255223880597,0.425325,0.41379310344827586,0.4,0.2986111111111111,0.16666666666666666,0.8461538461538461,1.0,0.04164615384615385,0.0,0.12949640287769784,0.0,0.40775107913669056,0.39601818181818177,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gemini-2.0-flash,0.4,0.1,0.4267,0.32,0.66,0.0333,0.4861,,,0.6502,0.4701,0.12,0.2635,0.7066,,,0.44,0.3933,0.6333,0.66,0.18,0.0,0.0,0.0067,0.0,0.4861,0.7066,0.001675,0.4142857142857143,0.2,0.6616807142857143,0.48970000000000014,0.3953488372093023,0.6190476190476191,0.41025641025641024,0.2882882882882883,0.6714285714285714,0.5,0.12170714285714286,0.09634999999999999,0.08870967741935484,0.15384615384615385,0.47400564516129023,0.4516576923076923,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gemini-2.5-flash,0.3333,0.2533,0.46,0.48,0.82,0.2267,0.3542,,,0.6132,0.5451,0.0327,0.4323,0.6329,,,0.3933,0.4933,0.46,0.8267,0.2867,0.0467,0.04,0.0533,0.0867,0.3542,0.6329,0.056675,0.3435114503816794,0.2631578947368421,0.6258366412213741,0.525921052631579,0.4755244755244755,0.14285714285714285,0.46853146853146854,0.7142857142857143,0.8344827586206897,0.4,0.024082068965517244,0.2824,0.26618705035971224,0.09090909090909091,0.554048201438849,0.43138181818181826,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gemini-2.5-pro,0.32,0.22,0.42,0.4333,0.82,0.1933,0.3333,,,0.6484,0.5618,0.023,0.4549,0.6351,,,0.4067,0.42,0.4467,0.8,0.24,0.0267,0.02,0.06,0.06,0.3333,0.6351,0.041675000000000004,0.35036496350364965,0.0,0.6770240875912408,0.3466076923076923,0.4178082191780822,0.5,0.43478260869565216,0.4166666666666667,0.8163265306122449,1.0,0.023454421768707483,0.0,0.23076923076923078,0.0,0.5667307692307691,0.4606142857142857,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-4.1-2025-04-14,0.4733,0.2067,0.5067,0.4267,0.62,0.1267,0.2986,,,0.7396,0.58,0.1208,0.3468,0.5722,,,0.4933,0.5,0.54,0.6267,0.1467,0.0,0.0133,0.04,0.0933,0.2986,0.5722,0.03665,0.48226950354609927,0.3333333333333333,0.7531808510638297,0.5270222222222222,0.5033557046979866,1.0,0.4752475247524752,0.32653061224489793,0.6241610738255033,0.0,0.12131946308724832,0.0417,0.20408163265306123,0.3333333333333333,0.582434693877551,0.45990000000000003,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-4.1-mini-2025-04-14,0.4333,0.1933,0.5533,0.4,0.5,0.0733,0.2083,,,0.7356,0.5728,0.1756,0.2803,0.402,,,0.44,0.3667,0.5667,0.5067,0.1133,0.0067,0.0133,0.0133,0.0667,0.2083,0.402,0.024999999999999998,0.4326241134751773,0.4444444444444444,0.7384581560283688,0.6912888888888888,0.3888888888888889,0.6458333333333334,0.391304347826087,0.4015748031496063,0.5033557046979866,0.0,0.17506040268456377,0.2632,0.18309859154929578,0.375,0.5659859154929577,0.6945625,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-4.1-nano-2025-04-14,0.3333,0.1467,0.54,0.42,0.58,0.04,0.2095,,,0.6183,0.4573,0.2252,0.1737,0.422,,,0.4667,0.5467,0.5267,0.5333,0.96,0.0333,0.0333,0.0933,0.1333,0.2095,0.422,0.0733,0.375,0.16666666666666666,0.6387541666666667,0.5364633333333333,0.6226415094339622,0.4948453608247423,0.4090909090909091,0.42452830188679247,0.5825242718446602,0.574468085106383,0.25094660194174756,0.16884468085106383,,0.14666666666666667,,0.4573333333333333,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-4o,0.5467,0.28,0.5733,0.5333,0.6667,0.0933,0.375,,,0.7352,0.5709,0.2254,0.2945,0.6048,,,0.5533,0.4933,0.4933,0.66,0.3733,0.2533,0.2267,0.2467,0.4533,0.375,0.6048,0.295,0.5531914893617021,0.4444444444444444,0.7482113475177306,0.5310333333333334,0.5892857142857143,0.5638297872340425,0.625,0.5223880597014925,0.6644295302013423,1.0,0.2269510067114094,0.0,0.27,0.3,0.587908,0.536892,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-5,0.64,0.46,0.6733,0.6733,0.7133,0.2267,0.6458,,,0.7812,0.6984,0.2376,0.4621,0.812,,,0.6067,0.6733,0.6733,0.74,0.3067,0.38,0.3667,0.2933,0.3333,0.6458,0.812,0.343325,0.635036496350365,0.6923076923076923,0.7797094890510949,0.7967,0.678082191780822,0.5,0.678082191780822,0.5,0.7394366197183099,0.25,0.2190225352112676,0.5666625,0.45588235294117646,0.5,0.7080735294117647,0.6046000000000001,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-5-mini,0.6867,0.5,0.6467,0.7067,0.6133,0.1467,0.6667,,,0.8057,0.6854,0.3581,0.3551,0.7911,,,0.6467,0.64,0.6933,0.64,0.2133,0.5133,0.4733,0.4,0.5333,0.6667,0.7911,0.47997500000000004,0.6838235294117647,0.7142857142857143,0.8100676470588235,0.7637857142857144,0.6462585034013606,0.6666666666666666,0.7027027027027027,1.0,0.6376811594202898,0.3333333333333333,0.3312384057971014,0.6666666666666666,0.5071428571428571,0.4,0.6892107142857142,0.6317900000000001,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-5-nano,0.6267,0.4267,0.6,0.6333,0.68,0.18,0.6181,,,0.7586,0.6127,0.2948,0.3619,0.769,,,0.5867,0.5733,0.6467,0.68,0.24,0.4,0.42,0.3333,0.4467,0.6181,0.769,0.4,0.6176470588235294,0.7142857142857143,0.7509573529411765,0.8332428571428572,0.6065573770491803,0.5714285714285714,0.6544117647058824,0.42857142857142855,0.7109375,0.5,0.2594765625,0.5,0.4233576642335766,0.46153846153846156,0.6020861313868614,0.7247076923076923,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,gpt-oss:20b,0.2333,0.1133,0.3533,0.2667,0.84,0.08,,,,0.4207,0.35,0.0686,0.2254,,,,0.3067,0.38,0.2667,0.7667,0.16,0.1533,0.1267,0.1267,0.1533,,,0.14,0.24817518248175183,0.07692307692307693,0.4309810218978102,0.31274615384615384,0.35294117647058826,0.35714285714285715,0.2602739726027397,0.5,0.8421052631578947,0.8235294117647058,0.06743458646616542,0.0777294117647059,0.10869565217391304,0.16666666666666666,0.35218260869565216,0.3245166666666667,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,grok-3-mini,0.3333,0.2733,0.5533,0.4733,0.88,0.2667,,,,0.6511,0.6204,0.0391,0.461,,,,0.38,0.5267,0.4733,0.8667,0.3133,0.04,0.06,0.08,0.1333,,,0.078325,0.34306569343065696,0.23076923076923078,0.6512408759124088,0.6496153846153846,0.5428571428571428,0.7,0.47183098591549294,0.5,0.8835616438356164,0.75,0.03603150684931507,0.15,0.2867132867132867,0.0,0.6321328671328672,0.3802285714285714,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,llama3.1:70b,0.2267,0.0867,0.32,0.26,0.7333,0.0467,,,,0.4951,0.3904,0.0865,0.2086,,,,0.2467,0.6133,0.6467,0.7333,0.0667,0.0067,0.0067,0.0,0.0733,,,0.021675,0.23129251700680273,0.0,0.49701700680272104,0.4001333333333334,0.08333333333333333,0.34057971014492755,0.28125,0.2542372881355932,0.7364864864864865,0.5,0.08600675675675676,0.125,0.08163265306122448,0.3333333333333333,0.38821360544217687,0.49510000000000004,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,llama3.1:8b,0.02,0.0,0.0333,0.0333,0.7933,0.0,,,,0.1735,0.1221,0.0517,0.0628,,,,0.9333,0.8667,0.8867,0.2867,0.96,0.0,0.0067,0.0267,0.0333,,,0.016675000000000002,0.1111111111111111,0.014184397163120567,0.2761888888888889,0.16691843971631204,0.058823529411764705,0.03007518796992481,0.0,0.036231884057971016,0.75,0.8015873015873016,0.04834583333333333,0.05236587301587301,0.0,0.0,0.27935000000000004,0.11550138888888888,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,mistral-small:24b,0.4931,0.2708,0.4931,0.4514,0.5833,0.0347,,,,0.6879,0.517,0.3129,0.2091,,,,0.5139,0.5,0.5417,0.5764,0.8889,0.1875,0.2361,0.2639,0.4097,,,0.2743,0.5037037037037037,0.3333333333333333,0.6869148148148149,0.7030444444444445,0.0,0.4965034965034965,0.0,0.45454545454545453,0.5804195804195804,1.0,0.3151132867132867,0.0,0.45454545454545453,0.2556390977443609,0.5886363636363637,0.5110616541353384,1.0,1.0,1.0,1.0,1.0,1.0
spinach,zero-shot,o3,0.4167,0.3403,0.5833,0.5347,0.9097,0.3125,0.3194,,,0.7225,0.6434,0.0313,0.5242,0.5517,,,0.4444,0.5694,0.5347,0.8611,0.4028,0.0417,0.0764,0.0556,0.0833,0.3194,0.5517,0.06425,0.4253731343283582,0.3,0.7307895522388058,0.6111,0.5785714285714286,0.75,0.5357142857142857,0.5,0.9172932330827067,0.8181818181818182,0.031393984962406016,0.03003636363636364,0.37404580152671757,0.0,0.6693282442748093,0.38178461538461533,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,classification,deepseek-chat,0.9333,0.7733,0.9067,0.9133,0.9333,0.74,0.56,0.5,0.4867,0.9579,0.9413,0.0609,0.8659,0.7571,0.7342,0.7401,,,,,,0.08,0.1,0.0933,0.1,0.5156,0.7438,0.09332499999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,classification,deepseek-reasoner,0.7267,0.8733,0.9267,0.9533,0.88,0.7667,0.3933,0.3333,0.4533,0.7768,0.9456,0.1183,0.8357,0.6352,0.6168,0.7358,,,,,,0.06,0.0733,0.0867,0.12,0.3933,0.6626,0.08499999999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0017,0.0
synthetic,classification,gemini-2.0-flash,0.8467,0.8467,1.0,0.9133,0.7467,0.64,0.48,0.4333,0.4533,0.9226,0.942,0.1848,0.7404,0.7256,0.6496,0.6439,0.8733,0.48,,,0.6467,0.08,0.0733,1.0,0.2,0.4555,0.673,0.338325,0.875,0.16666666666666666,0.9515722222222222,0.2262833333333333,1.0,1.0,,,,,,,0.8523489932885906,0.0,0.9475738255033558,0.1176,0.0,0.0,0.0,0.0,0.0002,0.0
synthetic,classification,gemini-2.5-flash,0.8667,0.92,1.0,0.9733,0.9733,0.9067,0.4667,0.42,0.4667,0.9151,0.9651,0.021,0.9298,0.7421,0.7173,0.7786,0.8933,0.96,,,0.92,0.04,0.0333,1.0,0.04,0.4511,0.746,0.278325,0.9318181818181818,0.3888888888888889,0.9392681818181818,0.7379944444444445,1.0,1.0,,,,,,,0.9315068493150684,0.5,0.972204794520548,0.70535,0.0,0.0,0.0,0.0,0.0001,0.0
synthetic,classification,gemini-2.5-pro,0.7733,0.6733,1.0,0.8267,0.9667,0.66,0.5067,0.3733,0.4267,0.8069,0.7644,0.0179,0.7276,0.7361,0.6142,0.669,0.8333,0.9467,,,0.7067,0.1067,0.0867,1.0,0.1133,0.4356,0.6731,0.326675,0.8473282442748091,0.2631578947368421,0.8384595419847328,0.5889842105263158,1.0,1.0,,,,,,,0.7175572519083969,0.3684210526315789,0.7866877862595419,0.6106315789473684,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,classification,gpt-4.1-2025-04-14,0.9,0.86,0.7,0.98,0.9467,0.82,0.44,0.4133,0.4533,0.9626,0.9656,0.0374,0.934,0.7658,0.7448,0.76,0.92,0.6933,,,0.8,0.02,0.0267,0.0267,0.04,0.4355,0.7569,0.02835,0.9424460431654677,0.36363636363636365,0.9865791366906475,0.6599636363636364,0.7034482758620689,0.6,,,,,,,0.8620689655172413,0.8,0.9707220689655174,0.8173999999999999,0.0,0.0,0.5643,0.0,0.0,0.0
synthetic,classification,gpt-4.1-mini-2025-04-14,0.86,0.8733,0.56,0.98,0.9133,0.7933,0.3867,0.3267,0.4,0.9424,0.9551,0.0682,0.8945,0.6601,0.6615,0.6947,0.82,0.4067,,,0.74,0.06,0.0667,0.0333,0.0867,0.3711,0.6721,0.061674999999999994,0.8863636363636364,0.6666666666666666,0.964239393939394,0.7821666666666667,0.4358974358974359,0.6036036036036037,,,,,,,0.8768115942028986,0.8333333333333334,0.95828115942029,0.91865,0.0,0.0,0.5,0.0,0.0,0.0
synthetic,classification,gpt-4.1-nano-2025-04-14,0.9667,0.62,0.44,0.62,0.34,0.0,0.94,0.94,0.9467,0.9727,0.7599,0.6521,0.0,0.94,0.94,0.9467,0.64,0.5133,,,1.0,0.9667,0.9467,0.4267,0.6133,0.9422,0.9422,0.7383500000000001,0.9504950495049505,1.0,0.9594534653465346,1.0,0.41025641025641024,0.45045045045045046,,,,,,,,0.62,,0.7599073333333333,0.0,0.0,0.9036,0.0024,1.0,1.0
synthetic,classification,gpt-4o,0.9467,0.9,1.0,0.9667,0.5933,0.5133,0.2867,0.2733,0.54,0.6222,0.9647,0.394,0.5765,0.4659,0.4302,0.6898,0.9467,0.76,,,0.6867,0.3533,0.3533,1.0,0.4067,0.3667,0.5286,0.5283249999999999,0.9652777777777778,0.5,0.6356152777777777,0.3,1.0,1.0,,,,,,,0.9264705882352942,0.8780487804878049,0.9824352941176471,0.9500317073170732,0.0,0.0,0.0,0.0,0.9876,0.0
synthetic,classification,gpt-5,0.8933,0.8867,1.0,0.9733,0.8867,0.7867,0.6,0.5867,0.6133,0.9599,0.9285,0.1133,0.8621,0.8146,0.7942,0.8114,0.9133,0.9733,,,0.82,0.0933,0.0933,1.0,0.1667,0.6,0.8067,0.338325,0.9416058394160584,0.38461538461538464,0.9900080291970803,0.6427923076923077,1.0,1.0,,,,,,,0.903448275862069,0.4,0.9398124137931035,0.599,0.0,0.0,0.0,0.0,0.3318,0.0
synthetic,classification,gpt-5-mini,0.8733,0.6,1.0,0.68,0.84,0.4733,0.5467,0.5733,0.56,0.9382,0.7438,0.16,0.5535,0.7441,0.7764,0.7569,0.8467,0.96,,,0.5067,0.1933,0.1867,1.0,0.2,0.56,0.7591,0.39499999999999996,0.8913043478260869,0.6666666666666666,0.9543369565217391,0.7522083333333333,1.0,1.0,,,,,,,0.6068965517241379,0.4,0.7449213793103449,0.7101200000000001,0.0,0.0,0.0,0.0704,0.0494,0.0
synthetic,classification,gpt-5-nano,0.78,0.6467,1.0,0.74,0.2333,0.0,0.0933,0.5533,0.06,0.7872,0.7296,0.7667,0.0864,0.1053,0.5587,0.0766,0.7333,0.9267,,,0.0933,0.0467,0.06,1.0,0.0067,0.2355,0.2469,0.27835,0.7851851851851852,0.7333333333333333,0.7920688888888888,0.7435866666666667,1.0,1.0,,,,,,,0.6617647058823529,0.5,0.739535294117647,0.6333357142857142,0.0001,0.0,0.0,0.0019,1.0,1.0
synthetic,classification,gpt-oss:20b,0.6533,0.46,1.0,0.7533,0.8933,0.4333,0.2733,0.2733,0.2733,0.8246,0.7567,0.051,0.6858,0.5664,0.5924,0.627,0.6667,0.9333,,,0.44,0.0467,0.04,1.0,0.06,0.2733,0.5953,0.286675,0.6739130434782609,0.4166666666666667,0.8499079710144928,0.5338416666666667,1.0,1.0,,,,,,,0.46153846153846156,0.42857142857142855,0.7595223776223776,0.6984714285714285,0.0,0.0,0.0,0.0,0.0068,0.0
synthetic,classification,grok-3-mini,0.9,0.82,1.0,0.9333,0.9533,0.78,0.4133,0.4067,0.4267,0.9591,0.9565,0.0467,0.8982,0.7163,0.7014,0.6994,0.9133,0.9667,,,0.8,0.04,0.0467,1.0,0.0467,0.4156,0.7057,0.28335,0.9420289855072463,0.4166666666666667,0.9890014492753624,0.6151166666666666,1.0,1.0,,,,,,,0.8344827586206897,0.4,0.9644206896551725,0.728,0.0,0.0,0.0,0.0,0.0064,0.0
synthetic,classification,llama3.1:70b,0.82,0.6,1.0,0.88,0.92,0.6,0.2667,0.2667,0.22,0.9204,0.8955,0.0327,0.8533,0.6103,0.5952,0.5764,0.8733,0.1067,,,0.5533,0.0067,0.02,1.0,0.0133,0.2511,0.594,0.26,0.8661971830985915,0.0,0.9566584507042253,0.27635,1.0,1.0,,,,,,,0.5950413223140496,0.6206896551724138,0.8982983471074379,0.8837482758620689,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,classification,llama3.1:8b,0.2267,0.1067,1.0,0.42,0.68,0.0867,0.1067,0.0933,0.0733,0.6402,0.5797,0.0901,0.4721,0.4224,0.3968,0.3736,0.7533,0.1333,,,0.74,0.0,0.0,1.0,0.0,0.0911,0.3976,0.25,0.3333333333333333,0.2198581560283688,0.8546333333333334,0.6264758865248227,1.0,1.0,,,,,,,0.07692307692307693,0.11290322580645161,0.5014615384615385,0.5961056451612905,0.0,0.0,0.0,0.0,0.9948,0.0001
synthetic,classification,mistral-small:24b,0.8533,0.5667,1.0,0.8333,0.9133,0.5333,0.36,0.32,0.3333,0.9257,0.8309,0.07,0.7751,0.671,0.6502,0.6517,0.8733,0.0067,,,0.46,0.0267,0.0333,1.0,0.0733,0.3378,0.6576,0.283325,0.916030534351145,0.42105263157894735,0.9752167938931298,0.5845947368421053,1.0,1.0,,,,,,,0.5263157894736842,0.5725190839694656,0.8570789473684212,0.8271473282442748,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,classification,o3,0.9133,0.9,1.0,0.98,0.98,0.8867,0.3933,0.36,0.36,0.9767,0.9632,0.0182,0.9574,0.7092,0.7111,0.7131,0.8933,0.9733,,,0.92,0.0067,0.0067,1.0,0.0267,0.3711,0.7111,0.260025,0.9548872180451128,0.5882352941176471,0.9952624060150376,0.8319176470588237,1.0,1.0,,,,,,,0.9230769230769231,0.42857142857142855,0.9767545454545455,0.6867571428571428,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,deepseek-chat,0.8933,0.8533,0.92,0.9867,0.9667,0.8267,0.6267,0.6733,0.6267,0.9525,0.9625,0.0259,0.877,0.8214,0.8428,0.8304,,,,,,0.0267,0.02,0.06,0.0867,0.6422,0.8315,0.048350000000000004,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,deepseek-reasoner,0.7533,0.6667,0.7933,0.8733,0.9667,0.6533,0.2667,0.2467,0.2867,0.8847,0.8887,0.0142,0.8733,0.5773,0.5358,0.5834,,,,,,0.0067,0.0067,0.0333,0.02,0.2667,0.5655,0.016675000000000002,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,gemini-2.0-flash,0.86,0.84,0.8733,0.9667,0.9867,0.8267,0.8333,0.7867,0.8067,0.8733,0.958,0.0007,0.9087,0.9335,0.9037,0.9159,,,,,,0.0067,0.0,0.0067,0.06,0.8089,0.9177,0.018349999999999998,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,gemini-2.5-flash,0.9,0.9133,0.94,0.96,0.9733,0.9,0.78,0.7467,0.7667,0.9388,0.9457,0.0267,0.9118,0.8867,0.8675,0.8694,,,,,,0.0267,0.02,0.0267,0.06,0.7645,0.8745,0.033350000000000005,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0004,0.0
synthetic,fixing,gemini-2.5-pro,0.68,0.7733,0.82,0.8867,0.9733,0.7667,0.3867,0.4333,0.3133,0.6098,0.8262,0.025,0.8025,0.3909,0.5311,0.4324,,,,,,0.24,0.24,0.34,0.0733,0.3778,0.4515,0.22332500000000002,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,gpt-4.1-2025-04-14,0.8867,0.8667,0.9533,0.9667,0.9733,0.8467,0.6867,0.6933,0.6933,0.9662,0.9711,0.01,0.8919,0.9175,0.9131,0.9145,,,,,,0.0133,0.0133,0.0267,0.08,0.6911,0.915,0.033325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,gpt-4.1-mini-2025-04-14,0.8067,0.8,0.9133,0.9667,0.86,0.76,0.6267,0.6533,0.6733,0.9151,0.9315,0.0639,0.8387,0.8616,0.8986,0.8651,,,,,,0.02,0.04,0.06,0.0467,0.6511,0.8751,0.041675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,gpt-4.1-nano-2025-04-14,0.5533,0.3733,0.6133,0.7,0.5733,0.22,0.4933,0.5333,0.5133,0.6813,0.6589,0.2566,0.4434,0.6822,0.7098,0.6864,,,,,,0.06,0.12,0.16,0.1067,0.5133,0.6928,0.111675,,,,,,,,,,,,,,,,,0.0001,0.0,0.0432,0.0,0.6439,0.0
synthetic,fixing,gpt-4o,0.8133,0.7667,0.9467,0.96,0.9733,0.7667,0.54,0.56,0.5933,0.7462,0.7764,0.0157,0.7361,0.6078,0.6199,0.6514,,,,,,0.1733,0.18,0.2333,0.1867,0.5644,0.6264,0.193325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,gpt-5,0.88,0.9,0.9667,0.9933,0.9267,0.8267,0.5667,0.56,0.5467,0.9697,0.9482,0.0733,0.8959,0.7964,0.7859,0.7853,,,,,,0.1267,0.1333,0.12,0.1467,0.5578,0.7892,0.131675,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0392,0.0
synthetic,fixing,gpt-5-mini,0.8267,0.78,0.8867,0.9533,0.8533,0.66,0.5733,0.5667,0.6,0.9004,0.8703,0.142,0.7797,0.7607,0.7453,0.7902,,,,,,0.2667,0.2467,0.2667,0.2533,0.58,0.7654,0.25835,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0113,0.0
synthetic,fixing,gpt-5-nano,0.9067,0.7733,0.8467,0.98,0.9067,0.6933,0.5533,0.58,0.56,0.9517,0.8801,0.093,0.7908,0.75,0.7743,0.7542,,,,,,0.1867,0.1733,0.1667,0.2533,0.5644,0.7595,0.195,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0835,0.0
synthetic,fixing,gpt-oss:20b,0.6667,0.6867,0.82,0.9067,0.9933,0.6867,0.3133,0.3333,0.3067,0.8429,0.8644,0.0009,0.8644,0.6195,0.6303,0.5971,,,,,,0.04,0.0333,0.0733,0.0867,0.3178,0.6156,0.058325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,grok-3-mini,0.8933,0.8733,0.9333,0.9933,0.9667,0.8533,0.5067,0.4333,0.4267,0.9532,0.9679,0.0227,0.9224,0.7509,0.6881,0.7234,,,,,,0.0333,0.0533,0.06,0.0667,0.4556,0.7208,0.053325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0036,0.0
synthetic,fixing,llama3.1:70b,0.68,0.6667,0.8067,0.8733,0.9667,0.66,0.2867,0.3333,0.2867,0.8008,0.8717,0.0085,0.8449,0.6224,0.627,0.5811,,,,,,0.0,0.0333,0.06,0.0267,0.3022,0.6102,0.03,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,llama3.1:8b,0.2067,0.04,0.2133,0.1733,0.6267,0.04,0.06,0.0467,0.0533,0.5639,0.3906,0.1097,0.2903,0.2753,0.2564,0.2895,,,,,,0.0,0.0,0.0067,0.0,0.0533,0.2737,0.001675,,,,,,,,,,,,,,,,,0.0,0.0156,0.0,0.0,0.9999,0.0156
synthetic,fixing,mistral-small:24b,0.7533,0.5533,0.84,0.9533,0.9533,0.5333,0.5333,0.58,0.56,0.882,0.818,0.0188,0.7938,0.7864,0.8008,0.8029,,,,,,0.04,0.0133,0.1,0.06,0.5578,0.7967,0.053325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,fixing,o3,0.7667,0.9067,0.96,0.9733,1.0,0.9067,0.4667,0.4,0.4,0.9182,0.9312,0.0,0.9321,0.715,0.6663,0.6517,,,,,,0.04,0.0333,0.0333,0.0467,0.4222,0.6777,0.038325,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
synthetic,wikidata,deepseek-chat,0.4867,0.2067,0.5667,0.5533,0.5733,0.1,,,,0.6873,0.5714,0.1657,0.3056,,,,,,,,,0.06,0.06,0.1333,0.1933,,,0.11165,,,,,,,,,,,,,,,,,0.3555,0.6682,0.14,0.0717,0.4357,0.927
synthetic,wikidata,gemini-2.0-flash,0.4867,0.1933,0.56,0.52,0.5733,0.1,,,,0.7258,0.6022,0.1475,0.3962,,,,,,,,,0.0067,0.0333,0.0333,0.04,,,0.028325000000000003,,,,,,,,,,,,,,,,,0.061,0.4159,0.2664,0.0481,0.3746,0.5
synthetic,wikidata,gemini-2.5-flash,0.3333,0.2467,0.6333,0.5133,0.8467,0.2133,,,,0.5816,0.5658,0.0465,0.4459,,,,,,,,,0.1467,0.1067,0.0867,0.1733,,,0.12835000000000002,,,,,,,,,,,,,,,,,0.9998,0.997,0.3417,0.8389,0.8275,0.997
synthetic,wikidata,gemini-2.5-pro,0.42,0.3,0.5933,0.5733,0.8067,0.28,,,,0.7499,0.6982,0.0404,0.5896,,,,,,,,,0.0133,0.0133,0.0067,0.0133,,,0.011649999999999999,,,,,,,,,,,,,,,,,0.7796,0.9393,0.678,0.0403,0.5841,0.8115
synthetic,wikidata,gpt-4.1-2025-04-14,0.62,0.2933,0.6867,0.5733,0.6267,0.18,,,,0.8506,0.6977,0.1254,0.4714,,,,,,,,,0.0,0.0,0.0267,0.04,,,0.016675000000000002,,,,,,,,,,,,,,,,,0.345,0.3238,0.7294,0.2122,0.3953,0.377
synthetic,wikidata,gpt-4.1-mini-2025-04-14,0.3267,0.08,0.4,0.3067,0.68,0.0467,,,,0.5417,0.3843,0.0802,0.2859,,,,,,,,,0.02,0.04,0.02,0.08,,,0.04,,,,,,,,,,,,,,,,,0.9995,0.9995,0.9999,1.0,0.0007,0.9673
synthetic,wikidata,gpt-4.1-nano-2025-04-14,0.36,0.0933,0.4933,0.5067,0.7667,0.06,,,,0.6337,0.3993,0.0745,0.2804,,,,,,,,,0.0267,0.0667,0.4667,0.1,,,0.165025,,,,,,,,,,,,,,,,,0.6494,0.8338,0.7121,0.1163,0.0002,0.623
synthetic,wikidata,gpt-4o,0.8533,0.6467,0.6933,0.84,0.3,0.02,,,,0.8668,0.7349,0.6846,0.1987,,,,,,,,,0.84,0.8133,0.7267,0.8267,,,0.8016749999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0128,0.0,1.0,1.0
synthetic,wikidata,gpt-5,0.6667,0.54,0.7333,0.78,0.8667,0.44,,,,0.8036,0.7549,0.0975,0.7062,,,,,,,,,0.2133,0.2333,0.12,0.2067,,,0.193325,,,,,,,,,,,,,,,,,0.012,0.0023,0.0063,0.0033,0.5841,0.0222
synthetic,wikidata,gpt-5-mini,0.6067,0.46,0.62,0.66,0.7067,0.2267,,,,0.7246,0.6434,0.2502,0.4476,,,,,,,,,0.3933,0.36,0.2867,0.4067,,,0.361675,,,,,,,,,,,,,,,,,0.1802,0.012,0.0587,0.1312,0.9855,0.4253
synthetic,wikidata,gpt-5-nano,0.6667,0.4133,0.66,0.6333,0.8,0.2667,,,,0.8043,0.6404,0.1655,0.4962,,,,,,,,,0.3067,0.3133,0.1933,0.3,,,0.278325,,,,,,,,,,,,,,,,,0.0147,0.566,0.7288,0.1802,0.9867,0.9331
synthetic,wikidata,gpt-oss:20b,0.2933,0.2267,0.5267,0.4067,0.8333,0.1933,,,,0.5579,0.5247,0.0532,0.4524,,,,,,,,,0.0667,0.0533,0.04,0.08,,,0.06,,,,,,,,,,,,,,,,,0.3642,0.5,0.3179,0.5,0.1885,0.3388
synthetic,wikidata,grok-3-mini,0.4467,0.3067,0.6133,0.5933,0.8867,0.3,,,,0.7327,0.6889,0.0221,0.6184,,,,,,,,,0.02,0.0133,0.02,0.0267,,,0.02,,,,,,,,,,,,,,,,,0.6399,0.9242,0.7077,0.1553,0.5,0.6494
synthetic,wikidata,llama3.1:70b,0.3467,0.1067,0.4733,0.3667,0.6133,0.0467,,,,0.6352,0.4944,0.13,0.2851,,,,,,,,,0.0067,0.0,0.0067,0.0,,,0.00335,,,,,,,,,,,,,,,,,0.1725,0.2905,0.0821,0.1055,0.8198,0.6875
synthetic,wikidata,llama3.1:8b,0.0667,0.0,0.2,0.1333,0.62,0.0,,,,0.3819,0.296,0.1017,0.1473,,,,,,,,,0.0,0.0,0.0067,0.0133,,,0.005,,,,,,,,,,,,,,,,,0.0547,1.0,0.0,0.0,0.9999,1.0
synthetic,wikidata,mistral-small:24b,0.66,0.3867,0.62,0.5333,0.4067,0.02,,,,0.7292,0.5659,0.5147,0.1662,,,,,,,,,0.4933,0.5,0.5733,0.7067,,,0.5683250000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.1958,0.0408,0.993,0.8125
synthetic,wikidata,o3,0.5333,0.34,0.66,0.5333,0.8467,0.3067,,,,0.8006,0.723,0.0287,0.6603,,,,,,,,,0.0467,0.0267,0.0133,0.0267,,,0.02835,,,,,,,,,,,,,,,,,0.0001,0.0401,0.0018,0.0235,0.7483,0.068
synthetic,zero-shot,deepseek-chat,0.4667,0.2133,0.5133,0.4867,0.56,0.1267,0.3467,,,0.6754,0.5407,0.1626,0.3156,0.5964,,,0.4933,0.5067,0.5133,0.5667,0.1733,0.0267,0.0533,0.1067,0.1333,0.3467,0.5964,0.08,0.4791666666666667,0.16666666666666666,0.6860819444444445,0.4200833333333333,0.5230769230769231,0.5058823529411764,0.5,0.4827586206896552,0.5637583892617449,0.0,0.16032416107382552,0.5,0.20437956204379562,0.3076923076923077,0.5470875912408759,0.4735384615384615,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,deepseek-reasoner,0.2867,0.2333,0.5267,0.4467,0.7533,0.18,0.25,,,0.5706,0.5545,0.0427,0.4397,0.7556,,,0.3933,0.5467,0.4467,0.7533,0.2067,0.0133,0.0133,0.0067,0.0333,0.25,0.7556,0.016649999999999998,0.3181818181818182,0.05555555555555555,0.6173318181818181,0.22810555555555556,0.5379310344827586,0.2,0.4444444444444444,0.5,0.7602739726027398,0.5,0.04072876712328767,0.114225,0.23972602739726026,0.0,0.5603821917808219,0.3409,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gemini-2.0-flash,0.4267,0.18,0.5267,0.44,0.5533,0.0933,0.6067,,,0.7391,0.5706,0.1478,0.364,0.8385,,,0.5333,0.5067,0.54,0.58,0.36,0.0,0.0067,0.0,0.0,0.6067,0.8385,0.001675,0.4732142857142857,0.2894736842105263,0.7715982142857143,0.6432763157894736,0.5238095238095238,0.5333333333333333,0.4482758620689655,0.4380165289256198,0.5704225352112676,0.25,0.1405725352112676,0.2763375,0.19811320754716982,0.13636363636363635,0.576138679245283,0.5573022727272726,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gemini-2.5-flash,0.4667,0.3467,0.6067,0.5533,0.8733,0.3133,0.4733,,,0.7421,0.6804,0.0241,0.5958,0.7429,,,0.5467,0.6067,0.56,0.8867,0.3467,0.02,0.0267,0.0133,0.0333,0.4733,0.7429,0.023325000000000002,0.5074626865671642,0.125,0.7727656716417911,0.48540000000000005,0.6111111111111112,0.5,0.5586206896551724,0.4,0.8904109589041096,0.25,0.013611643835616439,0.40675,0.35664335664335667,0.14285714285714285,0.6941531468531469,0.3986,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gemini-2.5-pro,0.4467,0.3533,0.6067,0.4933,0.8067,0.3067,0.4267,,,0.7282,0.7069,0.0412,0.5999,0.766,,,0.52,0.6067,0.52,0.8267,0.3467,0.0133,0.0333,0.0,0.0133,0.4267,0.766,0.014975,0.48175182481751827,0.07692307692307693,0.7513175182481752,0.4844,0.6095890410958904,0.5,0.5069444444444444,0.16666666666666666,0.8231292517006803,0.0,0.028610884353741498,0.6563666666666667,0.3680555555555556,0.0,0.7177263888888888,0.4474,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-4.1-2025-04-14,0.6,0.2733,0.7,0.54,0.6133,0.1667,0.3667,,,0.8377,0.7008,0.1287,0.4685,0.7002,,,0.6267,0.7,0.52,0.62,0.1733,0.0,0.0,0.0133,0.0267,0.3667,0.7002,0.01,0.6231884057971014,0.3333333333333333,0.8629971014492754,0.54685,0.7027027027027027,0.5,0.5412844036697247,0.5365853658536586,0.6174496644295302,0.0,0.12287248322147651,1.0,0.2751677852348993,0.0,0.7047476510067116,0.1154,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-4.1-mini-2025-04-14,0.4667,0.18,0.5533,0.5333,0.52,0.08,0.3267,,,0.75,0.6301,0.1726,0.3793,0.6579,,,0.52,0.44,0.5067,0.5333,0.0933,0.0,0.0,0.0,0.04,0.3267,0.6579,0.01,0.49242424242424243,0.2777777777777778,0.7795075757575757,0.5336222222222222,0.4897959183673469,0.5841584158415841,0.59375,0.5169491525423728,0.527027027027027,0.0,0.16139121621621622,1.0,0.1780821917808219,0.25,0.6355294520547945,0.43332499999999996,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-4.1-nano-2025-04-14,0.3667,0.1133,0.5133,0.4467,0.5867,0.06,0.2733,,,0.6469,0.5003,0.1767,0.3219,0.4982,,,0.5133,0.3867,0.5067,0.4867,0.94,0.0133,0.0133,0.1,0.0667,0.2733,0.4982,0.04832499999999999,0.40816326530612246,0.28846153846153844,0.7081683673469389,0.5313999999999999,0.3770491803278688,0.6067415730337079,0.4461538461538462,0.4470588235294118,0.5578947368421052,0.6363636363636364,0.15809473684210523,0.20870181818181818,,0.11333333333333333,,0.5003313333333334,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-4o,0.4067,0.16,0.5533,0.4133,0.7,0.0867,0.4533,,,0.6798,0.5558,0.1601,0.3557,0.743,,,0.4067,0.4933,0.5733,0.7067,0.36,0.1267,0.1,0.1067,0.18,0.4533,0.743,0.12835000000000002,0.4,0.5,0.68373,0.6247,0.5636363636363636,0.5473684210526316,0.4375,0.41044776119402987,0.7046979865771812,0.0,0.15450268456375837,1.0,0.13861386138613863,0.20408163265306123,0.5680910891089108,0.5305061224489795,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-5,0.5533,0.3933,0.6067,0.6467,0.8667,0.34,0.5133,,,0.7869,0.6867,0.0456,0.672,0.734,,,0.5733,0.6067,0.6467,0.88,0.3667,0.1533,0.1267,0.0467,0.1333,0.5133,0.734,0.11500000000000002,0.5714285714285714,0.4117647058823529,0.8037706766917293,0.6551823529411764,0.6095890410958904,0.5,0.6486486486486487,0.5,0.8835616438356164,0.25,0.026320547945205476,0.75,0.3972602739726027,0.25,0.6924650684931506,0.47467500000000007,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-5-mini,0.56,0.3467,0.5333,0.6,0.7733,0.2133,0.5933,,,0.753,0.6191,0.1713,0.5131,0.7568,,,0.58,0.52,0.6267,0.7733,0.2333,0.2467,0.2333,0.1867,0.3133,0.5933,0.7568,0.245,0.5766423357664233,0.38461538461538464,0.7612912408759125,0.6659769230769231,0.5277777777777778,0.6666666666666666,0.6164383561643836,0.0,0.7808219178082192,0.5,0.16226712328767126,0.5,0.3469387755102041,0.3333333333333333,0.621130612244898,0.5212,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-5-nano,0.56,0.4133,0.68,0.5867,0.86,0.3067,0.5533,,,0.7499,0.6659,0.1143,0.5548,0.7762,,,0.5667,0.6467,0.5867,0.82,0.3733,0.1867,0.2067,0.1467,0.26,0.5533,0.7762,0.200025,0.5673758865248227,0.4444444444444444,0.7532588652482269,0.6975666666666667,0.696,0.6,0.5955882352941176,0.5,0.875,0.7142857142857143,0.10076985294117646,0.24602857142857143,0.4253731343283582,0.3125,0.6707455223880597,0.62538125,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,gpt-oss:20b,0.2733,0.22,0.5,0.4,0.7933,0.1733,,,,0.5755,0.5528,0.0474,0.4674,,,,0.32,0.5533,0.3933,0.78,0.22,0.0467,0.04,0.0,0.04,,,0.031675,0.28368794326241137,0.1111111111111111,0.5882375886524822,0.37645555555555554,0.5285714285714286,0.1,0.3945578231292517,0.6666666666666666,0.8071428571428572,0.6,0.032045,0.2625,0.23076923076923078,0.0,0.5609027972027972,0.3874857142857143,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,grok-3-mini,0.4533,0.3467,0.6267,0.5467,0.88,0.3067,,,,0.7199,0.6895,0.0401,0.597,,,,0.5,0.6333,0.5533,0.8867,0.3267,0.02,0.0267,0.02,0.0467,,,0.02835,0.4744525547445255,0.23076923076923078,0.7443832116788321,0.46232307692307695,0.6363636363636364,0.42857142857142855,0.5510204081632653,0.3333333333333333,0.891156462585034,0.3333333333333333,0.0322265306122449,0.4242333333333333,0.35374149659863946,0.0,0.697943537414966,0.2738,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,llama3.1:70b,0.3067,0.0867,0.4067,0.3067,0.6467,0.0467,,,,0.6141,0.4462,0.1147,0.2735,,,,0.3467,0.5933,0.5467,0.6533,0.06,0.0067,0.0067,0.0067,0.0133,,,0.00835,0.3194444444444444,0.0,0.6322319444444444,0.17818333333333333,0.5,0.39855072463768115,0.2708333333333333,0.3235294117647059,0.6510067114093959,0.0,0.11411006711409395,0.2,0.08783783783783784,0.0,0.4462743243243243,0.4375,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,llama3.1:8b,0.0267,0.0,0.0333,0.0067,0.8,0.0,,,,0.2604,0.1659,0.034,0.103,,,,0.9133,0.8667,0.88,0.3067,0.98,0.0067,0.0067,0.0267,0.02,,,0.015025,0.15384615384615385,0.014598540145985401,0.3869923076923077,0.2483773722627737,0.0,0.037037037037037035,0.0,0.007518796992481203,0.7222222222222222,0.8245614035087719,0.05750833333333332,0.026518421052631577,0.0,0.0,0.1275,0.1666503401360544,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,mistral-small:24b,0.4067,0.1733,0.5733,0.4333,0.5333,0.0267,,,,0.6445,0.5298,0.2529,0.2543,,,,0.5133,0.4267,0.5733,0.54,0.82,0.12,0.0733,0.2133,0.2933,,,0.174975,0.45454545454545453,0.05555555555555555,0.6896878787878788,0.31328888888888895,,0.5733333333333334,1.0,0.42953020134228187,0.5369127516778524,0.0,0.24789932885906038,1.0,0.25925925925925924,0.15447154471544716,0.5943666666666666,0.5156211382113821,1.0,1.0,1.0,1.0,1.0,1.0
synthetic,zero-shot,o3,0.38,0.2667,0.5267,0.4467,0.86,0.2467,0.3733,,,0.6809,0.6088,0.0317,0.5389,0.6738,,,0.46,0.5267,0.4533,0.8533,0.2733,0.0267,0.0267,0.0133,0.0333,0.3733,0.6738,0.025,0.41044776119402987,0.125,0.7124291044776119,0.4165,0.5273972602739726,0.5,0.4489795918367347,0.3333333333333333,0.8689655172413793,0.6,0.02478137931034483,0.23334000000000002,0.273972602739726,0.0,0.6191684931506849,0.228575,1.0,1.0,1.0,1.0,1.0,1.0
overall,classification,deepseek-chat,0.945,0.825,0.8467,0.9117,0.6733,0.5417,0.5033,0.48,0.4917,0.9722,0.9404,0.3101,0.6274,0.6937,0.6687,0.6839,0.8933,0.1,,,0.52,0.2167,0.2267,0.4367,0.3467,0.4917,0.6821,0.3067,0.9280575539568345,0.5454545454545454,0.9597827338129498,0.740909090909091,1.0,1.0,,,,,,,0.8571428571428571,0.8391608391608392,0.9642857142857143,0.9183146853146854,0.0,0.0,0.0,0.0,0.0001,0.0
overall,classification,deepseek-reasoner,0.785,0.7883,0.7533,0.8533,0.6967,0.515,0.4267,0.3817,0.45,0.824,0.8742,0.2966,0.5637,0.6299,0.5716,0.6319,0.6533,0.92,,,0.54,0.1883,0.1433,0.4783,0.3317,0.4195,0.6111,0.2854,0.6546762589928058,0.36363636363636365,0.6765122302158274,0.5545454545454546,1.0,1.0,,,,,,,0.6056338028169014,0.375,0.7532640845070423,0.731975,0.0,0.0,0.0,0.0,1.0,0.0
overall,classification,gemini-2.0-flash,0.8817,0.8483,1.0,0.945,0.6433,0.5383,0.42,0.3867,0.3683,0.9384,0.9292,0.2782,0.6268,0.659,0.6005,0.5822,0.885,0.4683,,,0.4617,0.155,0.1483,1.0,0.32,0.3917,0.6139,0.40582500000000005,0.8924914675767918,0.42857142857142855,0.9474943686006827,0.5566428571428571,1.0,1.0,,,,,,,0.8455056179775281,0.8524590163934426,0.9357103932584269,0.919794262295082,0.0,0.0,0.0,0.0,0.2967,0.0
overall,classification,gemini-2.5-flash,0.8983,0.93,1.0,0.9683,0.9067,0.8483,0.33,0.3383,0.3267,0.9383,0.9607,0.0882,0.8392,0.6356,0.6168,0.6381,0.875,0.9483,,,0.8533,0.0717,0.0667,1.0,0.14,0.3317,0.6302,0.3196,0.9478764478764479,0.5853658536585366,0.9560220077220076,0.8265573170731707,1.0,1.0,,,,,,,0.9457092819614711,0.6206896551724138,0.9653462346760071,0.8686103448275863,0.0,0.0,0.0,0.0,0.0004,0.0
overall,classification,gemini-2.5-pro,0.8533,0.765,1.0,0.88,0.93,0.7133,0.3733,0.3133,0.3367,0.8978,0.8616,0.0622,0.7902,0.6515,0.5929,0.6195,0.9067,0.9583,,,0.7467,0.0917,0.0883,1.0,0.14,0.3411,0.6213,0.32999999999999996,0.9269662921348315,0.25757575757575757,0.9306636704119851,0.6316727272727273,1.0,1.0,,,,,,,0.7949640287769785,0.38636363636363635,0.8822052158273381,0.6009159090909091,0.0,0.0,0.0,0.0,0.0,0.0
overall,classification,gpt-4.1-2025-04-14,0.9183,0.87,0.7033,0.975,0.87,0.7483,0.2983,0.2917,0.325,0.9754,0.9711,0.1174,0.8484,0.6209,0.6051,0.6229,0.9233,0.6967,,,0.7467,0.0517,0.0533,0.3167,0.1283,0.305,0.6163,0.13749999999999998,0.9453262786596119,0.45454545454545453,0.9865074074074074,0.7850454545454546,0.7033898305084746,0.7,,,,,,,0.8713550600343053,0.8235294117647058,0.9724267581475129,0.9273411764705882,0.0,0.0,0.0,0.0,0.0,0.0
overall,classification,gpt-4.1-mini-2025-04-14,0.8983,0.8633,0.64,0.9483,0.7717,0.64,0.345,0.3383,0.365,0.9573,0.9482,0.2183,0.7241,0.5946,0.5772,0.6007,0.8933,0.4483,,,0.625,0.1667,0.165,0.3467,0.2517,0.3494,0.5908,0.23252499999999998,0.9341864716636198,0.5283018867924528,0.971663436928702,0.808588679245283,0.653179190751445,0.6346604215456675,,,,,,,0.8738574040219378,0.7547169811320755,0.9515482632541133,0.9139528301886793,0.0,0.0,0.0,0.0,0.0,0.0
overall,classification,gpt-4.1-nano-2025-04-14,0.9733,0.565,0.7167,0.5667,0.3817,0.0,0.96,0.9583,0.955,0.9757,0.7272,0.6149,0.0017,0.9608,0.96,0.9558,0.6933,0.365,,,1.0,0.9833,0.9733,0.7133,0.565,0.9578,0.9589,0.808725,0.9651162790697675,0.9941176470588236,0.9672204651162791,0.9970588235294118,0.6622516556291391,0.734966592427617,,,,,,,,0.565,,0.7272223333333333,0.0,0.0,0.0,0.0,1.0,1.0
overall,classification,gpt-4o,0.9733,0.9367,0.9933,0.9833,0.3633,0.3067,0.6,0.32,0.4233,0.5867,0.9801,0.6279,0.3508,0.4333,0.4147,0.5083,0.965,0.6633,,,0.735,0.5683,0.5733,0.89,0.635,0.4478,0.4521,0.66665,0.9828473413379074,0.6470588235294118,0.591153859348199,0.43373529411764705,0.9925,0.995,,,,,,,0.9257142857142857,0.9411764705882353,0.9834554285714284,0.9787383529411765,0.0,0.0,0.0,0.0,1.0,0.0
overall,classification,gpt-5,0.9233,0.8817,1.0,0.965,0.7067,0.5983,0.5583,0.5717,0.5867,0.9663,0.9334,0.292,0.6683,0.7534,0.7402,0.7621,0.9133,0.9817,,,0.63,0.2717,0.27,1.0,0.3583,0.5722,0.7519,0.47500000000000003,0.9547101449275363,0.5625,0.9820644927536232,0.7848270833333334,1.0,1.0,,,,,,,0.8949211908931699,0.6206896551724138,0.9381744308231172,0.8404068965517242,0.0,0.0,0.0,0.0,0.965,0.0
overall,classification,gpt-5-mini,0.88,0.6417,1.0,0.75,0.6733,0.34,0.59,0.5933,0.5817,0.9258,0.7667,0.3267,0.4464,0.731,0.738,0.7328,0.8467,0.9617,,,0.38,0.4083,0.3883,1.0,0.4067,0.5883,0.7339,0.550825,0.8935018050541517,0.717391304347826,0.9371158844765343,0.7889826086956522,1.0,1.0,,,,,,,0.6433566433566433,0.6071428571428571,0.7665884615384616,0.7700071428571428,0.0,0.0,0.0,0.0022,0.0012,0.0
overall,classification,gpt-5-nano,0.7783,0.69,1.0,0.7683,0.195,0.005,0.09,0.5633,0.075,0.7816,0.7608,0.8039,0.0782,0.096,0.566,0.0816,0.7767,0.8933,,,0.0883,0.1117,0.1067,1.0,0.0217,0.2428,0.2479,0.310025,0.8055045871559633,0.509090909090909,0.8081271559633028,0.5191600000000001,1.0,1.0,,,,,,,0.72,0.36,0.782636,0.520666,0.0,0.0,0.0,0.0,1.0,1.0
overall,classification,gpt-oss:20b,0.6717,0.4833,1.0,0.7317,0.8467,0.38,0.2717,0.2483,0.2633,0.7939,0.7677,0.1073,0.6099,0.4892,0.4829,0.4894,0.665,0.8933,,,0.4033,0.1567,0.1183,1.0,0.2067,0.2611,0.4872,0.370425,0.6784452296819788,0.5588235294117647,0.7968805653710247,0.7450470588235295,1.0,1.0,,,,,,,0.48257839721254353,0.5,0.7658412891986063,0.8090038461538462,0.0,0.0,0.0,0.0,0.2132,0.0
overall,classification,grok-3-mini,0.9083,0.7883,1.0,0.9,0.8667,0.6683,0.3767,0.365,0.3717,0.9639,0.9366,0.1277,0.7869,0.6309,0.6203,0.6182,0.9183,0.9517,,,0.6933,0.1133,0.115,1.0,0.175,0.3711,0.6231,0.350825,0.9542124542124543,0.4444444444444444,0.986679304029304,0.73355,1.0,1.0,,,,,,,0.7951807228915663,0.5789473684210527,0.938937865748709,0.8646157894736843,0.0,0.0,0.0,0.0,0.7516,0.0
overall,classification,llama3.1:70b,0.8033,0.5733,1.0,0.8633,0.935,0.5617,0.2033,0.1767,0.18,0.9071,0.8621,0.0295,0.812,0.4882,0.4549,0.4659,0.8233,0.09,,,0.5367,0.0133,0.03,1.0,0.0333,0.1867,0.4697,0.26915,0.8230240549828178,0.16666666666666666,0.9209295532646048,0.46067777777777785,1.0,1.0,,,,,,,0.5727069351230425,0.5751633986928104,0.8625514541387023,0.8609013071895424,0.0,0.0,0.0,0.0,0.0,0.0
overall,classification,llama3.1:8b,0.1633,0.0833,1.0,0.3383,0.6817,0.05,0.045,0.0367,0.0317,0.5603,0.5018,0.0974,0.3899,0.2944,0.253,0.2497,0.81,0.1383,,,0.8267,0.0,0.0033,1.0,0.0,0.0378,0.2657,0.250825,0.19230769230769232,0.16202090592334495,0.7328115384615385,0.5525364111498258,1.0,1.0,,,,,,,0.10256410256410256,0.08045977011494253,0.5077294871794872,0.5008572796934866,0.0,0.0,0.0,0.0,1.0,0.0
overall,classification,mistral-small:24b,0.845,0.565,1.0,0.8067,0.7983,0.4233,0.3467,0.2517,0.28,0.9257,0.811,0.1774,0.6371,0.5922,0.5026,0.524,0.885,0.0017,,,0.565,0.1133,0.1217,1.0,0.1083,0.2928,0.5396,0.335825,0.8981818181818182,0.26,0.9508209090909089,0.6489880000000001,1.0,1.0,,,,,,,0.5116279069767442,0.5691202872531418,0.816660465116279,0.8105967684021544,0.0,0.0,0.0,0.0,0.0,0.0
overall,classification,o3,0.82,0.81,0.8833,0.9133,0.925,0.7467,0.325,0.3217,0.3283,0.9324,0.9124,0.0674,0.8397,0.6034,0.6148,0.6098,0.8433,0.8567,,,0.775,0.1,0.08,0.7733,0.11,0.325,0.6093,0.26582500000000003,0.8553571428571428,0.325,0.9499392857142858,0.6866475,0.884083044982699,0.8636363636363636,,,,,,,0.830122591943958,0.41379310344827586,0.9222970227670754,0.716851724137931,0.0,0.0,0.0,0.0,0.0001,0.0
overall,fixing,deepseek-chat,0.8167,0.7467,0.8883,0.95,0.9467,0.71,0.4883,0.5133,0.505,0.9117,0.9164,0.0396,0.798,0.714,0.7276,0.7305,,,,,,0.0667,0.0633,0.1317,0.1383,0.5022,0.724,0.1,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,deepseek-reasoner,0.6833,0.6133,0.7567,0.8633,0.93,0.5833,0.2033,0.1867,0.1883,0.8221,0.8357,0.0537,0.7955,0.4582,0.4436,0.4432,,,,,,0.0467,0.0417,0.0883,0.0733,0.1928,0.4483,0.0625,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gemini-2.0-flash,0.79,0.7517,0.8417,0.92,0.9667,0.7383,0.775,0.7683,0.7467,0.8233,0.9066,0.0137,0.8415,0.8706,0.8656,0.8529,,,,,,0.0167,0.0033,0.0133,0.085,0.7633,0.863,0.029575,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gemini-2.5-flash,0.8917,0.89,0.9017,0.9533,0.9567,0.855,0.6817,0.68,0.665,0.9256,0.9172,0.0408,0.8349,0.7806,0.7855,0.777,,,,,,0.0767,0.045,0.0667,0.1317,0.6756,0.781,0.08002500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gemini-2.5-pro,0.7733,0.7583,0.8017,0.9217,0.9783,0.7483,0.4717,0.485,0.4367,0.8197,0.8579,0.0206,0.8089,0.6247,0.6606,0.6217,,,,,,0.115,0.085,0.11,0.11,0.4645,0.6357,0.105,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gpt-4.1-2025-04-14,0.825,0.78,0.9183,0.945,0.9533,0.7483,0.5217,0.5333,0.5233,0.9207,0.9252,0.0309,0.8364,0.7766,0.7789,0.7743,,,,,,0.045,0.045,0.0733,0.1017,0.5261,0.7766,0.06625,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gpt-4.1-mini-2025-04-14,0.775,0.73,0.8767,0.9633,0.8967,0.6883,0.445,0.4517,0.4667,0.8881,0.8835,0.061,0.7953,0.7114,0.727,0.7168,,,,,,0.0517,0.06,0.1267,0.1117,0.4545,0.7184,0.08752499999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gpt-4.1-nano-2025-04-14,0.5467,0.3083,0.5917,0.6017,0.6183,0.215,0.3483,0.3533,0.44,0.6237,0.5043,0.2257,0.3509,0.4852,0.4825,0.5477,,,,,,0.1667,0.2067,0.2683,0.1333,0.3805,0.5051,0.19374999999999998,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.55,0.0
overall,fixing,gpt-4o,0.8533,0.785,0.9183,0.9483,0.865,0.6783,0.54,0.555,0.5483,0.7343,0.739,0.1209,0.5879,0.5486,0.5576,0.5574,,,,,,0.32,0.3233,0.37,0.3417,0.5478,0.5545,0.33875,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gpt-5,0.9067,0.8617,0.94,0.9817,0.79,0.6533,0.645,0.63,0.6167,0.9611,0.912,0.21,0.6917,0.8059,0.7947,0.7822,,,,,,0.3383,0.3417,0.3267,0.3767,0.6306,0.7943,0.34585,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0005,0.0
overall,fixing,gpt-5-mini,0.8683,0.7867,0.9167,0.95,0.7367,0.545,0.68,0.6933,0.6867,0.9104,0.873,0.2617,0.5503,0.7839,0.7999,0.8043,,,,,,0.5117,0.4833,0.5183,0.4883,0.6867,0.796,0.5004000000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gpt-5-nano,0.8417,0.7467,0.8317,0.9717,0.7783,0.5333,0.625,0.63,0.6033,0.8965,0.8536,0.214,0.5845,0.748,0.7591,0.7455,,,,,,0.4283,0.3767,0.3683,0.4767,0.6194,0.7509,0.4125,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,gpt-oss:20b,0.5833,0.65,0.8067,0.9233,0.9533,0.62,0.2433,0.2767,0.2433,0.765,0.8443,0.0397,0.7864,0.4946,0.5291,0.4899,,,,,,0.1067,0.08,0.16,0.1867,0.2544,0.5045,0.13335,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,grok-3-mini,0.8817,0.85,0.9267,0.985,0.955,0.81,0.4083,0.3917,0.3967,0.9363,0.9448,0.04,0.7916,0.6373,0.6291,0.643,,,,,,0.0983,0.1033,0.1367,0.2,0.3989,0.6365,0.134575,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,llama3.1:70b,0.61,0.5283,0.7267,0.8217,0.935,0.5067,0.21,0.2283,0.2067,0.7529,0.7957,0.0337,0.7635,0.4702,0.4774,0.4645,,,,,,0.01,0.0483,0.0983,0.045,0.215,0.4707,0.0504,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,llama3.1:8b,0.1267,0.0267,0.155,0.1233,0.6183,0.0183,0.0217,0.0167,0.0267,0.4549,0.3316,0.1126,0.2414,0.1659,0.1602,0.18,,,,,,0.0,0.0017,0.0067,0.0,0.0217,0.1687,0.0021,,,,,,,,,,,,,,,,,0.0,0.0001,0.0,0.0,1.0,0.0005
overall,fixing,mistral-small:24b,0.72,0.5183,0.7467,0.9,0.9583,0.5017,0.5233,0.5383,0.5367,0.8296,0.7563,0.0148,0.6782,0.7518,0.7489,0.759,,,,,,0.1683,0.105,0.1917,0.1567,0.5328,0.7532,0.15542499999999998,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,fixing,o3,0.7367,0.795,0.92,0.945,0.9733,0.7717,0.3783,0.3633,0.3783,0.8218,0.8565,0.0244,0.8082,0.5603,0.5549,0.5642,,,,,,0.1567,0.1333,0.11,0.155,0.3733,0.5598,0.13875,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0
overall,wikidata,deepseek-chat,0.3665,0.2149,0.5611,0.543,0.6131,0.1335,,,,0.5349,0.4654,0.1436,0.229,,,,,,,,,0.1154,0.1154,0.2149,0.3032,,,0.187225,,,,,,,,,,,,,,,,,0.0567,0.0143,0.0001,0.0,0.1273,0.0028
overall,wikidata,gemini-2.0-flash,0.3416,0.1606,0.5023,0.4661,0.6018,0.0905,,,,0.5907,0.4726,0.1197,0.2984,,,,,,,,,0.0385,0.0611,0.1131,0.1471,,,0.08995,,,,,,,,,,,,,,,,,0.7747,0.1037,0.0042,0.0005,0.8364,0.0057
overall,wikidata,gemini-2.5-flash,0.3348,0.2308,0.5814,0.5407,0.8756,0.2195,,,,0.4727,0.4442,0.0285,0.3483,,,,,,,,,0.2172,0.2104,0.2149,0.2602,,,0.225675,,,,,,,,,,,,,,,,,0.7886,0.9852,0.0073,0.0042,0.0937,0.8886
overall,wikidata,gemini-2.5-pro,0.319,0.267,0.5226,0.4638,0.7964,0.2489,,,,0.6416,0.6057,0.0345,0.5018,,,,,,,,,0.0158,0.0226,0.0475,0.0452,,,0.032775,,,,,,,,,,,,,,,,,0.7308,0.1177,0.0024,0.0062,0.6624,0.0444
overall,wikidata,gpt-4.1-2025-04-14,0.457,0.2104,0.5588,0.4661,0.6312,0.1357,,,,0.7656,0.6092,0.1087,0.3982,,,,,,,,,0.009,0.009,0.0475,0.0633,,,0.0322,,,,,,,,,,,,,,,,,0.4571,0.151,0.1572,0.077,0.4063,0.1147
overall,wikidata,gpt-4.1-mini-2025-04-14,0.2624,0.0814,0.3507,0.2896,0.7828,0.0407,,,,0.4393,0.3009,0.062,0.2012,,,,,,,,,0.052,0.0769,0.0407,0.1584,,,0.08199999999999999,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,0.9887
overall,wikidata,gpt-4.1-nano-2025-04-14,0.3303,0.0747,0.3846,0.5113,0.81,0.0407,,,,0.5668,0.3619,0.0638,0.2831,,,,,,,,,0.0566,0.0701,0.6018,0.1335,,,0.21549999999999997,,,,,,,,,,,,,,,,,0.2734,0.9961,0.9999,0.0,0.0,0.5
overall,wikidata,gpt-4o,0.8898,0.7576,0.7966,0.8678,0.1898,0.0102,,,,0.8963,0.8131,0.8015,0.1139,,,,,,,,,0.8729,0.8746,0.8305,0.9119,,,0.8724750000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,1.0
overall,wikidata,gpt-5,0.7288,0.5576,0.7271,0.7542,0.6729,0.2593,,,,0.8263,0.7395,0.3023,0.479,,,,,,,,,0.4542,0.4644,0.3441,0.4305,,,0.4233,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0079
overall,wikidata,gpt-5-mini,0.7525,0.5966,0.7237,0.7542,0.5085,0.1542,,,,0.8218,0.7324,0.4703,0.3265,,,,,,,,,0.5966,0.6034,0.5186,0.6288,,,0.5868500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0001,0.0003,1.0,0.2352
overall,wikidata,gpt-5-nano,0.7441,0.5153,0.6932,0.7254,0.5983,0.1644,,,,0.8264,0.6952,0.3778,0.3776,,,,,,,,,0.5627,0.5424,0.4305,0.5542,,,0.5224500000000001,,,,,,,,,,,,,,,,,0.0,0.0001,0.0196,0.0,1.0,0.849
overall,wikidata,gpt-oss:20b,0.295,0.1683,0.4217,0.3617,0.84,0.1167,,,,0.4883,0.4192,0.082,0.3175,,,,,,,,,0.165,0.175,0.1383,0.1917,,,0.16749999999999998,,,,,,,,,,,,,,,,,0.0,0.1983,0.0268,0.0493,0.3121,0.1339
overall,wikidata,grok-3-mini,0.3373,0.2373,0.5559,0.4678,0.8763,0.2136,,,,0.5852,0.5386,0.0461,0.4162,,,,,,,,,0.0627,0.078,0.0915,0.1356,,,0.09195,,,,,,,,,,,,,,,,,0.7104,0.9463,0.0857,0.1133,0.5419,0.8743
overall,wikidata,llama3.1:70b,0.2267,0.0817,0.3183,0.2417,0.6483,0.0217,,,,0.4887,0.3925,0.1181,0.2072,,,,,,,,,0.0083,0.0117,0.0183,0.0267,,,0.01625,,,,,,,,,,,,,,,,,0.1257,0.2051,0.0894,0.4634,0.9971,0.927
overall,wikidata,llama3.1:8b,0.0417,0.0067,0.12,0.0783,0.6917,0.0,,,,0.2587,0.2058,0.0817,0.1037,,,,,,,,,0.0033,0.005,0.0117,0.01,,,0.0075,,,,,,,,,,,,,,,,,0.0041,0.1875,0.0,0.0,1.0,1.0
overall,wikidata,mistral-small:24b,0.7186,0.5068,0.661,0.6322,0.3373,0.0085,,,,0.7857,0.6479,0.6177,0.1285,,,,,,,,,0.6,0.6085,0.6661,0.7644,,,0.6597500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.927
overall,wikidata,o3,0.4218,0.2721,0.5544,0.4388,0.8946,0.2296,,,,0.6996,0.6022,0.0442,0.5136,,,,,,,,,0.1139,0.1139,0.0782,0.1071,,,0.103275,,,,,,,,,,,,,,,,,0.0,0.0084,0.0103,0.0085,0.0356,0.0096
overall,zero-shot,deepseek-chat,0.325,0.165,0.455,0.4017,0.56,0.0733,0.3881,,,0.5703,0.4662,0.1661,0.2573,0.6082000000000001,,,0.3483,0.5033,0.5433,0.56,0.1483,0.0617,0.0733,0.14,0.18,0.2729,0.5227,0.11375,0.3298611111111111,0.20833333333333334,0.5765803819444444,0.4197916666666666,0.4413145539906103,0.4625322997416021,0.368,0.4105263157894737,0.5604026845637584,0.5,0.16629882550335573,0.140625,0.15837937384898712,0.22807017543859648,0.46725064456721915,0.45645789473684206,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,deepseek-reasoner,0.1867,0.135,0.4017,0.3017,0.82,0.1083,0.2995,,,0.461,0.4102,0.0396,0.3115,0.5185500000000001,,,0.2817,0.4183,0.315,0.7967,0.165,0.0333,0.0467,0.035,0.0533,0.1723,0.4072,0.042075,0.20075046904315197,0.07462686567164178,0.48127504690431516,0.29982089552238805,0.4072164948453608,0.2222222222222222,0.3017241379310345,0.3,0.8200692041522492,0.8181818181818182,0.03940397923875433,0.04500909090909091,0.1431095406360424,0.0,0.41895671378091875,0.26439411764705884,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gemini-2.0-flash,0.3267,0.1217,0.415,0.3533,0.6267,0.05,0.48034999999999994,,,0.6008,0.4605,0.13,0.2914,0.7008000000000001,,,0.3733,0.445,0.6233,0.625,0.2333,0.0017,0.005,0.0033,0.0067,0.5407,0.7426,0.004175,0.332089552238806,0.28125,0.6044757462686567,0.569709375,0.41025641025641024,0.4318181818181818,0.4453125,0.3283898305084746,0.6345811051693404,0.5128205128205128,0.13011836007130123,0.12855128205128205,0.1160337552742616,0.14285714285714285,0.4620457805907173,0.4545468253968254,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gemini-2.5-flash,0.335,0.2467,0.495,0.415,0.845,0.2183,0.37685,,,0.6101,0.5398,0.036,0.4382,0.64855,,,0.4183,0.5083,0.4183,0.84,0.2567,0.045,0.04,0.06,0.0683,0.4237,0.6615,0.053325,0.35714285714285715,0.1951219512195122,0.6282980694980694,0.49529878048780485,0.5017421602787456,0.34615384615384615,0.4134948096885813,0.45454545454545453,0.8537005163511188,0.5789473684210527,0.030123924268502585,0.2145263157894737,0.2530541012216405,0.1111111111111111,0.5482260034904014,0.36104444444444445,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gemini-2.5-pro,0.31,0.2233,0.4367,0.3883,0.815,0.195,0.36885,,,0.6314,0.5665,0.0325,0.4698,0.66435,,,0.3867,0.4367,0.3967,0.8167,0.2333,0.0267,0.0233,0.04,0.0433,0.3644,0.6772,0.033325,0.33394160583941607,0.057692307692307696,0.6490443430656934,0.4458115384615384,0.4351535836177474,0.5,0.3862433862433862,0.42424242424242425,0.8206429780033841,0.4444444444444444,0.02945245346869712,0.23574444444444445,0.2322357019064125,0.0,0.5735788561525129,0.38996956521739135,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-4.1-2025-04-14,0.3967,0.165,0.4983,0.3867,0.6333,0.1017,0.2873,,,0.6771,0.5435,0.1118,0.3581,0.5949,,,0.4283,0.4983,0.5033,0.6383,0.1083,0.0167,0.0217,0.05,0.0633,0.2763,0.5689,0.037925,0.4074074074074074,0.21212121212121213,0.6917485008818343,0.4259424242424243,0.4983108108108108,0.5,0.42105263157894735,0.3076923076923077,0.6365159128978225,0.0,0.10899212730318257,0.6805666666666667,0.1644295302013423,0.25,0.5446303691275167,0.37377499999999997,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-4.1-mini-2025-04-14,0.3367,0.14,0.4817,0.3833,0.5633,0.055,0.28015,,,0.6296,0.5016,0.1574,0.2812,0.53845,,,0.3867,0.46,0.5917,0.5633,0.0867,0.025,0.025,0.0567,0.0867,0.2153,0.4823,0.048350000000000004,0.34798534798534797,0.2222222222222222,0.6418919413919414,0.5058537037037036,0.41545893719806765,0.5165394402035624,0.44,0.3684210526315789,0.563973063973064,0.5,0.15517878787878786,0.3772,0.13391304347826086,0.28,0.5026756521739131,0.47752799999999995,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-4.1-nano-2025-04-14,0.2783,0.1183,0.4567,0.3717,0.62,0.04,0.5852499999999999,,,0.5129,0.3949,0.1915,0.2037,0.68235,,,0.4567,0.5,0.5317,0.5,0.96,0.0683,0.085,0.1833,0.1667,0.2105,0.4039,0.125825,0.30843373493975906,0.21081081081081082,0.5487522891566265,0.4323335135135135,0.4449152542372881,0.4642857142857143,0.38306451612903225,0.36363636363636365,0.5962566844919787,0.6592920353982301,0.20640935828877008,0.1666973451327434,,0.11833333333333333,,0.39486116666666665,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-4o,0.4517,0.255,0.5333,0.47,0.6283,0.0617,0.48985,,,0.6534,0.5385,0.2669,0.2612,0.52095,,,0.4533,0.49,0.535,0.6317,0.3767,0.2583,0.2533,0.2817,0.3983,0.3797,0.6086,0.29790000000000005,0.4497354497354497,0.48484848484848486,0.6583820105820105,0.5677454545454544,0.5368421052631579,0.5317073170731708,0.5205479452054794,0.4629981024667932,0.6308724832214765,0.25,0.26364412751677846,0.75,0.23785166240409208,0.28708133971291866,0.5503314578005115,0.5163626794258374,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-5,0.61,0.4317,0.6417,0.65,0.735,0.2167,0.58595,,,0.7816,0.6814,0.2211,0.4816,0.76895,,,0.6083,0.635,0.6467,0.7567,0.2683,0.3567,0.3433,0.2533,0.3267,0.6136,0.7845,0.32,0.6206261510128913,0.5087719298245614,0.7890513812154696,0.7101964912280702,0.6416382252559727,0.6428571428571429,0.6508474576271186,0.6,0.7538726333907056,0.15789473684210525,0.20501153184165233,0.7130684210526317,0.42680776014109345,0.5151515151515151,0.6833717813051146,0.6470454545454545,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-5-mini,0.6533,0.4783,0.6383,0.6817,0.615,0.1433,0.6314500000000001,,,0.7718,0.6709,0.3593,0.3725,0.7622,,,0.6333,0.63,0.6833,0.635,0.1917,0.485,0.465,0.41,0.5267,0.6729,0.7934,0.47167499999999996,0.6586715867158671,0.603448275862069,0.7772850553505535,0.721003448275862,0.6380789022298456,0.6470588235294118,0.686541737649063,0.46153846153846156,0.631578947368421,0.3,0.34176122807017545,0.6916666666666667,0.47285464098073554,0.5862068965517241,0.6680252189141856,0.7275965517241381,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-5-nano,0.59,0.4367,0.64,0.6317,0.6767,0.1733,0.33485,,,0.7228,0.633,0.2988,0.3852,0.4209,,,0.5783,0.6133,0.6267,0.6583,0.245,0.4133,0.4267,0.3617,0.47,0.5797,0.7458,0.41792500000000005,0.5916515426497277,0.5714285714285714,0.7257918330308529,0.689430612244898,0.6490196078431373,0.5888888888888889,0.6411657559198543,0.5294117647058824,0.6892655367231638,0.5797101449275363,0.28559152542372884,0.4001608695652174,0.43327239488117003,0.4716981132075472,0.6283680073126143,0.6806415094339624,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,gpt-oss:20b,0.2167,0.1533,0.38,0.325,0.83,0.1,0.22705,,,0.4413,0.398,0.0775,0.2927,0.44435,,,0.2617,0.4033,0.3217,0.8033,0.16,0.12,0.1267,0.1067,0.1483,0.1824,0.3995,0.125425,0.22202486678507993,0.13513513513513514,0.44832984014209587,0.33379729729729724,0.38267148014440433,0.34782608695652173,0.3166089965397924,0.5454545454545454,0.8467153284671532,0.6538461538461539,0.0644669708029197,0.21456346153846154,0.15602836879432624,0.1111111111111111,0.4029003546099291,0.32116666666666666,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,grok-3-mini,0.3433,0.2617,0.5233,0.4383,0.8767,0.23,0.3767,,,0.6346,0.5715,0.0561,0.4478,0.6309,,,0.3933,0.5217,0.44,0.88,0.2617,0.0567,0.07,0.075,0.1233,,,0.08125,0.3563636363636364,0.2,0.6489525454545455,0.477314,0.523725834797891,0.5161290322580645,0.4369602763385147,0.47619047619047616,0.8873720136518771,0.42857142857142855,0.050362457337883956,0.2975428571428571,0.2685025817555938,0.05263157894736842,0.5792645438898452,0.3328368421052632,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,llama3.1:70b,0.2067,0.07,0.2883,0.2383,0.71,0.0283,0.2033,,,0.4712,0.3543,0.1006,0.2067,0.4882,,,0.23,0.6817,0.6217,0.7117,0.0433,0.02,0.025,0.0283,0.06,,,0.033325,0.2106164383561644,0.0625,0.47690958904109587,0.26463749999999997,0.30434782608695654,0.2870036101083033,0.24074074074074073,0.2374429223744292,0.7126050420168067,0.4,0.1,0.17,0.0676818950930626,0.2222222222222222,0.3524453468697124,0.47390000000000004,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,llama3.1:8b,0.0167,0.0017,0.03,0.0117,0.8433,0.0,0.029249999999999998,,,0.1667,0.1173,0.0329,0.0644,0.21525,,,0.9383,0.8633,0.8733,0.285,0.9717,0.0083,0.0033,0.0233,0.0317,0.0135,0.1361,0.016649999999999998,0.11428571428571428,0.010619469026548672,0.2730942857142857,0.16006230088495574,0.029411764705882353,0.03007518796992481,0.0,0.013182674199623353,0.8347826086956521,0.845360824742268,0.03642608695652174,0.03208536082474227,0.0,0.0017152658662092624,0.16815882352941175,0.1157934819897084,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,mistral-small:24b,0.4407,0.2407,0.5034,0.4525,0.5085,0.0153,0.4301,,,0.6399,0.5192,0.3447,0.2057,0.6410499999999999,,,0.4898,0.4949,0.5458,0.5068,0.8729,0.2203,0.2271,0.3322,0.3881,0.5135,0.6899,0.291925,0.46210720887245843,0.20408163265306123,0.6567571164510166,0.4542408163265305,0.0,0.5042444821731749,0.42857142857142855,0.4528301886792453,0.5076660988074957,0.6666666666666666,0.344718398637138,0.3333333333333333,0.2857142857142857,0.23461538461538461,0.5582942857142856,0.5139790384615385,1.0,1.0,1.0,1.0,1.0,1.0
overall,zero-shot,o3,0.3441,0.2288,0.5068,0.3864,0.8661,0.1915,0.31420000000000003,,,0.6246,0.5426,0.0553,0.4383,0.5892,,,0.3932,0.4949,0.3966,0.8492,0.2542,0.0898,0.0983,0.0864,0.0847,0.3034,0.575,0.08979999999999999,0.3583180987202925,0.16279069767441862,0.6396588665447899,0.4324162790697674,0.5008726003490401,0.7058823529411765,0.3888888888888889,0.2857142857142857,0.8754448398576512,0.6785714285714286,0.04982722419928825,0.16460357142857143,0.23869801084990958,0.08108108108108109,0.5525264014466547,0.3948270270270271,1.0,1.0,1.0,1.0,1.0,1.0
