ID,dataset,action,llm,?A1=A2,?A1=A3+A4,?A1>A3,?A1>A4,?A3∅A4,?A4=A1|3,?A1=A1*,?A1=A1**,?A1*=A1**,J(A1-A2),J(A1-A34),J(A3-A4),J(A4-A1|3),J(A1-A1*),J(A1-A1**),J(A1*-A1**),?SC(A1=A2),?SC(A1>A3),?SC(A1>A4),?SC(A3∅A4),?SC(A4=A1|3),idk_A1,idk_A2,idk_A3,idk_A4,?A1=A1(ave),J_A1_ave,idk,?A1=A2(+),?A1=A2(-),J(1-2)+,J(1-2)-,?A1>A3(+),?A1>A3(-),?A1>A4(+),?A1>A4(-),?A3∅A4(+),?A3∅A4(-),J(3-4)+,J(3-4)-,?A1=A3+A4(+),?A1=A3+A4(-),J(1-34)+,J(1-34)-,p(A1=A2)_x,p(A1=A3+A4)_x,p(A1>A3)_x,p(A1>A4)_x,p(A3∅A4)_x,p(A4=A1|3)_x,p(A1=A2)_y,p(A1=A3+A4)_y,p(A1>A3)_y,p(A1>A4)_y,p(A3∅A4)_y,p(A4=A1|3)_y
A,overall,fixing,llama3.1:8b,0.1267,0.0267,0.155,0.1233,0.6183,0.0183,0.0217,0.0167,0.0267,0.4549,0.3316,0.1126,0.2414,0.1659,0.1602,0.18,,,,,,0.0,0.0017,0.0067,0.0,0.0217,0.1687,0.0021,,,,,,,,,,,,,,,,,0.0,0.0001,0.0,0.0,1.0,0.0005,0.0,0.0001,0.0,0.0,1.0,0.0005
B,overall,fixing,gpt-oss:20b,0.5833,0.65,0.8067,0.9233,0.9533,0.62,0.2433,0.2767,0.2433,0.765,0.8443,0.0397,0.7864,0.4946,0.5291,0.4899,,,,,,0.1067,0.08,0.16,0.1867,0.2544,0.5045,0.13335,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
C,overall,fixing,gpt-4.1-nano-2025-04-14,0.5467,0.3083,0.5917,0.6017,0.6183,0.215,0.3483,0.3533,0.44,0.6237,0.5043,0.2257,0.3509,0.4852,0.4825,0.5477,,,,,,0.1667,0.2067,0.2683,0.1333,0.3805,0.5051,0.19374999999999998,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.55,0.0,0.0,0.0,0.0,0.0,0.55,0.0
D,overall,fixing,mistral-small:24b,0.72,0.5183,0.7467,0.9,0.9583,0.5017,0.5233,0.5383,0.5367,0.8296,0.7563,0.0148,0.6782,0.7518,0.7489,0.759,,,,,,0.1683,0.105,0.1917,0.1567,0.5328,0.7532,0.15542499999999998,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
E,overall,fixing,llama3.1:70b,0.61,0.5283,0.7267,0.8217,0.935,0.5067,0.21,0.2283,0.2067,0.7529,0.7957,0.0337,0.7635,0.4702,0.4774,0.4645,,,,,,0.01,0.0483,0.0983,0.045,0.215,0.4707,0.0504,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F,overall,fixing,gemini-2.0-flash,0.79,0.7517,0.8417,0.92,0.9667,0.7383,0.775,0.7683,0.7467,0.8233,0.9066,0.0137,0.8415,0.8706,0.8656,0.8529,,,,,,0.0167,0.0033,0.0133,0.085,0.7633,0.863,0.029575,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
G,overall,fixing,gpt-4.1-mini-2025-04-14,0.775,0.73,0.8767,0.9633,0.8967,0.6883,0.445,0.4517,0.4667,0.8881,0.8835,0.061,0.7953,0.7114,0.727,0.7168,,,,,,0.0517,0.06,0.1267,0.1117,0.4545,0.7184,0.08752499999999999,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
H,overall,fixing,gpt-4o,0.8533,0.785,0.9183,0.9483,0.865,0.6783,0.54,0.555,0.5483,0.7343,0.739,0.1209,0.5879,0.5486,0.5576,0.5574,,,,,,0.32,0.3233,0.37,0.3417,0.5478,0.5545,0.33875,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
I,overall,fixing,gpt-4.1-2025-04-14,0.825,0.78,0.9183,0.945,0.9533,0.7483,0.5217,0.5333,0.5233,0.9207,0.9252,0.0309,0.8364,0.7766,0.7789,0.7743,,,,,,0.045,0.045,0.0733,0.1017,0.5261,0.7766,0.06625,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
J,overall,fixing,grok-3-mini,0.8817,0.85,0.9267,0.985,0.955,0.81,0.4083,0.3917,0.3967,0.9363,0.9448,0.04,0.7916,0.6373,0.6291,0.643,,,,,,0.0983,0.1033,0.1367,0.2,0.3989,0.6365,0.134575,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
K,overall,fixing,deepseek-chat,0.8167,0.7467,0.8883,0.95,0.9467,0.71,0.4883,0.5133,0.505,0.9117,0.9164,0.0396,0.798,0.714,0.7276,0.7305,,,,,,0.0667,0.0633,0.1317,0.1383,0.5022,0.724,0.1,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
L,overall,fixing,gemini-2.5-flash,0.8917,0.89,0.9017,0.9533,0.9567,0.855,0.6817,0.68,0.665,0.9256,0.9172,0.0408,0.8349,0.7806,0.7855,0.777,,,,,,0.0767,0.045,0.0667,0.1317,0.6756,0.781,0.08002500000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
M,overall,fixing,gpt-5-nano,0.8417,0.7467,0.8317,0.9717,0.7783,0.5333,0.625,0.63,0.6033,0.8965,0.8536,0.214,0.5845,0.748,0.7591,0.7455,,,,,,0.4283,0.3767,0.3683,0.4767,0.6194,0.7509,0.4125,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
N,overall,fixing,deepseek-reasoner,0.6833,0.6133,0.7567,0.8633,0.93,0.5833,0.2033,0.1867,0.1883,0.8221,0.8357,0.0537,0.7955,0.4582,0.4436,0.4432,,,,,,0.0467,0.0417,0.0883,0.0733,0.1928,0.4483,0.0625,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
O,overall,fixing,gemini-2.5-pro,0.7733,0.7583,0.8017,0.9217,0.9783,0.7483,0.4717,0.485,0.4367,0.8197,0.8579,0.0206,0.8089,0.6247,0.6606,0.6217,,,,,,0.115,0.085,0.11,0.11,0.4645,0.6357,0.105,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P,overall,fixing,gpt-5-mini,0.8683,0.7867,0.9167,0.95,0.7367,0.545,0.68,0.6933,0.6867,0.9104,0.873,0.2617,0.5503,0.7839,0.7999,0.8043,,,,,,0.5117,0.4833,0.5183,0.4883,0.6867,0.796,0.5004000000000001,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Q,overall,fixing,o3,0.7367,0.795,0.92,0.945,0.9733,0.7717,0.3783,0.3633,0.3783,0.8218,0.8565,0.0244,0.8082,0.5603,0.5549,0.5642,,,,,,0.1567,0.1333,0.11,0.155,0.3733,0.5598,0.13875,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R,overall,fixing,gpt-5,0.9067,0.8617,0.94,0.9817,0.79,0.6533,0.645,0.63,0.6167,0.9611,0.912,0.21,0.6917,0.8059,0.7947,0.7822,,,,,,0.3383,0.3417,0.3267,0.3767,0.6306,0.7943,0.34585,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0005,0.0,0.0,0.0,0.0,0.0,0.0005,0.0
