ID,dataset,action,llm,?A1=A2,?A1=A3+A4,?A1>A3,?A1>A4,?A3∅A4,?A4=A1|3,?A1=A1*,?A1=A1**,?A1*=A1**,J(A1-A2),J(A1-A34),J(A3-A4),J(A4-A1|3),J(A1-A1*),J(A1-A1**),J(A1*-A1**),?SC(A1=A2),?SC(A1>A3),?SC(A1>A4),?SC(A3∅A4),?SC(A4=A1|3),idk_A1,idk_A2,idk_A3,idk_A4,?A1=A1(ave),J_A1_ave,idk,?A1=A2(+),?A1=A2(-),J(1-2)+,J(1-2)-,?A1>A3(+),?A1>A3(-),?A1>A4(+),?A1>A4(-),?A3∅A4(+),?A3∅A4(-),J(3-4)+,J(3-4)-,?A1=A3+A4(+),?A1=A3+A4(-),J(1-34)+,J(1-34)-,p(A1=A2)_x,p(A1=A3+A4)_x,p(A1>A3)_x,p(A1>A4)_x,p(A3∅A4)_x,p(A4=A1|3)_x,p(A1=A2)_y,p(A1=A3+A4)_y,p(A1>A3)_y,p(A1>A4)_y,p(A3∅A4)_y,p(A4=A1|3)_y
A,overall,classification,llama3.1:8b,0.1633,0.0833,1.0,0.3383,0.6817,0.05,0.045,0.0367,0.0317,0.5603,0.5018,0.0974,0.3899,0.2944,0.253,0.2497,0.81,0.1383,,,0.8267,0.0,0.0033,1.0,0.0,0.0378,0.2657,0.250825,0.19230769230769232,0.16202090592334495,0.7328115384615385,0.5525364111498258,1.0,1.0,,,,,,,0.10256410256410256,0.08045977011494253,0.5077294871794872,0.5008572796934866,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
B,overall,classification,gpt-oss:20b,0.6717,0.4833,1.0,0.7317,0.8467,0.38,0.2717,0.2483,0.2633,0.7939,0.7677,0.1073,0.6099,0.4892,0.4829,0.4894,0.665,0.8933,,,0.4033,0.1567,0.1183,1.0,0.2067,0.2611,0.4872,0.370425,0.6784452296819788,0.5588235294117647,0.7968805653710247,0.7450470588235295,1.0,1.0,,,,,,,0.48257839721254353,0.5,0.7658412891986063,0.8090038461538462,0.0,0.0,0.0,0.0,0.2132,0.0,0.0,0.0,0.0,0.0,0.2132,0.0
C,overall,classification,gpt-4.1-nano-2025-04-14,0.9733,0.565,0.7167,0.5667,0.3817,0.0,0.96,0.9583,0.955,0.9757,0.7272,0.6149,0.0017,0.9608,0.96,0.9558,0.6933,0.365,,,1.0,0.9833,0.9733,0.7133,0.565,0.9578,0.9589,0.808725,0.9651162790697675,0.9941176470588236,0.9672204651162791,0.9970588235294118,0.6622516556291391,0.734966592427617,,,,,,,,0.565,,0.7272223333333333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
D,overall,classification,mistral-small:24b,0.845,0.565,1.0,0.8067,0.7983,0.4233,0.3467,0.2517,0.28,0.9257,0.811,0.1774,0.6371,0.5922,0.5026,0.524,0.885,0.0017,,,0.565,0.1133,0.1217,1.0,0.1083,0.2928,0.5396,0.335825,0.8981818181818182,0.26,0.9508209090909089,0.6489880000000001,1.0,1.0,,,,,,,0.5116279069767442,0.5691202872531418,0.816660465116279,0.8105967684021544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
E,overall,classification,llama3.1:70b,0.8033,0.5733,1.0,0.8633,0.935,0.5617,0.2033,0.1767,0.18,0.9071,0.8621,0.0295,0.812,0.4882,0.4549,0.4659,0.8233,0.09,,,0.5367,0.0133,0.03,1.0,0.0333,0.1867,0.4697,0.26915,0.8230240549828178,0.16666666666666666,0.9209295532646048,0.46067777777777785,1.0,1.0,,,,,,,0.5727069351230425,0.5751633986928104,0.8625514541387023,0.8609013071895424,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F,overall,classification,gemini-2.0-flash,0.8817,0.8483,1.0,0.945,0.6433,0.5383,0.42,0.3867,0.3683,0.9384,0.9292,0.2782,0.6268,0.659,0.6005,0.5822,0.885,0.4683,,,0.4617,0.155,0.1483,1.0,0.32,0.3917,0.6139,0.40582500000000005,0.8924914675767918,0.42857142857142855,0.9474943686006827,0.5566428571428571,1.0,1.0,,,,,,,0.8455056179775281,0.8524590163934426,0.9357103932584269,0.919794262295082,0.0,0.0,0.0,0.0,0.2967,0.0,0.0,0.0,0.0,0.0,0.2967,0.0
G,overall,classification,gpt-4.1-mini-2025-04-14,0.8983,0.8633,0.64,0.9483,0.7717,0.64,0.345,0.3383,0.365,0.9573,0.9482,0.2183,0.7241,0.5946,0.5772,0.6007,0.8933,0.4483,,,0.625,0.1667,0.165,0.3467,0.2517,0.3494,0.5908,0.23252499999999998,0.9341864716636198,0.5283018867924528,0.971663436928702,0.808588679245283,0.653179190751445,0.6346604215456675,,,,,,,0.8738574040219378,0.7547169811320755,0.9515482632541133,0.9139528301886793,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
H,overall,classification,gpt-4o,0.9733,0.9367,0.9933,0.9833,0.3633,0.3067,0.6,0.32,0.4233,0.5867,0.9801,0.6279,0.3508,0.4333,0.4147,0.5083,0.965,0.6633,,,0.735,0.5683,0.5733,0.89,0.635,0.4478,0.4521,0.66665,0.9828473413379074,0.6470588235294118,0.591153859348199,0.43373529411764705,0.9925,0.995,,,,,,,0.9257142857142857,0.9411764705882353,0.9834554285714284,0.9787383529411765,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
I,overall,classification,gpt-4.1-2025-04-14,0.9183,0.87,0.7033,0.975,0.87,0.7483,0.2983,0.2917,0.325,0.9754,0.9711,0.1174,0.8484,0.6209,0.6051,0.6229,0.9233,0.6967,,,0.7467,0.0517,0.0533,0.3167,0.1283,0.305,0.6163,0.13749999999999998,0.9453262786596119,0.45454545454545453,0.9865074074074074,0.7850454545454546,0.7033898305084746,0.7,,,,,,,0.8713550600343053,0.8235294117647058,0.9724267581475129,0.9273411764705882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
J,overall,classification,grok-3-mini,0.9083,0.7883,1.0,0.9,0.8667,0.6683,0.3767,0.365,0.3717,0.9639,0.9366,0.1277,0.7869,0.6309,0.6203,0.6182,0.9183,0.9517,,,0.6933,0.1133,0.115,1.0,0.175,0.3711,0.6231,0.350825,0.9542124542124543,0.4444444444444444,0.986679304029304,0.73355,1.0,1.0,,,,,,,0.7951807228915663,0.5789473684210527,0.938937865748709,0.8646157894736843,0.0,0.0,0.0,0.0,0.7516,0.0,0.0,0.0,0.0,0.0,0.7516,0.0
K,overall,classification,deepseek-chat,0.945,0.825,0.8467,0.9117,0.6733,0.5417,0.5033,0.48,0.4917,0.9722,0.9404,0.3101,0.6274,0.6937,0.6687,0.6839,0.8933,0.1,,,0.52,0.2167,0.2267,0.4367,0.3467,0.4917,0.6821,0.3067,0.9280575539568345,0.5454545454545454,0.9597827338129498,0.740909090909091,1.0,1.0,,,,,,,0.8571428571428571,0.8391608391608392,0.9642857142857143,0.9183146853146854,0.0,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0001,0.0
L,overall,classification,gemini-2.5-flash,0.8983,0.93,1.0,0.9683,0.9067,0.8483,0.33,0.3383,0.3267,0.9383,0.9607,0.0882,0.8392,0.6356,0.6168,0.6381,0.875,0.9483,,,0.8533,0.0717,0.0667,1.0,0.14,0.3317,0.6302,0.3196,0.9478764478764479,0.5853658536585366,0.9560220077220076,0.8265573170731707,1.0,1.0,,,,,,,0.9457092819614711,0.6206896551724138,0.9653462346760071,0.8686103448275863,0.0,0.0,0.0,0.0,0.0004,0.0,0.0,0.0,0.0,0.0,0.0004,0.0
M,overall,classification,gpt-5-nano,0.7783,0.69,1.0,0.7683,0.195,0.005,0.09,0.5633,0.075,0.7816,0.7608,0.8039,0.0782,0.096,0.566,0.0816,0.7767,0.8933,,,0.0883,0.1117,0.1067,1.0,0.0217,0.2428,0.2479,0.310025,0.8055045871559633,0.509090909090909,0.8081271559633028,0.5191600000000001,1.0,1.0,,,,,,,0.72,0.36,0.782636,0.520666,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
N,overall,classification,deepseek-reasoner,0.785,0.7883,0.7533,0.8533,0.6967,0.515,0.4267,0.3817,0.45,0.824,0.8742,0.2966,0.5637,0.6299,0.5716,0.6319,0.6533,0.92,,,0.54,0.1883,0.1433,0.4783,0.3317,0.4195,0.6111,0.2854,0.6546762589928058,0.36363636363636365,0.6765122302158274,0.5545454545454546,1.0,1.0,,,,,,,0.6056338028169014,0.375,0.7532640845070423,0.731975,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
O,overall,classification,gemini-2.5-pro,0.8533,0.765,1.0,0.88,0.93,0.7133,0.3733,0.3133,0.3367,0.8978,0.8616,0.0622,0.7902,0.6515,0.5929,0.6195,0.9067,0.9583,,,0.7467,0.0917,0.0883,1.0,0.14,0.3411,0.6213,0.32999999999999996,0.9269662921348315,0.25757575757575757,0.9306636704119851,0.6316727272727273,1.0,1.0,,,,,,,0.7949640287769785,0.38636363636363635,0.8822052158273381,0.6009159090909091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P,overall,classification,gpt-5-mini,0.88,0.6417,1.0,0.75,0.6733,0.34,0.59,0.5933,0.5817,0.9258,0.7667,0.3267,0.4464,0.731,0.738,0.7328,0.8467,0.9617,,,0.38,0.4083,0.3883,1.0,0.4067,0.5883,0.7339,0.550825,0.8935018050541517,0.717391304347826,0.9371158844765343,0.7889826086956522,1.0,1.0,,,,,,,0.6433566433566433,0.6071428571428571,0.7665884615384616,0.7700071428571428,0.0,0.0,0.0,0.0022,0.0012,0.0,0.0,0.0,0.0,0.0022,0.0012,0.0
Q,overall,classification,o3,0.82,0.81,0.8833,0.9133,0.925,0.7467,0.325,0.3217,0.3283,0.9324,0.9124,0.0674,0.8397,0.6034,0.6148,0.6098,0.8433,0.8567,,,0.775,0.1,0.08,0.7733,0.11,0.325,0.6093,0.26582500000000003,0.8553571428571428,0.325,0.9499392857142858,0.6866475,0.884083044982699,0.8636363636363636,,,,,,,0.830122591943958,0.41379310344827586,0.9222970227670754,0.716851724137931,0.0,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0001,0.0
R,overall,classification,gpt-5,0.9233,0.8817,1.0,0.965,0.7067,0.5983,0.5583,0.5717,0.5867,0.9663,0.9334,0.292,0.6683,0.7534,0.7402,0.7621,0.9133,0.9817,,,0.63,0.2717,0.27,1.0,0.3583,0.5722,0.7519,0.47500000000000003,0.9547101449275363,0.5625,0.9820644927536232,0.7848270833333334,1.0,1.0,,,,,,,0.8949211908931699,0.6206896551724138,0.9381744308231172,0.8404068965517242,0.0,0.0,0.0,0.0,0.965,0.0,0.0,0.0,0.0,0.0,0.965,0.0
