action,dataset,predicate,llm,llama3.1:8b,gpt-oss:20b,gpt-4.1-nano-2025-04-14,mistral-small:24b,llama3.1:70b,gemini-2.0-flash,gpt-4.1-mini-2025-04-14,gpt-4o,gpt-4.1-2025-04-14,grok-3-mini,deepseek-chat,gemini-2.5-flash,gpt-5-nano,deepseek-reasoner,gemini-2.5-pro,gpt-5-mini,o3,gpt-5
classification,LC-QuAD,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,0.9993,0.9663,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9865,0.1712,1.0,1.0,0.9912,1.0
classification,LC-QuAD,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.0,0.0,0.0112,0.0002,0.6875,0.0112,0.0547,0.0012,0.0021,0.0,0.0,0.0327,0.0004,0.0,0.0898
classification,LC-QuAD,?A1=A2,mistral-small:24b,0.0,0.0019,1.0,1.0,0.14,0.9955,0.9252,1.0,0.9974,0.9997,0.9855,0.9927,0.2354,0.0,0.9993,0.9449,0.2612,0.9998
classification,LC-QuAD,?A1=A2,llama3.1:70b,0.0,0.0587,1.0,0.9179,1.0,0.9999,0.9942,1.0,0.9999,1.0,0.9989,0.9993,0.6641,0.0041,1.0,0.996,0.7084,1.0
classification,LC-QuAD,?A1=A2,gemini-2.0-flash,0.0,0.0,0.9983,0.0122,0.0005,1.0,0.1239,0.9983,0.6128,0.8666,0.2517,0.3318,0.0006,0.0,0.7597,0.1635,0.0007,0.8949
classification,LC-QuAD,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0001,1.0,0.1405,0.0138,0.939,1.0,1.0,0.9534,0.9887,0.779,0.8463,0.0147,0.0,0.9827,0.6399,0.0288,0.9978
classification,LC-QuAD,?A1=A2,gpt-4o,0.0,0.0,0.6875,0.0,0.0,0.0112,0.0001,1.0,0.0112,0.0547,0.0012,0.0002,0.0,0.0,0.0327,0.0001,0.0,0.0625
classification,LC-QuAD,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.9983,0.0081,0.0002,0.6128,0.105,0.9983,1.0,0.8666,0.2517,0.3238,0.0004,0.0,0.7597,0.1635,0.0003,0.8949
classification,LC-QuAD,?A1=A2,grok-3-mini,0.0,0.0,0.9893,0.0012,0.0,0.2905,0.032,0.9893,0.2905,1.0,0.0717,0.1316,0.0,0.0,0.5,0.0318,0.0001,0.7095
classification,LC-QuAD,?A1=A2,deepseek-chat,0.0,0.0,0.9999,0.0378,0.0032,0.8684,0.3506,0.9999,0.8684,0.9755,1.0,0.655,0.0057,0.0,0.9519,0.4225,0.0068,0.9738
classification,LC-QuAD,?A1=A2,gemini-2.5-flash,0.0,0.0,0.9997,0.0216,0.0022,0.8083,0.2706,1.0,0.8204,0.9423,0.5,1.0,0.0017,0.0,0.9408,0.3555,0.0012,0.9755
classification,LC-QuAD,?A1=A2,gpt-5-nano,0.0,0.0267,1.0,0.8438,0.4439,0.9998,0.9947,1.0,0.9999,1.0,0.998,0.9996,1.0,0.0009,1.0,0.9917,0.6286,1.0
classification,LC-QuAD,?A1=A2,deepseek-reasoner,0.0,0.8825,1.0,1.0,0.9982,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,0.9998,1.0
classification,LC-QuAD,?A1=A2,gemini-2.5-pro,0.0,0.0,0.9941,0.0026,0.0001,0.4073,0.0466,0.9941,0.4073,0.6964,0.1189,0.1509,0.0001,0.0,1.0,0.0669,0.0002,0.788
classification,LC-QuAD,?A1=A2,gpt-5-mini,0.0,0.0001,1.0,0.1077,0.0098,0.9157,0.5,1.0,0.9157,0.9904,0.7214,0.7709,0.0192,0.0,0.9738,1.0,0.0266,0.9915
classification,LC-QuAD,?A1=A2,o3,0.0,0.0182,1.0,0.8316,0.3919,0.9998,0.9878,1.0,0.9999,1.0,0.9977,0.9997,0.5,0.0005,1.0,0.9881,1.0,1.0
classification,LC-QuAD,?A1=A2,gpt-5,0.0,0.0,0.9805,0.0007,0.0,0.2272,0.0096,0.9922,0.2272,0.5,0.0669,0.0717,0.0,0.0,0.3953,0.0262,0.0,1.0
classification,LC-QuAD,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.8736,0.9191,0.9989,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9948,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.1871,1.0,0.6362,0.9789,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9995,0.9349,1.0,0.9992,1.0,1.0
classification,LC-QuAD,?A1=A3+A4,mistral-small:24b,0.0,0.1264,0.4538,1.0,0.9662,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9993,0.9091,1.0,0.9991,1.0,1.0
classification,LC-QuAD,?A1=A3+A4,llama3.1:70b,0.0,0.0023,0.0356,0.055,1.0,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,0.9402,0.3045,1.0,0.8556,1.0,1.0
classification,LC-QuAD,?A1=A3+A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0005,1.0,0.99,1.0,0.99,0.7443,0.8659,1.0,0.038,0.0,0.9962,0.0124,0.72,0.9338
classification,LC-QuAD,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0235,1.0,0.9996,0.5,0.0662,0.1553,0.9793,0.0001,0.0,0.7077,0.0,0.0662,0.2366
classification,LC-QuAD,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0022,1.0,0.0007,0.0,0.0001,0.1334,0.0,0.0,0.0096,0.0,0.0,0.0003
classification,LC-QuAD,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0251,0.6399,0.9999,1.0,0.068,0.1808,0.9827,0.0002,0.0,0.7878,0.0,0.0748,0.2983
classification,LC-QuAD,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0001,0.3714,0.9674,1.0,0.9693,1.0,0.7566,0.9999,0.015,0.0,0.9919,0.0027,0.5679,0.8595
classification,LC-QuAD,?A1=A3+A4,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.2148,0.9123,1.0,0.8998,0.3642,1.0,0.9995,0.0052,0.0,0.9552,0.0004,0.3642,0.6911
classification,LC-QuAD,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0001,0.0577,0.9539,0.0466,0.0005,0.002,1.0,0.0,0.0,0.0898,0.0,0.0011,0.0073
classification,LC-QuAD,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0011,0.0016,0.0941,0.9799,1.0,1.0,1.0,0.9928,0.9977,1.0,1.0,0.0284,1.0,0.3638,0.9928,0.9987
classification,LC-QuAD,?A1=A3+A4,deepseek-reasoner,0.0,0.0103,0.1003,0.1375,0.7787,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.984,1.0,1.0,0.9615,1.0,1.0
classification,LC-QuAD,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.01,0.4278,0.9978,0.345,0.0214,0.0877,0.9713,0.0,0.0,1.0,0.0,0.0288,0.1405
classification,LC-QuAD,?A1=A3+A4,gpt-5-mini,0.0,0.0001,0.0018,0.0022,0.2048,0.994,1.0,1.0,1.0,0.9989,0.9999,1.0,0.7193,0.065,1.0,1.0,0.9991,1.0
classification,LC-QuAD,?A1=A3+A4,o3,0.0,0.0,0.0,0.0,0.0001,0.3854,0.9674,1.0,0.9646,0.5679,0.7566,0.9997,0.015,0.0,0.9878,0.0023,1.0,0.8316
classification,LC-QuAD,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.1215,0.8595,1.0,0.8115,0.2366,0.434,0.998,0.0031,0.0,0.9252,0.0002,0.2612,1.0
classification,LC-QuAD,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,llama3.1:70b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-4o,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,grok-3-mini,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-5-nano,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-5-mini,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,o3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A3,gpt-5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A4,gpt-oss:20b,0.0,1.0,0.0,0.8438,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8092,0.1831,1.0,0.7787,1.0,1.0
classification,LC-QuAD,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A1>A4,mistral-small:24b,0.0,0.2354,0.0,1.0,0.9931,1.0,1.0,1.0,1.0,0.9992,0.9995,1.0,0.5,0.0407,1.0,0.5,0.9995,1.0
classification,LC-QuAD,?A1>A4,llama3.1:70b,0.0,0.001,0.0,0.0158,1.0,0.9827,0.9964,1.0,0.9999,0.7976,0.8463,0.9941,0.0098,0.0,0.9987,0.0113,0.8675,0.9622
classification,LC-QuAD,?A1>A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0466,1.0,0.788,0.998,0.9922,0.1189,0.1796,0.6964,0.0001,0.0,0.8867,0.0,0.2517,0.5
classification,LC-QuAD,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0133,0.3953,1.0,0.9922,0.9102,0.0481,0.0592,0.5,0.0,0.0,0.7256,0.0,0.1051,0.2905
classification,LC-QuAD,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0,0.0001,0.0195,0.0625,1.0,0.3125,0.0005,0.0009,0.0352,0.0,0.0,0.1094,0.0,0.0017,0.0107
classification,LC-QuAD,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0007,0.0625,0.2539,0.9375,1.0,0.0017,0.0106,0.1719,0.0,0.0,0.3438,0.0,0.0176,0.073
classification,LC-QuAD,?A1>A4,grok-3-mini,0.0,0.0001,0.0,0.0025,0.3388,0.9519,0.9846,1.0,0.9999,1.0,0.6612,0.9755,0.0017,0.0,0.9936,0.0033,0.7214,0.9283
classification,LC-QuAD,?A1>A4,deepseek-chat,0.0,0.0001,0.0,0.0017,0.2706,0.9165,0.9824,0.9999,0.9979,0.5,1.0,0.9519,0.0008,0.0,0.9846,0.0006,0.6612,0.8811
classification,LC-QuAD,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0207,0.5,0.7461,0.9961,0.9453,0.0717,0.1189,1.0,0.0,0.0,0.8555,0.0,0.1796,0.3872
classification,LC-QuAD,?A1>A4,gpt-5-nano,0.0,0.28,0.0,0.6043,0.996,1.0,1.0,1.0,1.0,0.9995,0.9998,1.0,1.0,0.0524,1.0,0.5522,0.9999,1.0
classification,LC-QuAD,?A1>A4,deepseek-reasoner,0.0,0.8775,0.0008,0.978,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.971,1.0,1.0,0.9615,1.0,1.0
classification,LC-QuAD,?A1>A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0059,0.2744,0.5,0.9844,0.8906,0.0245,0.0481,0.3633,0.0,0.0,1.0,0.0,0.0717,0.212
classification,LC-QuAD,?A1>A4,gpt-5-mini,0.0,0.3045,0.0,0.6196,0.9952,1.0,1.0,1.0,1.0,0.9988,0.9998,1.0,0.5522,0.065,1.0,1.0,0.9999,1.0
classification,LC-QuAD,?A1>A4,o3,0.0,0.0001,0.0,0.0014,0.2291,0.8684,0.9616,0.9999,0.9963,0.4225,0.5,0.9165,0.0005,0.0,0.9755,0.0005,1.0,0.8204
classification,LC-QuAD,?A1>A4,gpt-5,0.0,0.0,0.0,0.0001,0.0843,0.6855,0.8666,0.999,0.9807,0.1662,0.2403,0.8062,0.0001,0.0,0.9102,0.0,0.3238,1.0
classification,LC-QuAD,?A3∅A4,llama3.1:8b,1.0,1.0,0.0,0.9989,1.0,0.9998,0.976,0.0,1.0,1.0,0.077,1.0,0.0,0.9987,1.0,0.049,1.0,0.8775
classification,LC-QuAD,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0,0.0843,0.998,0.2122,0.0068,0.0,0.9242,0.2786,0.0,0.9915,0.0,0.1325,0.9998,0.0,0.9827,0.0003
classification,LC-QuAD,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9977,1.0,0.0,1.0,1.0,0.9942,1.0,1.0
classification,LC-QuAD,?A3∅A4,mistral-small:24b,0.0027,0.9622,0.0,1.0,1.0,0.7634,0.1405,0.0,0.9988,0.8275,0.0,1.0,0.0,0.6321,1.0,0.0,1.0,0.0175
classification,LC-QuAD,?A3∅A4,llama3.1:70b,0.0,0.0073,0.0,0.0001,1.0,0.0011,0.0,0.0,0.0946,0.0012,0.0,0.3238,0.0,0.0003,0.6964,0.0,0.2517,0.0
classification,LC-QuAD,?A3∅A4,gemini-2.0-flash,0.0006,0.8852,0.0,0.3601,0.9997,1.0,0.0662,0.0,0.9964,0.6399,0.0,0.9998,0.0,0.4357,1.0,0.0,0.9988,0.0083
classification,LC-QuAD,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0435,0.9977,0.0,0.9252,1.0,0.9674,1.0,0.0,1.0,0.9795,0.0,1.0,0.0,0.9283,1.0,0.0,1.0,0.2148
classification,LC-QuAD,?A3∅A4,gpt-4o,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.4402,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A3∅A4,gpt-4.1-2025-04-14,0.0,0.1537,0.0,0.0047,0.9608,0.0133,0.0002,0.0,1.0,0.0065,0.0,0.9453,0.0,0.0121,0.9968,0.0,0.8338,0.0
classification,LC-QuAD,?A3∅A4,grok-3-mini,0.0001,0.8365,0.0,0.2858,0.9997,0.5,0.0448,0.0,0.9991,1.0,0.0,0.9998,0.0,0.3679,1.0,0.0,0.998,0.0023
classification,LC-QuAD,?A3∅A4,deepseek-chat,0.952,1.0,0.0048,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,1.0,0.9999
classification,LC-QuAD,?A3∅A4,gemini-2.5-flash,0.0,0.0262,0.0,0.0003,0.8204,0.0013,0.0,0.0,0.1719,0.0013,0.0,1.0,0.0,0.0017,0.9453,0.0,0.5,0.0
classification,LC-QuAD,?A3∅A4,gpt-5-nano,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6742,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A3∅A4,deepseek-reasoner,0.0031,0.932,0.0,0.5,0.9999,0.6864,0.1279,0.0,0.9959,0.7502,0.0,0.9996,0.0,1.0,1.0,0.0,0.9989,0.0244
classification,LC-QuAD,?A3∅A4,gemini-2.5-pro,0.0,0.0013,0.0,0.0,0.5,0.0,0.0,0.0,0.0193,0.0,0.0,0.1719,0.0,0.0,1.0,0.0,0.1509,0.0
classification,LC-QuAD,?A3∅A4,gpt-5-mini,0.9721,1.0,0.0107,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.6286,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A3∅A4,o3,0.0,0.0466,0.0,0.0002,0.8684,0.0047,0.0,0.0,0.3145,0.0073,0.0,0.6964,0.0,0.0035,0.9408,0.0,1.0,0.0
classification,LC-QuAD,?A3∅A4,gpt-5,0.1831,0.9999,0.0,0.9932,1.0,0.9968,0.8659,0.0,1.0,0.9993,0.0003,1.0,0.0,0.9887,1.0,0.0002,1.0,1.0
classification,LC-QuAD,?A4=A1|3,llama3.1:8b,1.0,1.0,0.125,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.3125,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.7825,1.0,1.0,1.0,0.0002,1.0,1.0,0.9951,1.0,0.0,0.9978,1.0,0.0557,1.0,1.0
classification,LC-QuAD,?A4=A1|3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A4=A1|3,mistral-small:24b,0.0,0.3015,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.976,1.0,0.0,0.9836,1.0,0.008,1.0,1.0
classification,LC-QuAD,?A4=A1|3,llama3.1:70b,0.0,0.0,0.0,0.0001,1.0,0.8625,0.7787,0.0,0.9997,0.9296,0.007,1.0,0.0,0.0124,1.0,0.0,0.9984,0.4022
classification,LC-QuAD,?A4=A1|3,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.1981,1.0,0.3939,0.0,0.9982,0.6641,0.0001,1.0,0.0,0.0004,1.0,0.0,0.9663,0.0804
classification,LC-QuAD,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.3045,0.705,1.0,0.0,0.9995,0.7995,0.0002,1.0,0.0,0.0006,1.0,0.0,0.9923,0.1748
classification,LC-QuAD,?A4=A1|3,gpt-4o,0.0,0.9999,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.992,1.0,1.0
classification,LC-QuAD,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0008,0.0048,0.0015,0.0,1.0,0.0069,0.0,0.9999,0.0,0.0,0.9937,0.0,0.2612,0.0
classification,LC-QuAD,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.1144,0.4439,0.2879,0.0,0.9975,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.9702,0.033
classification,LC-QuAD,?A4=A1|3,deepseek-chat,0.0,0.0102,0.0,0.0435,0.9966,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.6583,1.0,0.0,1.0,0.9989
classification,LC-QuAD,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0008,0.0,0.0,1.0,0.0,0.0,0.2517,0.0,0.0004,0.0
classification,LC-QuAD,?A4=A1|3,gpt-5-nano,0.9375,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A4=A1|3,deepseek-reasoner,0.0,0.0049,0.0,0.03,0.9937,0.9999,0.9998,0.0,1.0,1.0,0.446,1.0,0.0,1.0,1.0,0.0,1.0,0.9912
classification,LC-QuAD,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0178,0.0,0.0,0.8684,0.0,0.0,1.0,0.0,0.0047,0.0
classification,LC-QuAD,?A4=A1|3,gpt-5-mini,0.0,0.9703,0.0,0.9967,1.0,1.0,1.0,0.0178,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,LC-QuAD,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0038,0.0587,0.0164,0.0,0.8316,0.0586,0.0,0.9999,0.0,0.0,0.9983,0.0,1.0,0.0003
classification,LC-QuAD,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0001,0.6899,0.954,0.8856,0.0,1.0,0.9842,0.0032,1.0,0.0,0.0182,1.0,0.0,0.9999,1.0
classification,overall,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.536,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
classification,overall,?A1=A2,mistral-small:24b,0.0,0.0,1.0,1.0,0.0001,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.8126,0.9999,0.0101,1.0
classification,overall,?A1=A2,llama3.1:70b,0.0,0.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0169,0.0616,1.0,1.0,0.9376,1.0
classification,overall,?A1=A2,gemini-2.0-flash,0.0,0.0,1.0,0.0001,0.0,1.0,0.9729,1.0,1.0,0.9991,1.0,0.9724,0.0,0.0,0.0021,0.4463,0.0,1.0
classification,overall,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,1.0,0.0,0.0,0.0335,1.0,1.0,0.9936,0.8938,1.0,0.5197,0.0,0.0,0.0,0.0229,0.0,0.9991
classification,overall,?A1=A2,gpt-4o,0.0,0.0,0.536,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
classification,overall,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,1.0,0.0,0.0,0.0,0.0085,1.0,1.0,0.1081,0.9999,0.0076,0.0,0.0,0.0,0.0,0.0,0.7602
classification,overall,?A1=A2,grok-3-mini,0.0,0.0,1.0,0.0,0.0,0.0013,0.1256,1.0,0.9106,1.0,1.0,0.119,0.0,0.0,0.0,0.0008,0.0,0.9764
classification,overall,?A1=A2,deepseek-chat,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0013
classification,overall,?A1=A2,gemini-2.5-flash,0.0,0.0,1.0,0.0,0.0,0.0341,0.5197,1.0,0.9943,0.9002,1.0,1.0,0.0,0.0,0.0,0.0217,0.0,0.9993
classification,overall,?A1=A2,gpt-5-nano,0.0,0.0,1.0,1.0,0.9859,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7251,1.0,1.0,0.9999,1.0
classification,overall,?A1=A2,deepseek-reasoner,0.0,0.0,1.0,1.0,0.9467,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2989,1.0,1.0,1.0,0.9993,1.0
classification,overall,?A1=A2,gemini-2.5-pro,0.0,0.0,1.0,0.211,0.0,0.9984,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.9975,0.0008,1.0
classification,overall,?A1=A2,gpt-5-mini,0.0,0.0,1.0,0.0002,0.0,0.589,0.9817,1.0,1.0,0.9995,1.0,0.9827,0.0,0.0,0.0032,1.0,0.0,1.0
classification,overall,?A1=A2,o3,0.0,0.0,1.0,0.9918,0.0722,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0001,0.001,0.9994,1.0,1.0,1.0
classification,overall,?A1=A2,gpt-5,0.0,0.0,1.0,0.0,0.0,0.0,0.0012,1.0,0.2748,0.0302,0.9991,0.0011,0.0,0.0,0.0,0.0,0.0,1.0
classification,overall,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A3+A4,gpt-oss:20b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.5114,0.729,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A3+A4,mistral-small:24b,0.0,0.0,0.5114,1.0,0.7283,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A3+A4,llama3.1:70b,0.0,0.0,0.2906,0.2912,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A3+A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.9381,1.0,0.9874,0.0,0.0136,1.0,0.0,0.0,0.0,0.0,0.0002,0.9998
classification,overall,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0727,1.0,1.0,0.7686,0.0,0.0001,1.0,0.0,0.0,0.0,0.0,0.0,0.9773
classification,overall,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.1956,0.0,0.0,0.0,0.0,0.0,0.0
classification,overall,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0157,0.2585,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.9022
classification,overall,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.9996,1.0,0.0,0.5142,0.0259,0.0,0.9748,1.0
classification,overall,?A1=A3+A4,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.989,0.9999,1.0,1.0,0.0006,1.0,1.0,0.0,0.0006,0.0,0.0,0.0897,1.0
classification,overall,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8344,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
classification,overall,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0002,1.0,1.0
classification,overall,?A1=A3+A4,deepseek-reasoner,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.5142,0.9995,1.0,0.0,1.0,0.0272,0.0,0.9729,1.0
classification,overall,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.9781,1.0,1.0,0.0,0.9769,1.0,0.0,1.0,1.0
classification,overall,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9998,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1=A3+A4,o3,0.0,0.0,0.0,0.0,0.0,0.9999,1.0,1.0,1.0,0.0299,0.9221,1.0,0.0,0.0319,0.0001,0.0,1.0,1.0
classification,overall,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0002,0.0279,1.0,0.1141,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
classification,overall,?A1>A3,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gpt-oss:20b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.1675,1.0,1.0,1.0,1.0,0.998,1.0,1.0,1.0,1.0
classification,overall,?A1>A3,mistral-small:24b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,llama3.1:70b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gemini-2.0-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A3,gpt-4o,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gpt-4.1-2025-04-14,1.0,1.0,0.8476,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A3,grok-3-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,deepseek-chat,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9999,1.0
classification,overall,?A1>A3,gemini-2.5-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gpt-5-nano,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,deepseek-reasoner,1.0,1.0,0.0024,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A3,gemini-2.5-pro,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,gpt-5-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A3,o3,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A3,gpt-5,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,overall,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A4,gpt-oss:20b,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9986,1.0,1.0,0.9283,1.0,1.0
classification,overall,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A4,mistral-small:24b,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0007,1.0,1.0,0.0,1.0,1.0
classification,overall,?A1>A4,llama3.1:70b,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.1727,0.9637,0.0,1.0,1.0
classification,overall,?A1>A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.7201,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.9998
classification,overall,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.3251,1.0,1.0,1.0,0.0,0.0,0.9999,0.0,0.0,0.0,0.0,0.0,0.9984
classification,overall,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0235,0.0,0.0,0.0004,0.0,0.0,0.0,0.0,0.0,0.0
classification,overall,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.986,1.0,0.0,0.0,0.0923,0.0,0.0,0.0,0.0,0.0,0.0249
classification,overall,?A1>A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.9255,1.0,0.0,0.0,0.0142,0.0,0.9493,1.0
classification,overall,?A1>A4,deepseek-chat,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0896,1.0,1.0,0.0,0.0,0.0002,0.0,0.6015,1.0
classification,overall,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.9998,0.9337,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.2826
classification,overall,?A1>A4,gpt-5-nano,0.0,0.0017,0.0,0.9995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0736,1.0,1.0
classification,overall,?A1>A4,deepseek-reasoner,0.0,0.0,0.0,0.0,0.8475,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.9969,0.0,1.0,1.0
classification,overall,?A1>A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0439,1.0,1.0,1.0,1.0,0.9889,0.9999,1.0,0.0,0.004,1.0,0.0,1.0,1.0
classification,overall,?A1>A4,gpt-5-mini,0.0,0.081,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9354,1.0,1.0,1.0,1.0,1.0
classification,overall,?A1>A4,o3,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0619,0.4387,1.0,0.0,0.0,0.0001,0.0,1.0,1.0
classification,overall,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0004,0.0025,1.0,0.9835,0.0,0.0,0.7702,0.0,0.0,0.0,0.0,0.0,1.0
classification,overall,?A3∅A4,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,0.0023,1.0,0.0,1.0,1.0,0.2767,1.0,0.0,0.8793,1.0,0.2777,1.0,0.9732
classification,overall,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9924,0.9815,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
classification,overall,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.1014,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A3∅A4,mistral-small:24b,0.0,1.0,0.0,1.0,1.0,0.0,0.0097,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
classification,overall,?A3∅A4,llama3.1:70b,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,0.2613,0.0,0.0935,0.0
classification,overall,?A3∅A4,gemini-2.0-flash,0.9981,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.99,1.0,0.0,1.0,1.0,0.989,1.0,1.0
classification,overall,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.9921,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
classification,overall,?A3∅A4,gpt-4o,1.0,1.0,0.9087,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A3∅A4,gpt-4.1-2025-04-14,0.0,0.0096,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.3749,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
classification,overall,?A3∅A4,grok-3-mini,0.0,0.0227,0.0,0.0,1.0,0.0,0.0,0.0,0.6591,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
classification,overall,?A3∅A4,deepseek-chat,0.7437,1.0,0.0,1.0,1.0,0.0119,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.9668,1.0,0.5129,1.0,0.9959
classification,overall,?A3∅A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.9999,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9993,0.0,0.9924,0.0
classification,overall,?A3∅A4,gpt-5-nano,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A3∅A4,deepseek-reasoner,0.1339,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0382,1.0,0.0,1.0,1.0,0.0373,1.0,0.7961
classification,overall,?A3∅A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.775,0.0,0.0,0.0,0.0,0.0,0.0,0.0011,0.0,0.0,1.0,0.0,0.2606,0.0
classification,overall,?A3∅A4,gpt-5-mini,0.7427,1.0,0.0,1.0,1.0,0.0129,1.0,0.0,1.0,1.0,0.5129,1.0,0.0,0.9677,1.0,1.0,1.0,0.9965
classification,overall,?A3∅A4,o3,0.0,0.0,0.0,0.0,0.9243,0.0,0.0,0.0,0.0,0.0,0.0,0.0101,0.0,0.0,0.7758,0.0,1.0,0.0
classification,overall,?A3∅A4,gpt-5,0.031,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0049,1.0,0.0,0.2232,1.0,0.0043,1.0,1.0
classification,overall,?A4=A1|3,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.9993,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0017,1.0,1.0
classification,overall,?A4=A1|3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A4=A1|3,mistral-small:24b,0.0,0.0009,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0
classification,overall,?A4=A1|3,llama3.1:70b,0.0,0.0,0.0,0.0,1.0,0.0518,1.0,0.0,1.0,1.0,0.081,1.0,0.0,0.0005,1.0,0.0,1.0,0.9961
classification,overall,?A4=A1|3,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.9542,1.0,1.0,0.0,1.0,1.0,0.6087,1.0,0.0,0.0488,1.0,0.0,1.0,1.0
classification,overall,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.9847,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0009
classification,overall,?A4=A1|3,gpt-4o,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9952,1.0,1.0
classification,overall,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0025,0.0,0.4586,0.0
classification,overall,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0179,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.9998,0.0,1.0,0.0
classification,overall,?A4=A1|3,deepseek-chat,0.0,0.0,0.0,0.0,0.9275,0.415,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0287,1.0,0.0,1.0,1.0
classification,overall,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
classification,overall,?A4=A1|3,gpt-5-nano,1.0,1.0,0.0002,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A4=A1|3,deepseek-reasoner,0.0,0.0,0.0,0.0,0.9996,0.957,1.0,0.0,1.0,1.0,0.975,1.0,0.0,1.0,1.0,0.0,1.0,1.0
classification,overall,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.998,0.0003,0.0,1.0,0.0,0.0,1.0,0.0,0.9966,0.0
classification,overall,?A4=A1|3,gpt-5-mini,0.0,0.9986,0.0,1.0,1.0,1.0,1.0,0.0057,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,overall,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5688,0.0,0.0,1.0,0.0,0.0,0.0041,0.0,1.0,0.0
classification,overall,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0,0.0047,0.0,0.9992,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0
classification,qawiki,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,0.9993,0.9799,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,0.9444,1.0,1.0,0.9998,0.9033,1.0
classification,qawiki,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.0002,0.0,0.002,0.0318,0.8555,0.0717,0.0038,0.8555,0.0096,0.0,0.0133,0.0106,0.0008,0.0,0.0481
classification,qawiki,?A1=A2,mistral-small:24b,0.0,0.0019,1.0,1.0,0.1744,0.7017,0.9622,1.0,0.9855,0.8675,1.0,0.9075,0.0717,0.8998,0.9461,0.6399,0.0298,0.9693
classification,qawiki,?A1=A2,llama3.1:70b,0.0,0.038,1.0,0.8945,1.0,0.9506,0.9977,1.0,0.9996,0.9856,1.0,0.9932,0.3899,0.9917,0.9955,0.9283,0.2307,0.9985
classification,qawiki,?A1=A2,gemini-2.0-flash,0.0,0.0004,0.9995,0.43,0.0939,1.0,0.9242,1.0,0.968,0.779,1.0,0.8365,0.0326,0.8192,0.8595,0.5,0.0158,0.939
classification,qawiki,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.9904,0.0843,0.0068,0.1537,1.0,0.9999,0.7728,0.3238,0.9999,0.4225,0.0025,0.4225,0.5,0.1148,0.0003,0.6612
classification,qawiki,?A1=A2,gpt-4o,0.0,0.0,0.3633,0.0,0.0,0.0,0.0017,1.0,0.0112,0.0007,0.6875,0.0012,0.0,0.0012,0.0021,0.0,0.0,0.0065
classification,qawiki,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.9755,0.0378,0.0017,0.0758,0.4018,0.9983,1.0,0.1917,0.9995,0.2706,0.0006,0.2706,0.345,0.0539,0.0001,0.5
classification,qawiki,?A1=A2,grok-3-mini,0.0,0.0001,0.9993,0.2291,0.0326,0.3506,0.8204,0.9999,0.9054,1.0,1.0,0.6682,0.0119,0.6494,0.7597,0.2858,0.0015,0.8569
classification,qawiki,?A1=A2,deepseek-chat,0.0,0.0,0.3633,0.0,0.0,0.0001,0.0017,0.6875,0.0059,0.0003,1.0,0.0005,0.0,0.0012,0.0021,0.0,0.0,0.0065
classification,qawiki,?A1=A2,gemini-2.5-flash,0.0,0.0,0.9978,0.1725,0.0175,0.2786,0.7214,0.9999,0.8463,0.5,1.0,1.0,0.0035,0.5841,0.6855,0.2122,0.0013,0.8083
classification,qawiki,?A1=A2,gpt-5-nano,0.0,0.0924,1.0,0.9635,0.7121,0.9856,0.9992,1.0,0.9998,0.9953,1.0,0.9989,1.0,0.9985,0.9987,0.967,0.3854,0.9998
classification,qawiki,?A1=A2,deepseek-reasoner,0.0,0.0001,0.9964,0.1808,0.0205,0.2923,0.7214,0.9999,0.8463,0.5,0.9999,0.5841,0.0045,1.0,0.655,0.221,0.0022,0.8083
classification,qawiki,?A1=A2,gemini-2.5-pro,0.0,0.0,0.9979,0.1148,0.0122,0.2366,0.6762,0.9997,0.7878,0.4073,0.9997,0.5,0.0038,0.5,1.0,0.1537,0.0002,0.7383
classification,qawiki,?A1=A2,gpt-5-mini,0.0,0.0006,0.9999,0.5,0.1279,0.6321,0.9461,1.0,0.9784,0.8275,1.0,0.8852,0.0631,0.8761,0.9242,1.0,0.027,0.968
classification,qawiki,?A1=A2,o3,0.0,0.1562,1.0,0.9862,0.849,0.9931,0.9999,1.0,1.0,0.9996,1.0,0.9996,0.72,0.9993,1.0,0.9871,1.0,1.0
classification,qawiki,?A1=A2,gpt-5,0.0,0.0,0.9846,0.068,0.0045,0.1239,0.5,0.9991,0.6855,0.2617,0.9991,0.3318,0.0007,0.3318,0.4159,0.0758,0.0002,1.0
classification,qawiki,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.5,0.9887,0.1332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9981,1.0,0.9994,0.9908,1.0,1.0
classification,qawiki,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.5901,1.0,0.9846,0.1477,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9988,1.0,0.9998,0.9846,0.9999,1.0
classification,qawiki,?A1=A3+A4,mistral-small:24b,0.0,0.0215,0.0268,1.0,0.0012,1.0,1.0,1.0,1.0,0.985,1.0,1.0,0.8288,1.0,0.8825,0.5489,0.9538,1.0
classification,qawiki,?A1=A3+A4,llama3.1:70b,0.0,0.909,0.8997,0.9995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0
classification,qawiki,?A1=A3+A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.4225,0.9682,0.5806,0.0057,0.1725,0.9827,0.0001,0.3555,0.0004,0.0,0.0008,0.9075
classification,qawiki,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.7214,1.0,0.9739,0.7142,0.0098,0.3038,0.9927,0.0005,0.5,0.0007,0.0,0.0033,0.9622
classification,qawiki,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0835,0.061,1.0,0.105,0.0001,0.0096,0.7728,0.0,0.0262,0.0,0.0,0.0,0.5
classification,qawiki,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.5806,0.4253,0.9534,1.0,0.0066,0.1725,0.9827,0.0001,0.3642,0.0003,0.0,0.0006,0.9242
classification,qawiki,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0279,0.0,0.9977,0.996,1.0,0.9972,1.0,0.9756,1.0,0.1528,0.9931,0.2005,0.0279,0.3327,1.0
classification,qawiki,?A1=A3+A4,deepseek-chat,0.0,0.0,0.0,0.0001,0.0,0.9075,0.8042,0.997,0.9075,0.0481,1.0,0.9992,0.0019,0.7502,0.0027,0.0,0.0164,0.99
classification,qawiki,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0466,0.0216,0.4018,0.0466,0.0,0.003,1.0,0.0,0.0059,0.0,0.0,0.0,0.3036
classification,qawiki,?A1=A3+A4,gpt-5-nano,0.0,0.004,0.0026,0.2383,0.0001,1.0,0.9998,1.0,1.0,0.9,0.9993,1.0,1.0,0.9997,0.6641,0.2285,0.7865,1.0
classification,qawiki,?A1=A3+A4,deepseek-reasoner,0.0,0.0,0.0,0.0,0.0,0.7709,0.6445,0.9915,0.7566,0.0158,0.3679,0.9987,0.0009,1.0,0.0005,0.0,0.0017,0.9827
classification,qawiki,?A1=A3+A4,gemini-2.5-pro,0.0,0.0015,0.0005,0.1712,0.0,0.9999,0.9998,1.0,0.9999,0.8688,0.999,1.0,0.4439,0.9999,1.0,0.1446,0.705,1.0
classification,qawiki,?A1=A3+A4,gpt-5-mini,0.0,0.0182,0.0268,0.5489,0.0004,1.0,1.0,1.0,1.0,0.985,1.0,1.0,0.8395,1.0,0.9076,1.0,0.951,1.0
classification,qawiki,?A1=A3+A4,o3,0.0,0.0001,0.0003,0.0775,0.0,0.9997,0.9987,1.0,0.9998,0.7646,0.9923,1.0,0.2983,0.9995,0.3939,0.0809,1.0,1.0
classification,qawiki,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.1725,0.0843,0.6682,0.1537,0.0,0.0251,0.8491,0.0,0.0466,0.0,0.0,0.0,1.0
classification,qawiki,?A1>A3,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-oss:20b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0004,1.0,1.0,1.0,1.0,0.0207,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,mistral-small:24b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,llama3.1:70b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gemini-2.0-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.849,1.0,1.0,1.0,1.0,0.9973,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-4o,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-4.1-2025-04-14,1.0,1.0,0.9998,1.0,1.0,1.0,0.2307,1.0,1.0,1.0,1.0,1.0,1.0,0.978,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,grok-3-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,deepseek-chat,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gemini-2.5-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-5-nano,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,deepseek-reasoner,1.0,1.0,0.9888,1.0,1.0,1.0,0.006,1.0,0.0407,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gemini-2.5-pro,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-5-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,o3,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A3,gpt-5,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A4,llama3.1:8b,1.0,1.0,0.9995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A4,gpt-oss:20b,0.0,1.0,0.0005,0.9989,0.9818,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,0.8512,1.0,0.9982,0.8987,0.9996,1.0
classification,qawiki,?A1>A4,gpt-4.1-nano-2025-04-14,0.0012,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A1>A4,mistral-small:24b,0.0,0.0027,0.0,1.0,0.2148,0.9999,0.997,1.0,1.0,0.779,0.9962,1.0,0.0352,0.939,0.3746,0.0298,0.6358,0.9999
classification,qawiki,?A1>A4,llama3.1:70b,0.0,0.0337,0.0,0.8659,1.0,0.9998,0.9999,1.0,1.0,0.9646,0.9989,1.0,0.1611,0.99,0.7388,0.1958,0.9186,1.0
classification,qawiki,?A1>A4,gemini-2.0-flash,0.0,0.0,0.0,0.0012,0.0007,1.0,0.6128,1.0,0.9648,0.032,0.2744,0.9453,0.0,0.1051,0.0019,0.0,0.0113,0.8867
classification,qawiki,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0096,0.0003,0.6128,1.0,0.9805,0.9453,0.0207,0.3145,0.9648,0.0,0.1051,0.0012,0.0,0.0085,0.9102
classification,qawiki,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0312,0.0898,1.0,0.5,0.0004,0.0039,0.5,0.0,0.0017,0.0,0.0,0.0,0.3438
classification,qawiki,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.1445,0.1719,0.875,1.0,0.0007,0.0195,0.6562,0.0,0.0106,0.0,0.0,0.0004,0.5
classification,qawiki,?A1>A4,grok-3-mini,0.0,0.0003,0.0,0.3506,0.0748,0.9887,0.9941,1.0,0.9999,1.0,0.9534,0.9999,0.0069,0.8365,0.1808,0.0098,0.4253,0.9996
classification,qawiki,?A1>A4,deepseek-chat,0.0,0.0,0.0,0.0154,0.0035,0.8867,0.8338,1.0,0.998,0.105,1.0,0.9888,0.0001,0.3318,0.0073,0.0,0.0539,0.9713
classification,qawiki,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0,0.0002,0.0,0.1719,0.1445,0.8125,0.6562,0.0007,0.0461,1.0,0.0,0.0032,0.0,0.0,0.0002,0.5
classification,qawiki,?A1>A4,gpt-5-nano,0.0,0.2175,0.0,0.9818,0.8987,1.0,1.0,1.0,1.0,0.9973,1.0,1.0,1.0,0.9998,0.9825,0.6101,0.9962,1.0
classification,qawiki,?A1>A4,deepseek-reasoner,0.0,0.0,0.0,0.1239,0.0235,0.9616,0.9616,0.9999,0.9979,0.2786,0.8083,0.9998,0.0007,1.0,0.0494,0.0006,0.1537,0.9963
classification,qawiki,?A1>A4,gemini-2.5-pro,0.0,0.0048,0.0,0.7388,0.3746,0.9995,0.9997,1.0,1.0,0.8998,0.998,1.0,0.0401,0.9786,1.0,0.0871,0.8192,1.0
classification,qawiki,?A1>A4,gpt-5-mini,0.0,0.1611,0.0,0.9862,0.8736,1.0,1.0,1.0,1.0,0.996,1.0,1.0,0.5,0.9998,0.9519,1.0,0.9902,1.0
classification,qawiki,?A1>A4,o3,0.0,0.001,0.0,0.5,0.1481,0.9967,0.9978,1.0,0.9999,0.7142,0.9784,1.0,0.01,0.9242,0.2923,0.0218,1.0,0.9999
classification,qawiki,?A1>A4,gpt-5,0.0,0.0,0.0,0.0008,0.0,0.2744,0.2539,0.8906,0.7734,0.0022,0.0898,0.8125,0.0,0.0176,0.0001,0.0,0.0007,1.0
classification,qawiki,?A3∅A4,llama3.1:8b,1.0,0.9901,0.0,0.653,1.0,0.0,0.4487,0.0,0.9937,0.9961,0.0192,0.9994,0.0,0.0,1.0,0.1945,1.0,0.0021
classification,qawiki,?A3∅A4,gpt-oss:20b,0.0201,1.0,0.0,0.0122,1.0,0.0,0.002,0.0,0.6399,0.7214,0.0,0.8923,0.0,0.0,0.9943,0.0004,0.9879,0.0
classification,qawiki,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,0.32,1.0,0.0249,1.0,1.0,0.9992,1.0,0.0,0.7167,1.0,1.0,1.0,0.9908
classification,qawiki,?A3∅A4,mistral-small:24b,0.4478,0.9955,0.0,1.0,1.0,0.0,0.2403,0.0,0.9998,0.998,0.0011,0.9999,0.0,0.0,1.0,0.0967,1.0,0.0001
classification,qawiki,?A3∅A4,llama3.1:70b,0.0,0.0002,0.0,0.0,1.0,0.0,0.0,0.0,0.0001,0.0004,0.0,0.0019,0.0,0.0,0.0758,0.0,0.0261,0.0
classification,qawiki,?A3∅A4,gemini-2.0-flash,1.0,1.0,0.7586,1.0,1.0,1.0,1.0,0.0326,1.0,1.0,1.0,1.0,0.0001,0.9599,1.0,1.0,1.0,1.0
classification,qawiki,?A3∅A4,gpt-4.1-mini-2025-04-14,0.6506,0.9994,0.0,0.8811,1.0,0.0,1.0,0.0,1.0,1.0,0.0069,1.0,0.0,0.0,1.0,0.2257,1.0,0.0003
classification,qawiki,?A3∅A4,gpt-4o,1.0,1.0,0.9864,1.0,1.0,0.9856,1.0,1.0,1.0,1.0,1.0,1.0,0.0135,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A3∅A4,gpt-4.1-2025-04-14,0.0135,0.5,0.0,0.0013,1.0,0.0,0.0001,0.0,1.0,0.655,0.0,0.9165,0.0,0.0,0.997,0.0001,0.9915,0.0
classification,qawiki,?A3∅A4,grok-3-mini,0.0088,0.4225,0.0,0.0057,0.9999,0.0,0.0002,0.0,0.5,1.0,0.0,0.8569,0.0,0.0,0.9967,0.0,0.9784,0.0
classification,qawiki,?A3∅A4,deepseek-chat,0.9897,1.0,0.0017,0.9997,1.0,0.0,0.9975,0.0,1.0,1.0,1.0,1.0,0.0,0.0003,1.0,0.9519,1.0,0.2088
classification,qawiki,?A3∅A4,gemini-2.5-flash,0.0016,0.1885,0.0,0.0003,0.9995,0.0,0.0,0.0,0.1796,0.2617,0.0,1.0,0.0,0.0,0.9846,0.0,0.8852,0.0
classification,qawiki,?A3∅A4,gpt-5-nano,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9937,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A3∅A4,deepseek-reasoner,1.0,1.0,0.3655,1.0,1.0,0.0814,1.0,0.0002,1.0,1.0,0.9999,1.0,0.0,1.0,1.0,1.0,1.0,0.9985
classification,qawiki,?A3∅A4,gemini-2.5-pro,0.0,0.0144,0.0,0.0,0.968,0.0,0.0,0.0,0.0096,0.0113,0.0,0.0481,0.0,0.0,1.0,0.0,0.3388,0.0
classification,qawiki,?A3∅A4,gpt-5-mini,0.8661,0.9999,0.0,0.9443,1.0,0.0,0.8544,0.0,1.0,1.0,0.0871,1.0,0.0,0.0,1.0,1.0,1.0,0.0147
classification,qawiki,?A3∅A4,o3,0.0001,0.0307,0.0,0.0,0.9904,0.0,0.0,0.0,0.0262,0.0539,0.0,0.2122,0.0,0.0,0.7976,0.0,1.0,0.0
classification,qawiki,?A3∅A4,gpt-5,0.9991,1.0,0.0165,1.0,1.0,0.0,0.9999,0.0,1.0,1.0,0.8721,1.0,0.0,0.0045,1.0,0.9934,1.0,1.0
classification,qawiki,?A4=A1|3,llama3.1:8b,1.0,1.0,0.002,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0107,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.8438,0.9716,0.0586,0.9999,0.0003,1.0,1.0,0.9733,1.0,0.0,0.1358,1.0,0.1684,1.0,0.9703
classification,qawiki,?A4=A1|3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A4=A1|3,mistral-small:24b,0.0,0.2354,0.0,1.0,0.8892,0.0047,0.9996,0.0,1.0,0.9999,0.8785,1.0,0.0,0.02,0.9999,0.0587,1.0,0.8438
classification,qawiki,?A4=A1|3,llama3.1:70b,0.0,0.048,0.0,0.1642,1.0,0.0005,0.951,0.0,1.0,0.9887,0.4518,1.0,0.0,0.0018,0.9974,0.0041,0.9972,0.4022
classification,qawiki,?A4=A1|3,gemini-2.0-flash,0.0,0.9702,0.0,0.9983,0.9998,1.0,1.0,0.0178,1.0,1.0,1.0,1.0,0.0,0.6962,1.0,0.7693,1.0,1.0
classification,qawiki,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0002,0.0,0.0012,0.0809,0.0,1.0,0.0,0.9994,0.8198,0.0325,1.0,0.0,0.0,0.9413,0.0,0.9443,0.01
classification,qawiki,?A4=A1|3,gpt-4o,0.0001,0.9999,0.0,1.0,1.0,0.9937,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.9974,1.0,0.998,1.0,1.0
classification,qawiki,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0001,0.0,0.0018,0.0,1.0,0.02,0.0,0.9955,0.0,0.0,0.1446,0.0,0.0717,0.0
classification,qawiki,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0003,0.0215,0.0,0.2712,0.0,0.9907,1.0,0.0055,1.0,0.0,0.0,0.8296,0.0,0.8198,0.0028
classification,qawiki,?A4=A1|3,deepseek-chat,0.0,0.0492,0.0,0.1908,0.6418,0.0001,0.9836,0.0,1.0,0.9977,1.0,1.0,0.0,0.0005,0.9996,0.0033,0.9999,0.5
classification,qawiki,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0122,0.0,0.0,1.0,0.0,0.0,0.0003,0.0,0.0001,0.0
classification,qawiki,?A4=A1|3,gpt-5-nano,0.999,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,qawiki,?A4=A1|3,deepseek-reasoner,0.0,0.9155,0.0,0.9907,0.9992,0.4321,1.0,0.0081,1.0,1.0,0.9999,1.0,0.0,1.0,1.0,0.6742,1.0,1.0
classification,qawiki,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0002,0.0053,0.0,0.0963,0.0,0.9076,0.2483,0.0011,0.9999,0.0,0.0,1.0,0.0,0.5,0.0009
classification,qawiki,?A4=A1|3,gpt-5-mini,0.0,0.9002,0.0,0.9663,0.9982,0.3294,1.0,0.0057,1.0,1.0,0.9987,1.0,0.0,0.4402,1.0,1.0,1.0,0.9985
classification,qawiki,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0058,0.0,0.0967,0.0,0.9635,0.2712,0.0002,1.0,0.0,0.0,0.6081,0.0,1.0,0.0
classification,qawiki,?A4=A1|3,gpt-5,0.0,0.0557,0.0,0.2354,0.6899,0.0001,0.9962,0.0,1.0,0.999,0.6224,1.0,0.0,0.0002,0.9996,0.004,1.0,1.0
classification,spinach,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9962,1.0,0.9999,1.0,0.9999,1.0
classification,spinach,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.0001,0.0,0.0036,0.1938,0.7734,0.0461,0.0096,0.5,0.0592,0.0,0.0036,0.0,0.0096,0.0,0.073
classification,spinach,?A1=A2,mistral-small:24b,0.0,0.0,1.0,1.0,0.2557,0.9252,0.9997,1.0,0.9987,0.9827,1.0,0.997,0.0298,0.9252,0.1885,0.9693,0.2088,0.9988
classification,spinach,?A1=A2,llama3.1:70b,0.0,0.0001,1.0,0.838,1.0,0.9919,1.0,1.0,0.9999,0.9974,1.0,0.9998,0.1659,0.9808,0.5,0.9943,0.5,0.9998
classification,spinach,?A1=A2,gemini-2.0-flash,0.0,0.0,0.9993,0.1405,0.0214,1.0,0.9904,0.9999,0.9331,0.7483,0.9978,0.9157,0.0014,0.57,0.0298,0.7142,0.0147,0.939
classification,spinach,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.927,0.0012,0.0,0.0318,1.0,0.9805,0.2905,0.0946,0.8867,0.3145,0.0,0.0539,0.0001,0.0835,0.0001,0.4018
classification,spinach,?A1=A2,gpt-4o,0.0,0.0,0.5,0.0,0.0,0.0007,0.0898,1.0,0.0193,0.0038,0.3633,0.0193,0.0,0.0013,0.0,0.0009,0.0,0.0327
classification,spinach,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.9888,0.0053,0.0005,0.1431,0.8666,0.9968,1.0,0.2403,0.9807,0.5927,0.0,0.1316,0.0009,0.2617,0.0003,0.6855
classification,spinach,?A1=A2,grok-3-mini,0.0,0.0,0.9978,0.0466,0.0081,0.4119,0.9608,0.9993,0.8811,1.0,0.9941,0.8949,0.0005,0.4194,0.0068,0.5722,0.002,0.9165
classification,spinach,?A1=A2,deepseek-chat,0.0,0.0,0.7461,0.0,0.0,0.0085,0.2744,0.8555,0.073,0.0207,1.0,0.1051,0.0,0.0038,0.0,0.0106,0.0,0.1509
classification,spinach,?A1=A2,gemini-2.5-flash,0.0,0.0,0.9824,0.0096,0.0007,0.1635,0.8338,0.9968,0.5927,0.2272,0.9616,1.0,0.0001,0.1431,0.0004,0.2617,0.0003,0.7095
classification,spinach,?A1=A2,gpt-5-nano,0.0,0.0082,1.0,0.9862,0.8942,0.9995,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,0.9995,0.8642,0.9999,0.8688,1.0
classification,spinach,?A1=A2,deepseek-reasoner,0.0,0.0,0.9993,0.1405,0.0403,0.57,0.9784,0.9998,0.9423,0.7294,0.9993,0.9331,0.0014,1.0,0.0235,0.7142,0.0175,0.9682
classification,spinach,?A1=A2,gemini-2.5-pro,0.0,0.0003,1.0,0.8923,0.6321,0.9862,1.0,1.0,0.9997,0.9977,1.0,0.9999,0.2051,0.99,1.0,0.9953,0.5598,1.0
classification,spinach,?A1=A2,gpt-5-mini,0.0,0.0,0.9978,0.068,0.0144,0.4253,0.9682,0.9999,0.8569,0.5722,0.9979,0.8569,0.0005,0.4253,0.0119,1.0,0.0178,0.9165
classification,spinach,?A1=A2,o3,0.0,0.0002,1.0,0.8721,0.617,0.9947,1.0,1.0,0.9999,0.9995,1.0,0.9999,0.2005,0.9932,0.5598,0.992,1.0,1.0
classification,spinach,?A1=A2,gpt-5,0.0,0.0,0.9807,0.0047,0.0007,0.1239,0.7728,0.9941,0.5,0.1796,0.9408,0.5,0.0,0.0835,0.0001,0.1796,0.0002,1.0
classification,spinach,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.9867,0.7952,0.9957,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9976,1.0,1.0
classification,spinach,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.0237,1.0,0.1093,0.595,1.0,1.0,1.0,1.0,0.9983,1.0,1.0,0.9887,1.0,0.9995,0.792,0.9998,1.0
classification,spinach,?A1=A3+A4,mistral-small:24b,0.0,0.278,0.9272,1.0,0.9497,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9996,1.0,1.0,0.9878,1.0,1.0
classification,spinach,?A1=A3+A4,llama3.1:70b,0.0,0.0092,0.5,0.0799,1.0,1.0,1.0,1.0,1.0,0.9995,1.0,1.0,0.9836,1.0,0.9996,0.7617,0.9999,1.0
classification,spinach,?A1=A3+A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.2148,0.9987,0.5,0.0106,0.3642,0.9449,0.0003,0.1958,0.0297,0.0,0.0595,0.5679
classification,spinach,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.8659,1.0,1.0,0.8519,0.0676,0.7566,0.9931,0.0099,0.5612,0.1562,0.0001,0.2204,0.8998
classification,spinach,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0059,0.0003,1.0,0.0053,0.0,0.003,0.2617,0.0,0.0015,0.0,0.0,0.0001,0.0085
classification,spinach,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.6286,0.2434,0.9987,1.0,0.0045,0.4253,0.9739,0.0016,0.2664,0.0266,0.0,0.0401,0.6399
classification,spinach,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0035,0.0,0.0013,0.9953,0.9638,1.0,0.9985,1.0,0.9943,1.0,0.2094,0.9765,0.7502,0.0192,0.8316,0.9993
classification,spinach,?A1=A3+A4,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.7566,0.3642,0.9992,0.7142,0.0144,1.0,0.9879,0.0043,0.3746,0.0631,0.0,0.1055,0.779
classification,spinach,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.1077,0.0168,0.8569,0.061,0.0,0.0307,1.0,0.0,0.0081,0.0001,0.0,0.0004,0.0843
classification,spinach,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0215,0.0008,0.03,0.9999,0.9955,1.0,0.9994,0.8597,0.9981,1.0,1.0,0.9961,0.9368,0.124,0.9593,0.9998
classification,spinach,?A1=A3+A4,deepseek-reasoner,0.0,0.0,0.0,0.0,0.0,0.8853,0.5612,0.9996,0.8256,0.0494,0.7388,0.9974,0.0088,1.0,0.1456,0.0001,0.2307,0.8785
classification,spinach,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0011,0.0,0.0009,0.9853,0.9033,1.0,0.9881,0.3679,0.967,1.0,0.1058,0.9129,1.0,0.0043,0.7142,0.9943
classification,spinach,?A1=A3+A4,gpt-5-mini,0.0,0.0048,0.2807,0.0222,0.3177,1.0,1.0,1.0,1.0,0.9897,1.0,1.0,0.9173,1.0,0.9981,1.0,0.9987,1.0
classification,spinach,?A1=A3+A4,o3,0.0,0.0,0.0004,0.0,0.0004,0.9675,0.86,1.0,0.9825,0.2612,0.9414,0.9999,0.0704,0.849,0.4253,0.003,1.0,0.9856
classification,spinach,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.5679,0.1808,0.9978,0.5,0.0023,0.3506,0.9622,0.0006,0.2025,0.0144,0.0,0.0326,1.0
classification,spinach,?A1>A3,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gpt-oss:20b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,0.0015,1.0,0.0503,1.0,0.0023,1.0,1.0,0.0002,1.0,1.0,0.0003,1.0
classification,spinach,?A1>A3,mistral-small:24b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,llama3.1:70b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gemini-2.0-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gpt-4.1-mini-2025-04-14,1.0,1.0,0.9994,1.0,1.0,1.0,1.0,1.0,0.9476,1.0,0.6123,1.0,1.0,0.2175,1.0,1.0,0.2257,1.0
classification,spinach,?A1>A3,gpt-4o,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gpt-4.1-2025-04-14,1.0,1.0,0.9698,1.0,1.0,1.0,0.0885,1.0,1.0,1.0,0.1144,1.0,1.0,0.0099,1.0,1.0,0.012,1.0
classification,spinach,?A1>A3,grok-3-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,deepseek-chat,1.0,1.0,0.9989,1.0,1.0,1.0,0.5,1.0,0.9296,1.0,1.0,1.0,1.0,0.1341,1.0,1.0,0.2213,1.0
classification,spinach,?A1>A3,gemini-2.5-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gpt-5-nano,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,deepseek-reasoner,1.0,1.0,0.9999,1.0,1.0,1.0,0.8512,1.0,0.9955,1.0,0.9231,1.0,1.0,1.0,1.0,1.0,0.6026,1.0
classification,spinach,?A1>A3,gemini-2.5-pro,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,gpt-5-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A3,o3,1.0,1.0,0.9999,1.0,1.0,1.0,0.8544,1.0,0.9945,1.0,0.8472,1.0,1.0,0.5,1.0,1.0,1.0,1.0
classification,spinach,?A1>A3,gpt-5,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0625,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
classification,spinach,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A1>A4,gpt-oss:20b,0.0,1.0,0.0382,0.8252,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9703,0.9995,1.0,0.9,1.0,1.0
classification,spinach,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.9778,1.0,0.9979,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,0.9992,1.0,1.0
classification,spinach,?A1>A4,mistral-small:24b,0.0,0.2522,0.0046,1.0,0.9977,1.0,1.0,1.0,1.0,0.9998,0.9999,1.0,0.8209,0.996,0.9997,0.6927,0.9998,1.0
classification,spinach,?A1>A4,llama3.1:70b,0.0,0.0004,0.0,0.0057,1.0,0.9999,0.8998,1.0,0.9995,0.7214,0.8192,0.9999,0.0365,0.5,0.7502,0.0267,0.7709,1.0
classification,spinach,?A1>A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0008,1.0,0.0154,0.9688,0.5,0.0022,0.0038,0.623,0.0,0.0005,0.0022,0.0,0.0036,0.8125
classification,spinach,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0001,0.1808,0.9962,1.0,1.0,0.9904,0.2786,0.4194,0.9941,0.0025,0.1405,0.345,0.001,0.3388,0.9999
classification,spinach,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.1875,0.0001,1.0,0.1094,0.0001,0.0004,0.2266,0.0,0.0,0.0001,0.0,0.0001,0.3125
classification,spinach,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.002,0.7461,0.0318,0.9844,1.0,0.0036,0.0064,0.7461,0.0,0.0012,0.0059,0.0,0.0059,0.8555
classification,spinach,?A1>A4,grok-3-mini,0.0,0.0,0.0,0.0008,0.4225,0.9996,0.8365,1.0,0.9993,1.0,0.7142,0.9992,0.0168,0.3388,0.6682,0.012,0.6445,1.0
classification,spinach,?A1>A4,deepseek-chat,0.0,0.0001,0.0,0.0005,0.2923,0.9993,0.7294,1.0,0.9988,0.4253,1.0,0.9987,0.0147,0.2434,0.5,0.0033,0.5,0.9999
classification,spinach,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0008,0.623,0.0207,0.9375,0.5,0.0033,0.0059,1.0,0.0,0.0008,0.0036,0.0,0.0022,0.7461
classification,spinach,?A1>A4,gpt-5-nano,0.0,0.0557,0.0002,0.2559,0.9832,1.0,0.9992,1.0,1.0,0.9931,0.9934,1.0,1.0,0.967,0.9942,0.3939,0.9931,1.0
classification,spinach,?A1>A4,deepseek-reasoner,0.0,0.0013,0.0,0.0093,0.6321,0.9999,0.9252,1.0,0.9997,0.7976,0.8519,0.9998,0.0631,1.0,0.8192,0.0297,0.8042,1.0
classification,spinach,?A1>A4,gemini-2.5-pro,0.0,0.0,0.0,0.001,0.3679,0.9996,0.7878,1.0,0.9987,0.5,0.6445,0.9993,0.0138,0.2923,1.0,0.0057,0.5806,1.0
classification,spinach,?A1>A4,gpt-5-mini,0.0,0.1528,0.0018,0.4007,0.9865,1.0,0.9997,1.0,1.0,0.9945,0.9988,1.0,0.705,0.9853,0.9977,1.0,0.9972,1.0
classification,spinach,?A1>A4,o3,0.0,0.0001,0.0,0.0007,0.3555,0.9993,0.7976,1.0,0.9987,0.5,0.655,0.9996,0.0158,0.3038,0.5806,0.0066,1.0,1.0
classification,spinach,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0002,0.5,0.0017,0.9375,0.3633,0.0001,0.0012,0.5,0.0,0.0,0.0001,0.0,0.0003,1.0
classification,spinach,?A3∅A4,llama3.1:8b,1.0,0.9993,0.0,0.962,1.0,0.1909,0.8554,0.0,0.9989,1.0,0.0668,1.0,0.0,0.3957,1.0,0.2285,1.0,0.5497
classification,spinach,?A3∅A4,gpt-oss:20b,0.0019,1.0,0.0,0.0769,0.9988,0.0,0.0083,0.0,0.5,0.8675,0.0,0.968,0.0,0.0002,0.9904,0.0,0.9996,0.0005
classification,spinach,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6756,1.0,1.0,1.0,1.0,0.0011,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A3∅A4,mistral-small:24b,0.0668,0.9597,0.0,1.0,1.0,0.0028,0.2434,0.0,0.9693,0.9992,0.0001,0.9997,0.0,0.0138,1.0,0.0027,1.0,0.0365
classification,spinach,?A3∅A4,llama3.1:70b,0.0,0.0047,0.0,0.0,1.0,0.0,0.0,0.0,0.0041,0.0466,0.0,0.1431,0.0,0.0,0.3238,0.0,0.5982,0.0
classification,spinach,?A3∅A4,gemini-2.0-flash,0.8698,1.0,0.0,0.999,1.0,1.0,0.9953,0.0,1.0,1.0,0.3179,1.0,0.0,0.8447,1.0,0.617,1.0,0.9129
classification,spinach,?A3∅A4,gpt-4.1-mini-2025-04-14,0.2135,0.997,0.0,0.8519,1.0,0.0119,1.0,0.0,0.9992,1.0,0.0001,1.0,0.0,0.0494,1.0,0.0168,1.0,0.1405
classification,spinach,?A3∅A4,gpt-4o,1.0,1.0,0.4099,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0004,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A3∅A4,gpt-4.1-2025-04-14,0.0027,0.6494,0.0,0.068,0.9988,0.0,0.0033,0.0,1.0,0.9423,0.0,0.9963,0.0,0.0001,0.9987,0.0,0.9998,0.0001
classification,spinach,?A3∅A4,grok-3-mini,0.0001,0.2291,0.0,0.003,0.9827,0.0,0.0001,0.0,0.1316,1.0,0.0,0.8666,0.0,0.0,0.9807,0.0,0.9904,0.0
classification,spinach,?A3∅A4,deepseek-chat,0.962,1.0,0.0,1.0,1.0,0.7852,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.9599,1.0,0.8595,1.0,0.9919
classification,spinach,?A3∅A4,gemini-2.5-flash,0.0,0.0758,0.0,0.0011,0.9331,0.0,0.0,0.0,0.0176,0.2905,0.0,1.0,0.0,0.0,0.8867,0.0,0.9616,0.0
classification,spinach,?A3∅A4,gpt-5-nano,1.0,1.0,0.9996,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A3∅A4,deepseek-reasoner,0.7017,1.0,0.0,0.9942,1.0,0.2498,0.9786,0.0,1.0,1.0,0.0814,1.0,0.0,1.0,1.0,0.3136,1.0,0.7634
classification,spinach,?A3∅A4,gemini-2.5-pro,0.0,0.0261,0.0,0.0001,0.8204,0.0,0.0,0.0,0.0059,0.073,0.0,0.2744,0.0,0.0,1.0,0.0,0.8867,0.0
classification,spinach,?A3∅A4,gpt-5-mini,0.8395,1.0,0.0,0.999,1.0,0.5,0.9931,0.0,1.0,1.0,0.2366,1.0,0.0,0.7912,1.0,1.0,1.0,0.9461
classification,spinach,?A3∅A4,o3,0.0,0.0022,0.0,0.0001,0.5982,0.0,0.0,0.0,0.0013,0.0318,0.0,0.1051,0.0,0.0,0.2744,0.0,1.0,0.0
classification,spinach,?A3∅A4,gpt-5,0.5497,0.9999,0.0,0.9832,1.0,0.1456,0.9252,0.0,1.0,1.0,0.0214,1.0,0.0,0.3601,1.0,0.1148,1.0,1.0
classification,spinach,?A4=A1|3,llama3.1:8b,1.0,1.0,0.0312,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.1094,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.5505,0.9999,0.9998,1.0,0.2354,1.0,1.0,0.9977,1.0,0.0,0.9977,1.0,0.0885,1.0,1.0
classification,spinach,?A4=A1|3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A4=A1|3,mistral-small:24b,0.0,0.5505,0.0,1.0,0.9999,0.9997,1.0,0.2399,1.0,1.0,0.9968,1.0,0.0,0.9973,1.0,0.1037,1.0,1.0
classification,spinach,?A4=A1|3,llama3.1:70b,0.0,0.0002,0.0,0.0003,1.0,0.4487,0.6026,0.0,0.9984,0.9225,0.0967,1.0,0.0,0.1264,0.9966,0.0,0.9992,0.5489
classification,spinach,?A4=A1|3,gemini-2.0-flash,0.0,0.0005,0.0,0.0007,0.6506,1.0,0.7084,0.0,0.9997,0.9565,0.1562,1.0,0.0,0.1562,0.9997,0.0,1.0,0.6611
classification,spinach,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0001,0.0,0.0001,0.5,0.3919,1.0,0.0,0.9998,0.9281,0.0541,1.0,0.0,0.0631,0.9987,0.0,0.9999,0.5
classification,spinach,?A4=A1|3,gpt-4o,0.0,0.8438,0.0,0.8389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.2366,1.0,1.0
classification,spinach,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0038,0.0009,0.0006,0.0,1.0,0.0586,0.0,0.9991,0.0,0.0,0.566,0.0,0.8192,0.0007
classification,spinach,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.1225,0.074,0.1215,0.0,0.9702,1.0,0.0018,1.0,0.0,0.0014,0.9765,0.0,0.9942,0.0769
classification,spinach,?A4=A1|3,deepseek-chat,0.0,0.0055,0.0,0.0072,0.9443,0.9033,0.9734,0.0,1.0,0.9994,1.0,1.0,0.0,0.5679,1.0,0.0,1.0,0.9674
classification,spinach,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.0,0.0,1.0,0.0,0.0,0.003,0.0,0.0205,0.0
classification,spinach,?A4=A1|3,gpt-5-nano,0.9844,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A4=A1|3,deepseek-reasoner,0.0,0.0055,0.0,0.0063,0.9191,0.9033,0.967,0.0,1.0,0.9995,0.5679,1.0,0.0,1.0,1.0,0.0,1.0,0.9786
classification,spinach,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.007,0.0009,0.0033,0.0,0.566,0.0494,0.0,0.9991,0.0,0.0,1.0,0.0,0.8192,0.0005
classification,spinach,?A4=A1|3,gpt-5-mini,0.0,0.9476,0.0,0.935,1.0,1.0,1.0,0.8595,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,spinach,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0018,0.0001,0.0003,0.0,0.2923,0.0138,0.0,0.9917,0.0,0.0,0.2923,0.0,1.0,0.0
classification,spinach,?A4=A1|3,gpt-5,0.0,0.0001,0.0,0.0001,0.5489,0.4449,0.6224,0.0,0.9998,0.9597,0.0662,1.0,0.0,0.0494,0.9999,0.0,1.0,1.0
classification,synthetic,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9937,0.932,0.9933,1.0,1.0,1.0
classification,synthetic,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.0008,0.0,0.0005,0.0012,0.2905,0.0207,0.0207,0.1509,0.002,0.0,0.0,0.0,0.0033,0.0481,0.0133
classification,synthetic,?A1=A2,mistral-small:24b,0.0,0.0,0.9998,1.0,0.2434,0.5,0.6445,0.9996,0.9461,0.9461,0.9967,0.7214,0.0676,0.0022,0.0403,0.7709,0.9784,0.9075
classification,synthetic,?A1=A2,llama3.1:70b,0.0,0.0002,1.0,0.8519,1.0,0.7912,0.8853,1.0,0.9919,0.9937,1.0,0.9061,0.2148,0.027,0.1802,0.9564,0.9981,0.9904
classification,synthetic,?A1=A2,gemini-2.0-flash,0.0,0.0,0.9999,0.6445,0.3136,1.0,0.7077,0.9995,0.9622,0.9506,0.998,0.7709,0.092,0.0032,0.0541,0.8275,0.9749,0.9186
classification,synthetic,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.9997,0.5,0.1958,0.4278,1.0,0.9993,0.9157,0.9242,0.9947,0.655,0.0442,0.0014,0.033,0.7214,0.9506,0.8675
classification,synthetic,?A1=A2,gpt-4o,0.0,0.0,0.8666,0.0022,0.0,0.002,0.0036,1.0,0.0592,0.0327,0.3633,0.0059,0.0,0.0,0.0,0.0096,0.1509,0.0384
classification,synthetic,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.9941,0.1148,0.0214,0.0843,0.1635,0.9824,1.0,0.5982,0.9283,0.1917,0.004,0.0,0.0007,0.2706,0.7483,0.5
classification,synthetic,?A1=A2,grok-3-mini,0.0,0.0,0.9941,0.1148,0.0178,0.1002,0.1537,0.9941,0.5982,1.0,0.9283,0.1796,0.002,0.0,0.0004,0.2786,0.7483,0.5
classification,synthetic,?A1=A2,deepseek-chat,0.0,0.0,0.9408,0.0113,0.0001,0.0073,0.0173,0.8555,0.1662,0.1662,1.0,0.032,0.0,0.0,0.0,0.0466,0.3238,0.1316
classification,synthetic,?A1=A2,gemini-2.5-flash,0.0,0.0,0.9995,0.4225,0.162,0.3555,0.5,0.9987,0.9054,0.9165,0.9887,1.0,0.0395,0.001,0.0081,0.6358,0.9682,0.8463
classification,synthetic,?A1=A2,gpt-5-nano,0.0,0.0124,1.0,0.9638,0.8659,0.9481,0.9782,1.0,0.9986,0.9994,1.0,0.98,1.0,0.1748,0.5,0.9917,0.9995,0.9987
classification,synthetic,?A1=A2,deepseek-reasoner,0.0,0.1073,1.0,0.9993,0.9871,0.9989,0.9995,1.0,1.0,1.0,1.0,0.9997,0.8856,1.0,0.8837,0.9998,1.0,1.0
classification,synthetic,?A1=A2,gemini-2.5-pro,0.0,0.0137,1.0,0.9808,0.889,0.9734,0.9842,1.0,0.9998,0.9999,1.0,0.9974,0.6081,0.1856,1.0,0.9953,1.0,0.9996
classification,synthetic,?A1=A2,gpt-5-mini,0.0,0.0,0.9992,0.3555,0.0925,0.2858,0.4225,0.9978,0.8463,0.8365,0.9827,0.5,0.0192,0.0006,0.0119,1.0,0.9242,0.7976
classification,synthetic,?A1=A2,o3,0.0,0.0,0.9846,0.0539,0.0063,0.0551,0.1002,0.9408,0.4119,0.4119,0.8204,0.0835,0.0014,0.0,0.0002,0.1537,1.0,0.3036
classification,synthetic,?A1=A2,gpt-5,0.0,0.0,0.9964,0.1725,0.0261,0.1481,0.2291,0.9894,0.6682,0.6682,0.9423,0.2706,0.0038,0.0001,0.0015,0.3388,0.8491,1.0
classification,synthetic,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.9976,0.9876,0.9981,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,0.9952,1.0,1.0
classification,synthetic,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.0048,1.0,0.2199,0.4076,1.0,1.0,1.0,1.0,0.9998,0.9989,1.0,0.7277,1.0,0.8459,0.4076,1.0,1.0
classification,synthetic,?A1=A3+A4,mistral-small:24b,0.0,0.024,0.8399,1.0,0.7865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9375,1.0,0.9876,0.7715,1.0,1.0
classification,synthetic,?A1=A3+A4,llama3.1:70b,0.0,0.0043,0.68,0.2983,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,0.8358,1.0,0.9508,0.5489,1.0,1.0
classification,synthetic,?A1=A3+A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.8115,0.9564,0.7142,0.3089,0.0541,0.9927,0.0,0.8115,0.0001,0.0,0.9564,0.9157
classification,synthetic,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.2983,1.0,0.8275,0.4159,0.0925,0.0068,0.9682,0.0,0.5722,0.0,0.0,0.8569,0.7383
classification,synthetic,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0925,0.2858,1.0,0.1725,0.0251,0.0009,0.8083,0.0,0.2786,0.0,0.0,0.5841,0.4253
classification,synthetic,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.4253,0.7383,0.9075,1.0,0.1635,0.0147,0.9904,0.0,0.7142,0.0,0.0,0.9519,0.8684
classification,synthetic,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0004,0.0,0.0,0.7975,0.9564,0.99,0.9157,1.0,0.1744,0.9998,0.0001,0.9506,0.001,0.0,0.9953,0.9915
classification,synthetic,?A1=A3+A4,deepseek-chat,0.0,0.0,0.0026,0.0,0.0003,0.9734,0.9977,0.9997,0.9947,0.8945,1.0,1.0,0.0102,0.9962,0.0267,0.0006,0.9999,0.9987
classification,synthetic,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0216,0.0835,0.3318,0.0318,0.0013,0.0001,1.0,0.0,0.0946,0.0,0.0,0.3238,0.1662
classification,synthetic,?A1=A3+A4,gpt-5-nano,0.0,0.0006,0.3582,0.0973,0.2319,1.0,1.0,1.0,1.0,1.0,0.9951,1.0,1.0,1.0,0.7338,0.2319,1.0,1.0
classification,synthetic,?A1=A3+A4,deepseek-reasoner,0.0,0.0,0.0,0.0,0.0,0.2983,0.5722,0.8365,0.4253,0.1002,0.01,0.9608,0.0,1.0,0.0,0.0,0.8463,0.7383
classification,synthetic,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.2141,0.024,0.0845,1.0,1.0,1.0,1.0,0.9996,0.9865,1.0,0.354,1.0,1.0,0.1073,1.0,1.0
classification,synthetic,?A1=A3+A4,gpt-5-mini,0.0,0.0093,0.68,0.3101,0.5489,1.0,1.0,1.0,1.0,1.0,0.9998,1.0,0.8358,1.0,0.932,1.0,1.0,1.0
classification,synthetic,?A1=A3+A4,o3,0.0,0.0,0.0,0.0,0.0,0.0925,0.2617,0.5841,0.1189,0.0145,0.0004,0.8204,0.0,0.2706,0.0,0.0,1.0,0.4073
classification,synthetic,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.1635,0.4159,0.7142,0.2517,0.0262,0.0038,0.9283,0.0,0.4159,0.0,0.0,0.7597,1.0
classification,synthetic,?A1>A3,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-oss:20b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,0.9777,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,mistral-small:24b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,llama3.1:70b,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gemini-2.0-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-4.1-mini-2025-04-14,1.0,1.0,0.0363,1.0,1.0,1.0,1.0,1.0,0.9993,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-4o,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-4.1-2025-04-14,1.0,1.0,0.0,1.0,1.0,1.0,0.0019,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,grok-3-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,deepseek-chat,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.8338,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gemini-2.5-flash,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-5-nano,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,deepseek-reasoner,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.3145,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gemini-2.5-pro,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-5-mini,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,o3,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A3,gpt-5,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0001,1.0,1.0,0.0005,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A4,llama3.1:8b,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A1>A4,gpt-oss:20b,0.0,1.0,0.0067,0.9675,0.9988,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.446,1.0,0.9734,0.1037,1.0,1.0
classification,synthetic,?A1>A4,gpt-4.1-nano-2025-04-14,0.0003,0.9969,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9888,1.0,1.0,0.8927,1.0,1.0
classification,synthetic,?A1>A4,mistral-small:24b,0.0,0.0595,0.0,1.0,0.9186,0.9978,1.0,1.0,1.0,0.9995,0.9953,1.0,0.0244,1.0,0.5,0.0013,1.0,1.0
classification,synthetic,?A1>A4,llama3.1:70b,0.0,0.0033,0.0,0.1481,1.0,0.9165,1.0,0.9993,1.0,0.9793,0.9054,0.9999,0.0012,0.9964,0.1002,0.0,1.0,1.0
classification,synthetic,?A1>A4,gemini-2.0-flash,0.0,0.0001,0.0,0.0085,0.1796,1.0,0.9991,0.9894,0.9991,0.8204,0.5982,0.9983,0.0,0.9713,0.0096,0.0,0.9991,0.9983
classification,synthetic,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0004,0.0065,1.0,0.3438,0.6875,0.0327,0.0065,0.5,0.0,0.1445,0.0,0.0,0.6875,0.5
classification,synthetic,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0001,0.0036,0.0384,0.8906,1.0,0.8906,0.1334,0.0287,0.7734,0.0,0.377,0.0,0.0,0.8906,0.7734
classification,synthetic,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0001,0.0065,0.6875,0.3438,1.0,0.0195,0.0065,0.5,0.0,0.1445,0.0,0.0,0.75,0.5
classification,synthetic,?A1>A4,grok-3-mini,0.0,0.0,0.0,0.002,0.0577,0.3238,0.9941,0.9539,0.998,1.0,0.3145,0.9807,0.0,0.8666,0.0026,0.0,0.998,0.9961
classification,synthetic,?A1>A4,deepseek-chat,0.0,0.0001,0.0,0.0145,0.1917,0.5982,0.9991,0.9935,0.9991,0.8338,1.0,0.9983,0.0001,0.9713,0.0121,0.0,0.9991,0.9983
classification,synthetic,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0007,0.0112,0.8125,0.5,0.8125,0.073,0.0112,1.0,0.0,0.2539,0.0,0.0,0.8125,0.6875
classification,synthetic,?A1>A4,gpt-5-nano,0.0,0.6583,0.0207,0.9887,0.9996,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9756,0.1528,1.0,1.0
classification,synthetic,?A1>A4,deepseek-reasoner,0.0,0.0,0.0,0.0001,0.0133,0.0898,0.9648,0.8281,0.9648,0.2905,0.0898,0.9102,0.0,1.0,0.0002,0.0,0.9648,0.9102
classification,synthetic,?A1>A4,gemini-2.5-pro,0.0,0.0541,0.0001,0.6286,0.9506,0.997,1.0,1.0,1.0,0.9993,0.9959,1.0,0.046,1.0,1.0,0.0023,1.0,1.0
classification,synthetic,?A1>A4,gpt-5-mini,0.0,0.935,0.1605,0.9995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,1.0,0.9991,1.0,1.0,1.0
classification,synthetic,?A1>A4,o3,0.0,0.0,0.0,0.0,0.0001,0.0065,0.6875,0.3438,0.75,0.0195,0.0065,0.5,0.0,0.1445,0.0,0.0,1.0,0.5
classification,synthetic,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0001,0.0112,0.8125,0.5,0.875,0.0352,0.0112,0.6875,0.0,0.2539,0.0,0.0,0.875,1.0
classification,synthetic,?A3∅A4,llama3.1:8b,1.0,1.0,0.0,1.0,1.0,0.9296,1.0,0.071,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9995,1.0,1.0
classification,synthetic,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0,0.7878,0.8569,0.0005,0.7976,0.0,0.9793,0.9936,0.9331,0.9997,0.0,0.4225,0.9988,0.1077,1.0,0.5
classification,synthetic,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0279,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A3∅A4,mistral-small:24b,0.0,0.345,0.0,1.0,0.6682,0.0,0.6047,0.0,0.9408,0.9713,0.8867,0.9995,0.0,0.2024,0.9894,0.0261,0.9998,0.2706
classification,synthetic,?A3∅A4,llama3.1:70b,0.0,0.2617,0.0,0.5,1.0,0.0,0.5,0.0,0.8949,0.9408,0.7728,0.9968,0.0,0.1431,0.9941,0.0113,0.9995,0.1917
classification,synthetic,?A3∅A4,gemini-2.0-flash,0.1144,0.9999,0.0,1.0,1.0,1.0,1.0,0.0004,1.0,1.0,1.0,1.0,0.0,0.9999,1.0,0.9943,1.0,1.0
classification,synthetic,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,0.3388,0.0,0.6047,0.6762,0.0,1.0,0.0,0.9673,0.9893,0.9102,0.9995,0.0,0.1796,0.9935,0.0216,0.9998,0.2403
classification,synthetic,?A3∅A4,gpt-4o,0.9568,1.0,0.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A3∅A4,gpt-4.1-2025-04-14,0.0,0.0577,0.0,0.1509,0.2272,0.0,0.1133,0.0,1.0,0.8125,0.377,0.9844,0.0,0.0154,0.8867,0.0012,0.9922,0.0245
classification,synthetic,?A3∅A4,grok-3-mini,0.0,0.0245,0.0,0.0898,0.1509,0.0,0.0547,0.0,0.5,1.0,0.2539,0.9688,0.0,0.0037,0.8281,0.0002,0.9844,0.0106
classification,synthetic,?A3∅A4,deepseek-chat,0.0,0.1431,0.0,0.2744,0.4018,0.0,0.2539,0.0,0.8281,0.9102,1.0,0.9961,0.0,0.0384,0.9539,0.0033,0.998,0.0717
classification,synthetic,?A3∅A4,gemini-2.5-flash,0.0,0.0021,0.0,0.0059,0.0193,0.0,0.0059,0.0,0.1094,0.1875,0.0352,1.0,0.0,0.0001,0.5,0.0,0.875,0.0005
classification,synthetic,?A3∅A4,gpt-5-nano,1.0,1.0,0.985,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A3∅A4,deepseek-reasoner,0.0,0.7214,0.0,0.895,0.9331,0.0003,0.9165,0.0,0.9962,0.9995,0.9894,1.0,0.0,1.0,0.9999,0.1537,1.0,0.7095
classification,synthetic,?A3∅A4,gemini-2.5-pro,0.0,0.0064,0.0,0.0384,0.0327,0.0,0.0287,0.0,0.2744,0.377,0.1334,0.7734,0.0,0.0012,1.0,0.0,0.9375,0.0021
classification,synthetic,?A3∅A4,gpt-5-mini,0.0013,0.9449,0.0,0.9904,0.9967,0.0144,0.9927,0.0,0.9997,1.0,0.9992,1.0,0.0,0.9242,1.0,1.0,1.0,0.9608
classification,synthetic,?A3∅A4,o3,0.0,0.0005,0.0,0.0032,0.0059,0.0,0.0032,0.0,0.0625,0.1094,0.0195,0.5,0.0,0.0,0.3125,0.0,1.0,0.0001
classification,synthetic,?A3∅A4,gpt-5,0.0,0.655,0.0,0.8463,0.9054,0.0002,0.8811,0.0,0.9936,0.9979,0.9755,1.0,0.0,0.5,0.9997,0.0946,1.0,1.0
classification,synthetic,?A4=A1|3,llama3.1:8b,1.0,1.0,0.0001,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0001,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.985,0.9996,1.0,1.0,0.9832,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.8091,1.0,1.0
classification,synthetic,?A4=A1|3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A4=A1|3,mistral-small:24b,0.0,0.029,0.0,1.0,0.9296,0.9923,1.0,0.3877,1.0,1.0,1.0,1.0,0.0,1.0,0.9962,0.1528,1.0,1.0
classification,synthetic,?A4=A1|3,llama3.1:70b,0.0,0.0009,0.0,0.1144,1.0,0.8296,1.0,0.074,1.0,0.9999,0.9991,1.0,0.0,0.9999,0.9155,0.0124,1.0,1.0
classification,synthetic,?A4=A1|3,gemini-2.0-flash,0.0,0.0,0.0,0.0164,0.2483,1.0,0.9998,0.0072,1.0,0.9995,0.992,1.0,0.0,0.9995,0.7084,0.002,1.0,1.0
classification,synthetic,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0005,1.0,0.0,0.8569,0.4321,0.1002,0.9999,0.0,0.3038,0.0014,0.0,0.9974,0.5
classification,synthetic,?A4=A1|3,gpt-4o,0.0,0.0365,0.0,0.7159,0.9544,0.9968,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.9987,0.2629,1.0,1.0
classification,synthetic,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0,0.2617,0.0,1.0,0.1808,0.0251,0.9996,0.0,0.1147,0.0001,0.0,0.9941,0.221
classification,synthetic,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0002,0.0015,0.6962,0.0,0.8998,1.0,0.2148,1.0,0.0,0.4321,0.0066,0.0,0.9995,0.6399
classification,synthetic,?A4=A1|3,deepseek-chat,0.0,0.0,0.0,0.0,0.0023,0.0178,0.9506,0.0,0.99,0.8659,1.0,1.0,0.0,0.8115,0.0595,0.0,1.0,0.9186
classification,synthetic,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0005,0.0,0.0022,0.0001,0.0,1.0,0.0,0.0,0.0,0.0,0.3238,0.0001
classification,synthetic,?A4=A1|3,gpt-5-nano,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
classification,synthetic,?A4=A1|3,deepseek-reasoner,0.0,0.0,0.0,0.0,0.0004,0.0017,0.8042,0.0,0.9393,0.6962,0.2983,1.0,0.0,1.0,0.0147,0.0,0.9998,0.7878
classification,synthetic,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0082,0.1358,0.3919,0.9995,0.0031,1.0,0.9972,0.9675,1.0,0.0,0.9934,1.0,0.0005,1.0,0.9977
classification,synthetic,?A4=A1|3,gpt-5-mini,0.0,0.2662,0.0,0.9,0.9937,0.9991,1.0,0.8129,1.0,1.0,1.0,1.0,0.0,1.0,0.9998,1.0,1.0,1.0
classification,synthetic,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0081,0.0,0.0207,0.0019,0.0,0.8204,0.0,0.0007,0.0,0.0,1.0,0.002
classification,synthetic,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0,0.0001,0.0002,0.6399,0.0,0.8761,0.5,0.1481,1.0,0.0,0.345,0.0055,0.0,0.9995,1.0
fixing,LC-QuAD,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1=A2,gpt-oss:20b,0.0,1.0,0.0361,0.9997,0.97,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9998,1.0
fixing,LC-QuAD,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.9802,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1=A2,mistral-small:24b,0.0,0.0007,0.0,1.0,0.065,0.9894,0.7906,0.9973,0.9296,1.0,0.9573,1.0,0.9999,0.9296,0.9999,1.0,0.5,1.0
fixing,LC-QuAD,?A1=A2,llama3.1:70b,0.0,0.0517,0.0003,0.9615,1.0,0.9999,0.9933,1.0,0.9991,1.0,0.9996,1.0,1.0,0.9989,1.0,1.0,0.9565,1.0
fixing,LC-QuAD,?A1=A2,gemini-2.0-flash,0.0,0.0,0.0,0.0222,0.0002,1.0,0.0967,0.7693,0.2712,0.9952,0.3359,1.0,0.9638,0.2712,0.9519,0.9967,0.024,0.9995
fixing,LC-QuAD,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0001,0.0,0.295,0.0137,0.9443,1.0,0.9887,0.8198,1.0,0.8209,1.0,0.9991,0.7995,0.9973,1.0,0.2629,1.0
fixing,LC-QuAD,?A1=A2,gpt-4o,0.0,0.0,0.0,0.0063,0.0,0.3294,0.0244,1.0,0.111,0.9878,0.1341,0.9996,0.8945,0.1312,0.8389,0.9862,0.0028,0.9975
fixing,LC-QuAD,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.1144,0.0022,0.8198,0.2712,0.9369,1.0,0.9996,0.6146,1.0,0.9943,0.5573,0.9931,0.9995,0.0924,1.0
fixing,LC-QuAD,?A1=A2,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0113,0.0001,0.0288,0.0012,1.0,0.0028,0.9461,0.2434,0.0012,0.1808,0.6445,0.0,0.8275
fixing,LC-QuAD,?A1=A2,deepseek-chat,0.0,0.0,0.0,0.0762,0.0011,0.7601,0.2559,0.9231,0.5,0.999,1.0,1.0,0.992,0.5,0.9818,0.9998,0.0632,0.9999
fixing,LC-QuAD,?A1=A2,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,0.0013,0.0,0.1148,0.0,1.0,0.0178,0.0,0.0096,0.1537,0.0,0.3238
fixing,LC-QuAD,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0003,0.0,0.0676,0.0023,0.1744,0.0129,0.8519,0.0178,0.9937,1.0,0.0147,0.5,0.8923,0.0,0.9646
fixing,LC-QuAD,?A1=A2,deepseek-reasoner,0.0,0.0,0.0,0.1144,0.0026,0.8198,0.2879,0.9196,0.5573,0.9996,0.6101,1.0,0.9934,1.0,0.9931,0.9995,0.1037,1.0
fixing,LC-QuAD,?A1=A2,gemini-2.5-pro,0.0,0.0,0.0,0.0004,0.0,0.0871,0.006,0.2399,0.0158,0.8998,0.0352,0.997,0.6286,0.0158,1.0,0.9123,0.0002,0.9749
fixing,LC-QuAD,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.008,0.0001,0.0298,0.0013,0.5,0.0007,0.9242,0.1885,0.0013,0.1553,1.0,0.0,0.7878
fixing,LC-QuAD,?A1=A2,o3,0.0,0.0004,0.0,0.5924,0.074,0.9876,0.8129,0.999,0.9444,1.0,0.9648,1.0,1.0,0.935,0.9999,1.0,1.0,1.0
fixing,LC-QuAD,?A1=A2,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0014,0.0,0.0069,0.0001,0.2858,0.0002,0.8204,0.0748,0.0001,0.0551,0.345,0.0,1.0
fixing,LC-QuAD,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0,0.0876,0.0009,0.9942,0.9381,0.9997,0.9973,1.0,0.9115,1.0,0.9989,0.9924,1.0,0.9957,0.9998,1.0
fixing,LC-QuAD,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1=A3+A4,mistral-small:24b,0.0,0.9456,0.0,1.0,0.0753,1.0,0.9989,1.0,1.0,1.0,0.9966,1.0,1.0,0.9999,1.0,0.9999,1.0,1.0
fixing,LC-QuAD,?A1=A3+A4,llama3.1:70b,0.0,0.9996,0.0003,0.9515,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1=A3+A4,gemini-2.0-flash,0.0,0.0112,0.0,0.0,0.0,1.0,0.1312,0.7906,0.5573,0.9985,0.1,0.9999,0.6583,0.4415,0.9952,0.5,0.7478,0.9481
fixing,LC-QuAD,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.1,0.0,0.0026,0.0,0.9196,1.0,0.9853,0.9324,1.0,0.446,1.0,0.9444,0.8785,0.9999,0.8736,0.9756,0.9957
fixing,LC-QuAD,?A1=A3+A4,gpt-4o,0.0,0.0008,0.0,0.0,0.0,0.295,0.0297,1.0,0.2841,0.9902,0.0259,0.9983,0.3919,0.2005,0.967,0.2483,0.5,0.8042
fixing,LC-QuAD,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.006,0.0,0.0001,0.0,0.5573,0.1163,0.8042,1.0,0.9995,0.0719,0.9998,0.6673,0.4388,0.9985,0.5,0.7601,0.9519
fixing,LC-QuAD,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.004,0.0,0.0218,0.0017,1.0,0.0,0.7634,0.008,0.0012,0.3506,0.0039,0.0138,0.0998
fixing,LC-QuAD,?A1=A3+A4,deepseek-chat,0.0,0.1403,0.0,0.007,0.0,0.9381,0.6583,0.9863,0.9605,1.0,1.0,1.0,0.9686,0.9369,1.0,0.9481,0.985,0.9995
fixing,LC-QuAD,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0005,0.0,0.0047,0.0006,0.3601,0.0,1.0,0.0014,0.0001,0.1635,0.0004,0.004,0.0365
fixing,LC-QuAD,?A1=A3+A4,gpt-5-nano,0.0,0.0027,0.0,0.0,0.0,0.446,0.0924,0.7084,0.4427,0.9967,0.0556,0.9995,1.0,0.3389,0.9931,0.3746,0.6673,0.9179
fixing,LC-QuAD,?A1=A3+A4,deepseek-reasoner,0.0,0.015,0.0,0.0003,0.0,0.6706,0.1908,0.8688,0.678,0.9996,0.111,1.0,0.7558,1.0,0.9992,0.6081,0.8438,0.973
fixing,LC-QuAD,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.0113,0.0003,0.0631,0.0045,0.779,0.0,0.9157,0.0168,0.0025,1.0,0.0058,0.0442,0.2088
fixing,LC-QuAD,?A1=A3+A4,gpt-5-mini,0.0,0.0092,0.0,0.0003,0.0,0.6026,0.1871,0.8296,0.6009,0.9984,0.092,0.9999,0.7388,0.5,0.9978,1.0,0.7906,0.9638
fixing,LC-QuAD,?A1=A3+A4,o3,0.0,0.0005,0.0,0.0,0.0,0.3444,0.046,0.6146,0.3359,0.9942,0.029,0.9986,0.4427,0.2354,0.9782,0.295,1.0,0.8659
fixing,LC-QuAD,?A1=A3+A4,gpt-5,0.0,0.0001,0.0,0.0,0.0,0.092,0.0092,0.2841,0.0871,0.9459,0.0015,0.9832,0.14,0.0519,0.8721,0.0676,0.2148,1.0
fixing,LC-QuAD,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1>A3,gpt-oss:20b,0.0,1.0,0.0,0.1102,0.029,0.6146,0.9756,0.9842,0.9987,1.0,0.9481,0.9999,0.7693,0.9917,0.9808,0.9978,0.9997,0.9968
fixing,LC-QuAD,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.9995,0.9968,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1>A3,mistral-small:24b,0.0,0.9332,0.0012,1.0,0.2841,0.9573,0.9997,0.9999,1.0,1.0,0.9992,1.0,0.9853,0.9999,0.9996,1.0,1.0,1.0
fixing,LC-QuAD,?A1>A3,llama3.1:70b,0.0,0.985,0.0064,0.8042,1.0,0.9889,1.0,1.0,1.0,1.0,1.0,1.0,0.9973,1.0,0.9999,1.0,1.0,1.0
fixing,LC-QuAD,?A1>A3,gemini-2.0-flash,0.0,0.5,0.0,0.0762,0.022,1.0,0.9825,0.9832,0.9993,1.0,0.9552,0.9999,0.7288,0.99,0.9795,0.9985,0.9996,0.997
fixing,LC-QuAD,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.0481,0.0,0.0008,0.0001,0.0401,1.0,0.655,0.8675,0.9887,0.4321,0.9827,0.1279,0.7077,0.57,0.8675,0.9564,0.8042
fixing,LC-QuAD,?A1>A3,gpt-4o,0.0,0.033,0.0,0.0004,0.0001,0.0365,0.5,1.0,0.8365,0.9867,0.3601,0.9622,0.0939,0.6445,0.5,0.8275,0.939,0.7634
fixing,LC-QuAD,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0038,0.0,0.0,0.0,0.0026,0.2291,0.2786,1.0,0.9283,0.105,0.8684,0.0205,0.3036,0.2122,0.5775,0.7976,0.5
fixing,LC-QuAD,?A1>A3,grok-3-mini,0.0,0.0002,0.0,0.0,0.0,0.0002,0.032,0.0392,0.1662,1.0,0.0085,0.5,0.0011,0.0577,0.0378,0.1917,0.4119,0.1431
fixing,LC-QuAD,?A1>A3,deepseek-chat,0.0,0.092,0.0,0.0023,0.0001,0.0877,0.6962,0.7634,0.9534,0.9978,1.0,0.9927,0.2088,0.8811,0.7214,0.932,0.9822,0.9242
fixing,LC-QuAD,?A1>A3,gemini-2.5-flash,0.0,0.0004,0.0,0.0,0.0,0.0004,0.0466,0.0843,0.2517,0.6762,0.0216,1.0,0.0038,0.0835,0.0392,0.2706,0.5,0.2024
fixing,LC-QuAD,?A1>A3,gpt-5-nano,0.0,0.3294,0.0,0.0297,0.0063,0.3804,0.9283,0.9506,0.9917,0.9997,0.8721,0.9987,1.0,0.9558,0.9283,0.9932,0.998,0.9856
fixing,LC-QuAD,?A1>A3,deepseek-reasoner,0.0,0.0192,0.0,0.0003,0.0,0.0235,0.4278,0.5,0.8491,0.9793,0.2403,0.9682,0.0821,1.0,0.4194,0.7709,0.9157,0.7483
fixing,LC-QuAD,?A1>A3,gemini-2.5-pro,0.0,0.0403,0.0,0.001,0.0002,0.0448,0.57,0.6399,0.8852,0.9855,0.4225,0.9867,0.1279,0.7294,1.0,0.8675,0.9564,0.8463
fixing,LC-QuAD,?A1>A3,gpt-5-mini,0.0,0.0058,0.0,0.0,0.0,0.0045,0.2291,0.2858,0.5775,0.9054,0.1325,0.8463,0.0175,0.3555,0.2291,1.0,0.7878,0.5
fixing,LC-QuAD,?A1>A3,o3,0.0,0.0011,0.0,0.0,0.0,0.0013,0.0925,0.1239,0.3388,0.7483,0.0436,0.6682,0.0057,0.1635,0.0925,0.345,1.0,0.2786
fixing,LC-QuAD,?A1>A3,gpt-5,0.0,0.0083,0.0,0.0001,0.0,0.0083,0.3038,0.3601,0.6612,0.9331,0.1537,0.895,0.0326,0.4119,0.2706,0.6494,0.8365,1.0
fixing,LC-QuAD,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1>A4,gpt-oss:20b,0.0,1.0,0.0,0.0946,0.0003,0.2617,0.4073,0.5,0.5,0.9893,0.3145,0.3318,0.8491,0.3238,0.8666,0.5982,0.9941,0.9539
fixing,LC-QuAD,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A1>A4,mistral-small:24b,0.0,0.9608,0.0,1.0,0.0326,0.8083,0.9054,0.9242,0.9242,0.9996,0.8275,0.8365,0.9915,0.8275,0.9941,0.9461,1.0,0.9993
fixing,LC-QuAD,?A1>A4,llama3.1:70b,0.0,0.9999,0.0,0.9856,1.0,0.997,0.9996,0.9997,0.9997,1.0,0.9989,0.9989,1.0,0.998,1.0,0.9999,1.0,1.0
fixing,LC-QuAD,?A1>A4,gemini-2.0-flash,0.0,0.8569,0.0,0.3318,0.0083,1.0,0.7383,0.7976,0.8338,0.9979,0.6682,0.6682,0.9682,0.6612,0.9824,0.8569,0.9995,0.9936
fixing,LC-QuAD,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.7597,0.0,0.1917,0.0017,0.4159,1.0,0.6762,0.6855,0.9935,0.5,0.5,0.9283,0.5,0.9283,0.7483,0.9983,0.9888
fixing,LC-QuAD,?A1>A4,gpt-4o,0.0,0.6762,0.0,0.1537,0.0011,0.3388,0.5,1.0,0.5881,0.9888,0.4119,0.4073,0.9102,0.4119,0.9102,0.6964,0.9968,0.9807
fixing,LC-QuAD,?A1>A4,gpt-4.1-2025-04-14,0.0,0.6762,0.0,0.1537,0.0011,0.3145,0.5,0.5881,1.0,0.9888,0.4018,0.4159,0.9102,0.4018,0.8949,0.6762,0.9968,0.9807
fixing,LC-QuAD,?A1>A4,grok-3-mini,0.0,0.0547,0.0,0.0022,0.0,0.0106,0.0287,0.0461,0.0461,1.0,0.0112,0.0176,0.2539,0.0176,0.2266,0.073,0.8125,0.5
fixing,LC-QuAD,?A1>A4,deepseek-chat,0.0,0.8338,0.0,0.2858,0.0035,0.5,0.6682,0.7483,0.7728,0.9983,1.0,0.5881,0.9519,0.5881,0.9616,0.8491,0.9998,0.9894
fixing,LC-QuAD,?A1>A4,gemini-2.5-flash,0.0,0.8083,0.0,0.2786,0.0035,0.5,0.6762,0.7597,0.7383,0.9963,0.5881,1.0,0.9713,0.5927,0.9713,0.8204,0.9991,0.9935
fixing,LC-QuAD,?A1>A4,gpt-5-nano,0.0,0.3036,0.0,0.0262,0.0001,0.0835,0.1662,0.212,0.212,0.9102,0.1189,0.0898,1.0,0.073,0.623,0.2744,0.9648,0.8281
fixing,LC-QuAD,?A1>A4,deepseek-reasoner,0.0,0.8204,0.0,0.2858,0.0057,0.5,0.6682,0.7483,0.7728,0.9963,0.5881,0.5927,0.9807,1.0,0.9807,0.8338,0.9991,0.9935
fixing,LC-QuAD,?A1>A4,gemini-2.5-pro,0.0,0.2905,0.0,0.0207,0.0001,0.0592,0.1662,0.212,0.2272,0.9375,0.1051,0.0898,0.623,0.073,1.0,0.2905,0.9648,0.8281
fixing,LC-QuAD,?A1>A4,gpt-5-mini,0.0,0.5982,0.0,0.1148,0.0003,0.2617,0.4119,0.5,0.5,0.9807,0.3036,0.3238,0.8867,0.3145,0.8666,1.0,0.9941,0.9539
fixing,LC-QuAD,?A1>A4,o3,0.0,0.0327,0.0,0.0003,0.0,0.0037,0.0112,0.0193,0.0193,0.5,0.0032,0.0065,0.1445,0.0065,0.1445,0.0327,1.0,0.3438
fixing,LC-QuAD,?A1>A4,gpt-5,0.0,0.1334,0.0,0.0038,0.0,0.0245,0.0461,0.073,0.073,0.7734,0.0384,0.0287,0.377,0.0287,0.377,0.1334,0.8906,1.0
fixing,LC-QuAD,?A3∅A4,llama3.1:8b,1.0,1.0,0.7906,1.0,1.0,1.0,1.0,0.4495,1.0,1.0,1.0,1.0,0.8358,1.0,1.0,0.646,1.0,1.0
fixing,LC-QuAD,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0,0.9936,0.5927,0.9755,0.1808,0.0,0.9165,0.8083,0.5841,0.9423,0.0,0.9713,0.9991,0.0,0.9888,0.0214
fixing,LC-QuAD,?A3∅A4,gpt-4.1-nano-2025-04-14,0.295,1.0,1.0,1.0,1.0,1.0,1.0,0.2213,1.0,1.0,1.0,1.0,0.6482,1.0,1.0,0.405,1.0,0.9997
fixing,LC-QuAD,?A3∅A4,mistral-small:24b,0.0,0.0245,0.0,1.0,0.0245,0.3633,0.0001,0.0,0.1938,0.0898,0.0176,0.2744,0.0,0.2744,0.7734,0.0,0.377,0.0001
fixing,LC-QuAD,?A3∅A4,llama3.1:70b,0.0,0.5927,0.0,0.9936,1.0,0.9755,0.1635,0.0,0.9165,0.8338,0.5881,0.9616,0.0,0.9616,0.9998,0.0,0.9755,0.0251
fixing,LC-QuAD,?A3∅A4,gemini-2.0-flash,0.0,0.0717,0.0,0.8555,0.0717,1.0,0.0073,0.0,0.3953,0.212,0.0717,0.5,0.0,0.5,0.9102,0.0,0.623,0.0003
fixing,LC-QuAD,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,0.8998,0.0,1.0,0.9157,0.998,1.0,0.0,0.9947,0.9904,0.9242,0.9987,0.0,0.9987,1.0,0.0,0.9987,0.2088
fixing,LC-QuAD,?A3∅A4,gpt-4o,0.6482,1.0,0.8472,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9238,1.0,1.0,0.7646,1.0,1.0
fixing,LC-QuAD,?A3∅A4,gpt-4.1-2025-04-14,0.0,0.1796,0.0,0.927,0.1796,0.788,0.0173,0.0,1.0,0.3953,0.1662,0.7095,0.0,0.7461,0.9922,0.0,0.8281,0.0012
fixing,LC-QuAD,?A3∅A4,grok-3-mini,0.0,0.3318,0.0,0.9713,0.3145,0.9102,0.0318,0.0,0.788,1.0,0.2905,0.9375,0.0,0.8867,0.998,0.0,0.927,0.0041
fixing,LC-QuAD,?A3∅A4,deepseek-chat,0.0,0.5841,0.0,0.9963,0.5881,0.9755,0.1537,0.0,0.9283,0.8666,1.0,0.9893,0.0,0.9713,0.9998,0.0,0.9888,0.0145
fixing,LC-QuAD,?A3∅A4,gemini-2.5-flash,0.0,0.1316,0.0,0.8867,0.1051,0.7256,0.0059,0.0,0.5,0.2266,0.0547,1.0,0.0,0.623,0.9844,0.0,0.7256,0.0005
fixing,LC-QuAD,?A3∅A4,gpt-5-nano,0.2319,1.0,0.4495,1.0,1.0,1.0,1.0,0.1264,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2841,1.0,1.0
fixing,LC-QuAD,?A3∅A4,deepseek-reasoner,0.0,0.0898,0.0,0.8867,0.1051,0.7095,0.0059,0.0,0.5,0.2744,0.0898,0.623,0.0,1.0,0.9844,0.0,0.7734,0.0007
fixing,LC-QuAD,?A3∅A4,gemini-2.5-pro,0.0,0.0065,0.0,0.5,0.0032,0.2539,0.0001,0.0,0.0625,0.0195,0.0032,0.1094,0.0,0.1094,1.0,0.0,0.2266,0.0
fixing,LC-QuAD,?A3∅A4,gpt-5-mini,0.4503,1.0,0.6848,1.0,1.0,1.0,1.0,0.3327,1.0,1.0,1.0,1.0,0.8042,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A3∅A4,o3,0.0,0.0461,0.0,0.8281,0.0717,0.623,0.0053,0.0,0.377,0.1938,0.0461,0.5,0.0,0.5,0.9375,0.0,1.0,0.0
fixing,LC-QuAD,?A3∅A4,gpt-5,0.0001,0.9919,0.0008,1.0,0.99,0.9999,0.8721,0.0,0.9997,0.9988,0.9953,0.9999,0.0001,0.9998,1.0,0.0,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.3127,0.0043,0.9995,0.97,0.0082,0.9998,1.0,0.7787,1.0,0.0164,0.9982,1.0,0.0019,1.0,0.9818
fixing,LC-QuAD,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,mistral-small:24b,0.0,0.7681,0.0,1.0,0.0485,1.0,0.9944,0.049,1.0,1.0,0.8997,1.0,0.1175,0.9997,1.0,0.0232,1.0,0.9907
fixing,LC-QuAD,?A4=A1|3,llama3.1:70b,0.0,0.9981,0.0001,0.9701,1.0,1.0,1.0,0.6641,1.0,1.0,0.9999,1.0,0.7787,1.0,1.0,0.5,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,gemini-2.0-flash,0.0,0.0013,0.0,0.0001,0.0,1.0,0.0557,0.0,0.5,0.9703,0.0049,0.9998,0.0,0.3258,0.9967,0.0,0.6026,0.074
fixing,LC-QuAD,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0517,0.0,0.0113,0.0,0.9703,1.0,0.0,0.9734,0.9998,0.1358,1.0,0.0003,0.9033,1.0,0.0,0.971,0.5489
fixing,LC-QuAD,?A4=A1|3,gpt-4o,0.0,0.9962,0.0,0.9721,0.4439,1.0,1.0,1.0,1.0,1.0,0.9994,1.0,0.6955,1.0,1.0,0.3991,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0005,0.0,0.0001,0.0,0.6146,0.0541,0.0,1.0,0.9842,0.0018,0.9997,0.0,0.3804,0.9996,0.0,0.6641,0.0885
fixing,LC-QuAD,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0557,0.0006,0.0,0.033,1.0,0.0,0.9646,0.0,0.0098,0.8675,0.0,0.0676,0.0009
fixing,LC-QuAD,?A4=A1|3,deepseek-chat,0.0,0.3045,0.0,0.1477,0.0002,0.9978,0.9155,0.0016,0.9994,1.0,1.0,1.0,0.0038,0.9978,1.0,0.0001,0.9989,0.9196
fixing,LC-QuAD,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0007,0.0,0.0,0.0008,0.0748,0.0,1.0,0.0,0.0001,0.2706,0.0,0.0018,0.0
fixing,LC-QuAD,?A4=A1|3,gpt-5-nano,0.0,0.9923,0.0,0.923,0.3045,1.0,0.9999,0.3991,1.0,1.0,0.9984,1.0,1.0,1.0,1.0,0.2307,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,deepseek-reasoner,0.0,0.0041,0.0,0.0008,0.0,0.7743,0.1562,0.0,0.7288,0.996,0.0058,1.0,0.0,1.0,0.9999,0.0,0.8092,0.1748
fixing,LC-QuAD,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.008,0.0,0.0,0.0015,0.2291,0.0,0.8463,0.0,0.0005,1.0,0.0,0.0113,0.0
fixing,LC-QuAD,?A4=A1|3,gpt-5-mini,0.0,0.9993,0.0001,0.9876,0.6026,1.0,1.0,0.6955,1.0,1.0,1.0,1.0,0.849,1.0,1.0,1.0,1.0,1.0
fixing,LC-QuAD,?A4=A1|3,o3,0.0,0.0001,0.0,0.0001,0.0,0.5,0.0524,0.0,0.4439,0.9638,0.0027,0.9994,0.0,0.28,0.9952,0.0,1.0,0.033
fixing,LC-QuAD,?A4=A1|3,gpt-5,0.0,0.0352,0.0,0.0172,0.0,0.9565,0.5489,0.0,0.9476,0.9997,0.1312,1.0,0.0001,0.8856,1.0,0.0,0.9842,1.0
fixing,overall,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A2,gpt-oss:20b,0.0,1.0,0.0353,1.0,0.9211,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.9708,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0026,1.0,1.0,0.9101,1.0
fixing,overall,?A1=A2,llama3.1:70b,0.0,0.0923,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A2,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.1033,1.0,0.9993,1.0,0.9922,1.0,1.0,0.0,0.0837,1.0,0.0,1.0
fixing,overall,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.9091,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,0.0,0.4578,1.0,0.0012,1.0
fixing,overall,?A1=A2,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0035,0.9987,0.0003,1.0,0.1371,0.0,0.0,0.9398,0.0,1.0
fixing,overall,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.9973,1.0,1.0,0.2305,1.0,0.9442,0.0,0.0,1.0,0.0,1.0
fixing,overall,?A1=A2,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0017,0.0,1.0,0.0,0.8721,0.0,0.0,0.0,0.0878,0.0,0.9977
fixing,overall,?A1=A2,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.0095,0.0001,0.9998,0.7924,1.0,1.0,1.0,0.991,0.0,0.0001,1.0,0.0,1.0
fixing,overall,?A1=A2,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1479,0.0,1.0,0.0,0.0,0.0,0.0061,0.0,0.9639
fixing,overall,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8798,0.065,1.0,0.0111,1.0,1.0,0.0,0.0,0.9965,0.0,1.0
fixing,overall,?A1=A2,deepseek-reasoner,0.0,0.0,0.0,0.9979,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A2,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.9267,0.5701,1.0,1.0,1.0,0.9999,1.0,1.0,0.0,1.0,1.0,0.0017,1.0
fixing,overall,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0709,0.0,0.9253,0.0,0.9953,0.0045,0.0,0.0,1.0,0.0,1.0
fixing,overall,?A1=A2,o3,0.0,0.0,0.0,0.1009,0.0,1.0,0.999,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.9987,1.0,1.0,1.0
fixing,overall,?A1=A2,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0031,0.0,0.0445,0.0,0.0,0.0,0.0,0.0,1.0
fixing,overall,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0313,1.0,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,mistral-small:24b,0.0,1.0,0.0,1.0,0.7651,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,llama3.1:70b,0.0,1.0,0.0,0.253,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.0422,0.9971,0.9908,1.0,0.3548,1.0,0.3565,0.0,0.7176,0.9983,0.9999,1.0
fixing,overall,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.9635,1.0,1.0,1.0,1.0,0.9168,1.0,0.9149,0.0,0.9889,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0036,0.0,1.0,0.3463,1.0,0.0008,1.0,0.0009,0.0,0.016,0.5697,0.8149,1.0
fixing,overall,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.011,0.0,0.6798,1.0,1.0,0.0027,1.0,0.0031,0.0,0.0372,0.7256,0.9057,1.0
fixing,overall,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.8846
fixing,overall,?A1=A3+A4,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.6701,0.094,0.9993,0.9979,1.0,1.0,1.0,0.5133,0.0,0.835,0.9996,1.0,1.0
fixing,overall,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0016
fixing,overall,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.6681,0.096,0.9993,0.9975,1.0,0.5133,1.0,1.0,0.0,0.8356,0.9997,1.0,1.0
fixing,overall,?A1=A3+A4,deepseek-reasoner,0.0,0.9743,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.3058,0.0131,0.9865,0.9681,1.0,0.1822,1.0,0.1816,0.0,1.0,0.992,0.999,1.0
fixing,overall,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0021,0.0,0.458,0.2984,1.0,0.0006,1.0,0.0004,0.0,0.0096,1.0,0.7725,1.0
fixing,overall,?A1=A3+A4,o3,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,0.2048,0.1068,1.0,0.0,1.0,0.0,0.0,0.0012,0.2495,1.0,1.0
fixing,overall,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1323,0.0,0.9988,0.0,0.0,0.0,0.0,0.0,1.0
fixing,overall,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1>A3,gpt-oss:20b,0.0,1.0,0.0,0.0002,0.0,0.9909,1.0,1.0,1.0,1.0,1.0,1.0,0.9508,0.0015,0.3991,1.0,1.0,1.0
fixing,overall,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1>A3,mistral-small:24b,0.0,0.9999,0.0,1.0,0.0557,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8006,1.0,1.0,1.0,1.0
fixing,overall,?A1>A3,llama3.1:70b,0.0,1.0,0.0,0.9515,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9929,1.0,1.0,1.0,1.0
fixing,overall,?A1>A3,gemini-2.0-flash,0.0,0.0121,0.0,0.0,0.0,1.0,0.9998,1.0,1.0,1.0,1.0,1.0,0.1809,0.0,0.0001,1.0,1.0,1.0
fixing,overall,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0002,1.0,1.0,1.0,1.0,0.9105,0.9977,0.0,0.0,0.0,1.0,1.0,1.0
fixing,overall,?A1>A3,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5212,0.8769,0.0002,0.0249,0.0,0.0,0.0,0.4377,0.6042,0.9986
fixing,overall,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5212,1.0,0.8726,0.0002,0.0229,0.0,0.0,0.0,0.437,0.6031,0.9986
fixing,overall,?A1>A3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1471,0.1514,1.0,0.0,0.001,0.0,0.0,0.0,0.1048,0.2052,0.9721
fixing,overall,?A1>A3,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.0,0.1055,0.9998,0.9999,1.0,1.0,0.943,0.0,0.0,0.0,0.9997,0.9999,1.0
fixing,overall,?A1>A3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0031,0.9804,0.9821,0.9993,0.0688,1.0,0.0,0.0,0.0,0.9699,0.9889,1.0
fixing,overall,?A1>A3,gpt-5-nano,0.0,0.061,0.0,0.0,0.0,0.8392,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0041,1.0,1.0,1.0
fixing,overall,?A1>A3,deepseek-reasoner,0.0,0.9989,0.0,0.2188,0.0086,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0
fixing,overall,?A1>A3,gemini-2.5-pro,0.0,0.6399,0.0,0.0,0.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,0.9967,0.0001,1.0,1.0,1.0,1.0
fixing,overall,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6031,0.6042,0.9137,0.0005,0.0377,0.0,0.0,0.0,1.0,0.6823,0.9994
fixing,overall,?A1>A3,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.437,0.4377,0.8246,0.0001,0.0145,0.0,0.0,0.0,0.3561,1.0,0.997
fixing,overall,?A1>A3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.002,0.0362,0.0,0.0,0.0,0.0,0.0,0.0008,0.0041,1.0
fixing,overall,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1>A4,gpt-oss:20b,0.0,1.0,0.0,0.0219,0.0,0.4111,1.0,0.9947,0.9859,1.0,0.9976,0.999,1.0,0.0,0.4698,0.9968,0.9864,1.0
fixing,overall,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1>A4,mistral-small:24b,0.0,0.9848,0.0,1.0,0.0,0.9941,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.996,1.0,1.0,1.0
fixing,overall,?A1>A4,llama3.1:70b,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1>A4,gemini-2.0-flash,0.0,0.646,0.0,0.0079,0.0,1.0,1.0,1.0,0.9998,1.0,1.0,1.0,1.0,0.0,0.6045,1.0,0.9998,1.0
fixing,overall,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0061,0.0012,1.0,0.0133,0.0483,0.958,0.0,0.0,0.0137,0.0013,1.0
fixing,overall,?A1>A4,gpt-4o,0.0,0.0084,0.0,0.0,0.0,0.0001,0.996,1.0,0.3264,1.0,0.6297,0.8096,1.0,0.0,0.0001,0.6308,0.323,1.0
fixing,overall,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0211,0.0,0.0,0.0,0.0003,0.9993,0.7185,1.0,1.0,0.8064,0.9198,1.0,0.0,0.0007,0.8003,0.525,1.0
fixing,overall,?A1>A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0009,0.0,0.0,0.0,0.0,0.2141
fixing,overall,?A1>A4,deepseek-chat,0.0,0.004,0.0,0.0,0.0,0.0,0.9909,0.4213,0.2322,1.0,1.0,0.7317,1.0,0.0,0.0,0.5265,0.2407,1.0
fixing,overall,?A1>A4,gemini-2.5-flash,0.0,0.0017,0.0,0.0,0.0,0.0,0.9645,0.2292,0.1021,1.0,0.3154,1.0,0.9997,0.0,0.0,0.3178,0.109,1.0
fixing,overall,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.059,0.0,0.0,0.9996,0.0001,0.0005,1.0,0.0,0.0,0.0001,0.0,0.9931
fixing,overall,?A1>A4,deepseek-reasoner,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A1>A4,gemini-2.5-pro,0.0,0.5899,0.0,0.0053,0.0,0.4368,1.0,0.9999,0.9995,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9995,1.0
fixing,overall,?A1>A4,gpt-5-mini,0.0,0.0053,0.0,0.0,0.0,0.0,0.9906,0.4206,0.238,1.0,0.5265,0.7289,1.0,0.0,0.0,1.0,0.2361,1.0
fixing,overall,?A1>A4,o3,0.0,0.0204,0.0,0.0,0.0,0.0004,0.9992,0.7227,0.525,1.0,0.7973,0.9133,1.0,0.0,0.0007,0.8023,1.0,1.0
fixing,overall,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8459,0.0,0.0,0.0118,0.0,0.0,0.0,0.0,1.0
fixing,overall,?A3∅A4,llama3.1:8b,1.0,1.0,0.512,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0,0.7602,0.0264,0.9637,0.0,0.0,0.5394,0.6156,0.2484,0.6896,0.0,0.0083,0.9998,0.0,0.9978,0.0
fixing,overall,?A3∅A4,gpt-4.1-nano-2025-04-14,0.512,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A3∅A4,mistral-small:24b,0.0,0.3069,0.0,1.0,0.0002,0.9455,0.0,0.0,0.2229,0.3068,0.0336,0.4168,0.0,0.0,1.0,0.0,0.9981,0.0
fixing,overall,?A3∅A4,llama3.1:70b,0.0,0.9831,0.0,0.9999,1.0,1.0,0.0,0.0,0.9979,0.9993,0.9635,0.9997,0.0,0.262,1.0,0.0,1.0,0.0
fixing,overall,?A3∅A4,gemini-2.0-flash,0.0,0.0567,0.0,0.0736,0.0,1.0,0.0,0.0,0.0099,0.0196,0.0004,0.0396,0.0,0.0,0.9955,0.0,0.9292,0.0
fixing,overall,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0003,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
fixing,overall,?A3∅A4,gpt-4o,0.0,1.0,0.0,1.0,1.0,1.0,0.9998,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
fixing,overall,?A3∅A4,gpt-4.1-2025-04-14,0.0,0.5394,0.0,0.8163,0.0031,0.9934,0.0,0.0,1.0,0.6395,0.1515,0.743,0.0,0.0002,1.0,0.0,0.9999,0.0
fixing,overall,?A3∅A4,grok-3-mini,0.0,0.461,0.0,0.7419,0.0011,0.9866,0.0,0.0,0.4152,1.0,0.099,0.6423,0.0,0.0001,1.0,0.0,0.9998,0.0
fixing,overall,?A3∅A4,deepseek-chat,0.0,0.8089,0.0,0.9754,0.0476,0.9998,0.0,0.0,0.8785,0.9226,1.0,0.9594,0.0,0.0079,1.0,0.0,1.0,0.0
fixing,overall,?A3∅A4,gemini-2.5-flash,0.0,0.3833,0.0,0.6368,0.0004,0.9718,0.0,0.0,0.3058,0.4134,0.0544,1.0,0.0,0.0,1.0,0.0,0.9995,0.0
fixing,overall,?A3∅A4,gpt-5-nano,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0003,1.0,0.8542
fixing,overall,?A3∅A4,deepseek-reasoner,0.0,0.995,0.0,1.0,0.7743,1.0,0.0,0.0,0.9999,1.0,0.9944,1.0,0.0,1.0,1.0,0.0,1.0,0.0
fixing,overall,?A3∅A4,gemini-2.5-pro,0.0,0.0005,0.0,0.0,0.0,0.0075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.1514,0.0
fixing,overall,?A3∅A4,gpt-5-mini,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9998,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A3∅A4,o3,0.0,0.0044,0.0,0.0031,0.0,0.0974,0.0,0.0,0.0001,0.0003,0.0,0.0009,0.0,0.0,0.8884,0.0,1.0,0.0
fixing,overall,?A3∅A4,gpt-5,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.1631,1.0,1.0,0.0,1.0,1.0
fixing,overall,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.0,0.0,1.0,0.9998,0.9991,1.0,1.0,1.0,1.0,0.0,0.0338,1.0,0.0001,1.0,0.9629
fixing,overall,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,overall,?A4=A1|3,mistral-small:24b,0.0,1.0,0.0,1.0,0.6457,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9876,1.0,1.0,0.999,1.0,1.0
fixing,overall,?A4=A1|3,llama3.1:70b,0.0,1.0,0.0,0.3759,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9722,1.0,1.0,0.9969,1.0,1.0
fixing,overall,?A4=A1|3,gemini-2.0-flash,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.8003,1.0,0.0133,1.0,0.0,0.0,0.799,0.0,0.9974,0.0
fixing,overall,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0002,0.0,0.0,0.0,1.0,1.0,0.2335,1.0,1.0,0.9573,1.0,0.0,0.0,1.0,0.0,1.0,0.0053
fixing,overall,?A4=A1|3,gpt-4o,0.0,0.0013,0.0,0.0,0.0,1.0,0.7854,1.0,1.0,1.0,0.993,1.0,0.0,0.0,1.0,0.0,1.0,0.038
fixing,overall,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.2191,0.0,0.0,1.0,1.0,0.001,1.0,0.0,0.0,0.5136,0.0,0.9746,0.0
fixing,overall,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0005,0.0
fixing,overall,?A4=A1|3,deepseek-chat,0.0,0.0,0.0,0.0,0.0,0.9888,0.049,0.0084,0.9992,1.0,1.0,1.0,0.0,0.0,0.9989,0.0,1.0,0.0
fixing,overall,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
fixing,overall,?A4=A1|3,gpt-5-nano,0.0,1.0,0.0,0.0144,0.0318,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,0.8126,1.0,1.0
fixing,overall,?A4=A1|3,deepseek-reasoner,0.0,0.9721,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0002,1.0,1.0,0.003,1.0,1.0
fixing,overall,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.2204,0.0,0.0,0.5136,1.0,0.0014,1.0,0.0,0.0,1.0,0.0,0.9737,0.0
fixing,overall,?A4=A1|3,gpt-5-mini,0.0,1.0,0.0,0.0012,0.0038,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.2044,0.9975,1.0,1.0,1.0,1.0
fixing,overall,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0032,0.0,0.0,0.0297,0.9996,0.0,1.0,0.0,0.0,0.0308,0.0,1.0,0.0
fixing,overall,?A4=A1|3,gpt-5,0.0,0.0448,0.0,0.0,0.0,1.0,0.9956,0.9667,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0
fixing,qawiki,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1=A2,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.9995,0.9716,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.984,1.0,1.0,1.0,1.0
fixing,qawiki,?A1=A2,mistral-small:24b,0.0,1.0,0.0012,1.0,0.0556,0.9799,0.9191,0.9999,0.985,1.0,0.9977,1.0,0.97,0.1073,0.985,1.0,0.8432,1.0
fixing,qawiki,?A1=A2,llama3.1:70b,0.0,1.0,0.048,0.9686,1.0,1.0,0.9992,1.0,1.0,1.0,1.0,1.0,0.9998,0.6506,0.9999,1.0,0.9964,1.0
fixing,qawiki,?A1=A2,gemini-2.0-flash,0.0,1.0,0.0,0.038,0.0001,1.0,0.3136,0.98,0.6196,0.9865,0.8785,0.998,0.5573,0.0009,0.617,0.9901,0.2135,0.9998
fixing,qawiki,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.1264,0.0019,0.7912,1.0,0.9967,0.8256,0.9981,0.9519,0.9998,0.7478,0.0043,0.7865,0.9984,0.3991,1.0
fixing,qawiki,?A1=A2,gpt-4o,0.0,1.0,0.0,0.0003,0.0,0.0395,0.008,1.0,0.0481,0.6864,0.1856,0.7443,0.0235,0.0,0.0557,0.7443,0.0023,0.9393
fixing,qawiki,?A1=A2,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.029,0.0001,0.5,0.2664,0.9756,1.0,0.9931,0.8198,0.9931,0.5,0.0004,0.5598,0.9907,0.1659,0.9995
fixing,qawiki,?A1=A2,grok-3-mini,0.0,1.0,0.0,0.0001,0.0,0.0267,0.0047,0.4357,0.0168,1.0,0.1055,0.6286,0.0244,0.0,0.0244,0.6321,0.0005,0.8923
fixing,qawiki,?A1=A2,deepseek-chat,0.0,1.0,0.0,0.0052,0.0,0.2025,0.0871,0.8837,0.2712,0.9414,1.0,0.9712,0.2483,0.0,0.2712,0.9481,0.0524,0.9953
fixing,qawiki,?A1=A2,gemini-2.5-flash,0.0,1.0,0.0,0.0,0.0,0.0057,0.0008,0.3714,0.0158,0.5,0.0607,1.0,0.0098,0.0,0.01,0.5679,0.0011,0.8852
fixing,qawiki,?A1=A2,gpt-5-nano,0.0,1.0,0.0,0.0517,0.0004,0.5573,0.3444,0.99,0.6043,0.988,0.8296,0.996,1.0,0.0009,0.6026,0.9968,0.2094,0.9998
fixing,qawiki,?A1=A2,deepseek-reasoner,0.0,1.0,0.0284,0.932,0.4487,0.9996,0.9981,1.0,0.9998,1.0,1.0,1.0,0.9996,1.0,0.9999,1.0,0.994,1.0
fixing,qawiki,?A1=A2,gemini-2.5-pro,0.0,1.0,0.0,0.029,0.0002,0.5,0.2983,0.9703,0.5598,0.9887,0.8198,0.9962,0.5,0.0003,1.0,0.988,0.1704,0.9995
fixing,qawiki,?A1=A2,gpt-5-mini,0.0,1.0,0.0,0.0,0.0,0.0201,0.0039,0.3714,0.02,0.5,0.092,0.5679,0.0083,0.0,0.0244,1.0,0.0003,0.895
fixing,qawiki,?A1=A2,o3,0.0,1.0,0.0,0.225,0.0076,0.8554,0.6955,0.9992,0.8942,0.9999,0.971,0.9996,0.8597,0.0124,0.8898,0.9999,1.0,1.0
fixing,qawiki,?A1=A2,gpt-5,0.0,1.0,0.0,0.0,0.0,0.0007,0.0001,0.1147,0.0014,0.1885,0.0119,0.2122,0.0007,0.0,0.0014,0.2024,0.0,1.0
fixing,qawiki,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1=A3+A4,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1=A3+A4,mistral-small:24b,0.0,1.0,0.0,1.0,0.3494,0.9987,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7617,0.9763,1.0,1.0,1.0
fixing,qawiki,?A1=A3+A4,llama3.1:70b,0.0,1.0,0.0,0.7405,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8892,0.9969,1.0,1.0,1.0
fixing,qawiki,?A1=A3+A4,gemini-2.0-flash,0.0,1.0,0.0,0.0028,0.0004,1.0,0.8092,0.9974,0.985,0.9991,0.9296,1.0,0.8858,0.0124,0.1909,1.0,0.9615,1.0
fixing,qawiki,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0001,0.0,0.28,1.0,0.9923,0.9443,0.994,0.8144,1.0,0.7371,0.0032,0.068,0.9998,0.8898,1.0
fixing,qawiki,?A1=A3+A4,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.0056,0.0164,1.0,0.2354,0.6706,0.0631,0.9962,0.0595,0.0,0.0003,0.9129,0.1456,0.998
fixing,qawiki,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0,0.0,0.029,0.0967,0.8438,1.0,0.9179,0.295,0.9999,0.2307,0.0,0.0016,0.9931,0.446,0.9997
fixing,qawiki,?A1=A3+A4,grok-3-mini,0.0,1.0,0.0,0.0,0.0,0.0022,0.0124,0.4415,0.14,1.0,0.0395,0.9947,0.0218,0.0,0.0002,0.8659,0.1102,0.9943
fixing,qawiki,?A1=A3+A4,deepseek-chat,0.0,1.0,0.0,0.0,0.0,0.1144,0.2757,0.967,0.7906,0.98,1.0,1.0,0.5,0.0001,0.0178,0.9977,0.7159,1.0
fixing,qawiki,?A1=A3+A4,gemini-2.5-flash,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0003,0.0147,0.0001,1.0,0.0,0.0,0.0,0.1481,0.0003,0.6494
fixing,qawiki,?A1=A3+A4,gpt-5-nano,0.0,1.0,0.0,0.0,0.0,0.1678,0.3518,0.9675,0.849,0.9902,0.6009,1.0,1.0,0.0002,0.0232,0.9994,0.7441,1.0
fixing,qawiki,?A1=A3+A4,deepseek-reasoner,0.0,1.0,0.0,0.3177,0.1642,0.9937,0.9985,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,0.9381,1.0,0.9999,1.0
fixing,qawiki,?A1=A3+A4,gemini-2.5-pro,0.0,1.0,0.0,0.0403,0.0067,0.8698,0.9592,0.9999,0.9994,0.9999,0.9907,1.0,0.9876,0.1,1.0,1.0,0.9957,1.0
fixing,qawiki,?A1=A3+A4,gpt-5-mini,0.0,1.0,0.0,0.0,0.0,0.0001,0.0006,0.1456,0.0168,0.2148,0.0055,0.9186,0.0018,0.0,0.0,1.0,0.0182,0.9564
fixing,qawiki,?A1=A3+A4,o3,0.0,1.0,0.0,0.0,0.0,0.065,0.1704,0.9129,0.6583,0.9332,0.3877,0.9999,0.347,0.0002,0.0085,0.9912,1.0,0.9999
fixing,qawiki,?A1=A3+A4,gpt-5,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0057,0.0008,0.0144,0.0,0.5,0.0,0.0,0.0,0.0925,0.0003,1.0
fixing,qawiki,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1>A3,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.9936,0.9897,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9857,0.9992,1.0,1.0,1.0
fixing,qawiki,?A1>A3,mistral-small:24b,0.0,1.0,0.0122,1.0,0.4449,0.9953,0.9991,1.0,0.9999,1.0,1.0,1.0,0.9898,0.4478,0.8432,1.0,0.9998,1.0
fixing,qawiki,?A1>A3,llama3.1:70b,0.0,1.0,0.0192,0.6611,1.0,0.9968,0.9996,1.0,1.0,1.0,1.0,1.0,0.9937,0.5505,0.9037,1.0,0.9998,1.0
fixing,qawiki,?A1>A3,gemini-2.0-flash,0.0,1.0,0.0,0.0106,0.0072,1.0,0.7912,0.9991,0.9506,0.9991,0.9832,0.9878,0.5585,0.0055,0.092,1.0,0.889,1.0
fixing,qawiki,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0023,0.0011,0.3136,1.0,0.9959,0.8519,0.9959,0.9564,0.9393,0.3179,0.0011,0.038,0.9998,0.7288,0.9999
fixing,qawiki,?A1>A3,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.003,0.0121,1.0,0.0758,0.5927,0.221,0.2122,0.0008,0.0,0.0,0.9283,0.0494,0.9165
fixing,qawiki,?A1>A3,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0003,0.0001,0.0939,0.2434,0.968,1.0,0.9564,0.779,0.7566,0.0877,0.0001,0.004,0.998,0.43,0.998
fixing,qawiki,?A1>A3,grok-3-mini,0.0,1.0,0.0,0.0,0.0,0.003,0.0121,0.5927,0.0925,1.0,0.221,0.221,0.003,0.0,0.0,0.9283,0.0436,0.9283
fixing,qawiki,?A1>A3,deepseek-chat,0.0,1.0,0.0,0.0001,0.0,0.0365,0.0925,0.8761,0.3506,0.8761,1.0,0.5722,0.0403,0.0,0.001,0.9941,0.2366,0.9915
fixing,qawiki,?A1>A3,gemini-2.5-flash,0.0,1.0,0.0,0.0,0.0,0.0288,0.1147,0.8852,0.3642,0.8761,0.5722,1.0,0.0326,0.0,0.0006,0.9887,0.221,0.9887
fixing,qawiki,?A1>A3,gpt-5-nano,0.0,1.0,0.0,0.0198,0.0124,0.5585,0.7852,0.9998,0.9552,0.9991,0.9808,0.9856,1.0,0.0072,0.092,1.0,0.8785,1.0
fixing,qawiki,?A1>A3,deepseek-reasoner,0.0,1.0,0.0252,0.653,0.5505,0.9977,0.9996,1.0,1.0,1.0,1.0,1.0,0.9968,1.0,0.9076,1.0,1.0,1.0
fixing,qawiki,?A1>A3,gemini-2.5-pro,0.0,1.0,0.0018,0.225,0.1488,0.9481,0.9799,1.0,0.9985,1.0,0.9996,0.9998,0.9481,0.1446,1.0,1.0,0.9937,1.0
fixing,qawiki,?A1>A3,gpt-5-mini,0.0,1.0,0.0,0.0,0.0,0.0,0.0007,0.1662,0.0073,0.1662,0.0207,0.032,0.0,0.0,0.0,1.0,0.002,0.623
fixing,qawiki,?A1>A3,o3,0.0,1.0,0.0,0.0005,0.0005,0.1802,0.3804,0.9786,0.7017,0.9822,0.8595,0.8761,0.1908,0.0001,0.0135,0.9995,1.0,0.9992
fixing,qawiki,?A1>A3,gpt-5,0.0,1.0,0.0,0.0,0.0,0.0002,0.0005,0.1796,0.0073,0.1662,0.0262,0.032,0.0,0.0,0.0,0.623,0.003,1.0
fixing,qawiki,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1>A4,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A1>A4,mistral-small:24b,0.0,1.0,0.0,1.0,0.0057,0.6494,1.0,0.9622,0.9964,1.0,0.9867,0.9987,0.9978,0.0769,0.4321,0.9996,0.7634,1.0
fixing,qawiki,?A1>A4,llama3.1:70b,0.0,1.0,0.0,0.998,1.0,0.9983,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8544,0.9853,1.0,0.9988,1.0
fixing,qawiki,?A1>A4,gemini-2.0-flash,0.0,1.0,0.0,0.5,0.0047,1.0,0.9999,0.9461,0.9941,1.0,0.9738,0.9987,0.9964,0.0494,0.3555,0.9993,0.7077,1.0
fixing,qawiki,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0001,0.0,0.0007,1.0,0.0461,0.1445,0.8125,0.073,0.3633,0.2539,0.0,0.0001,0.5,0.0038,0.8125
fixing,qawiki,?A1>A4,gpt-4o,0.0,1.0,0.0,0.0843,0.0001,0.1148,0.9888,1.0,0.8338,0.9968,0.6855,0.9408,0.8949,0.0007,0.0436,0.9713,0.1796,0.9968
fixing,qawiki,?A1>A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0133,0.0,0.0207,0.9648,0.3145,1.0,0.9805,0.3953,0.8281,0.7095,0.0001,0.0073,0.8867,0.0577,0.9805
fixing,qawiki,?A1>A4,grok-3-mini,0.0,1.0,0.0,0.0002,0.0,0.0004,0.5,0.0193,0.0898,1.0,0.0327,0.1875,0.1445,0.0,0.0001,0.3438,0.0005,0.6875
fixing,qawiki,?A1>A4,deepseek-chat,0.0,1.0,0.0,0.0392,0.0,0.0669,0.9807,0.5,0.788,0.9941,1.0,0.9102,0.8666,0.0003,0.0261,0.9539,0.1537,0.9941
fixing,qawiki,?A1>A4,gemini-2.5-flash,0.0,1.0,0.0,0.0053,0.0,0.0059,0.8555,0.1509,0.377,0.9688,0.212,1.0,0.5,0.0,0.0013,0.7461,0.0207,0.9375
fixing,qawiki,?A1>A4,gpt-5-nano,0.0,1.0,0.0,0.0085,0.0,0.0133,0.9102,0.2272,0.5,0.9648,0.2905,0.7256,1.0,0.0,0.0033,0.8281,0.0245,0.9844
fixing,qawiki,?A1>A4,deepseek-reasoner,0.0,1.0,0.0,0.9597,0.2257,0.9765,1.0,0.9998,1.0,1.0,0.9999,1.0,1.0,1.0,0.9179,1.0,0.9932,1.0
fixing,qawiki,?A1>A4,gemini-2.5-pro,0.0,1.0,0.0,0.6962,0.0297,0.7709,1.0,0.9822,0.998,1.0,0.9904,0.9998,0.9992,0.14,1.0,0.9999,0.8595,1.0
fixing,qawiki,?A1>A4,gpt-5-mini,0.0,1.0,0.0,0.0022,0.0,0.0036,0.7734,0.0898,0.2744,0.8906,0.1334,0.5,0.377,0.0,0.0008,1.0,0.0096,0.8906
fixing,qawiki,?A1>A4,o3,0.0,1.0,0.0,0.3601,0.0033,0.4278,0.9993,0.9165,0.9793,1.0,0.9242,0.9941,0.9936,0.0175,0.2366,0.9978,1.0,0.9999
fixing,qawiki,?A1>A4,gpt-5,0.0,1.0,0.0,0.0001,0.0,0.0004,0.5,0.0193,0.0898,0.6875,0.0327,0.2266,0.1094,0.0,0.0001,0.3438,0.0012,1.0
fixing,qawiki,?A3∅A4,llama3.1:8b,1.0,1.0,0.3073,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9157,1.0,1.0,0.8432,1.0,0.2752
fixing,qawiki,?A3∅A4,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A3∅A4,gpt-4.1-nano-2025-04-14,0.775,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9734,1.0,1.0,0.9349,1.0,0.5
fixing,qawiki,?A3∅A4,mistral-small:24b,0.0,1.0,0.0,1.0,0.0262,0.3953,0.0133,0.1051,0.0207,0.212,0.3872,0.1662,0.0,0.0577,0.9102,0.0,0.7256,0.0
fixing,qawiki,?A3∅A4,llama3.1:70b,0.0,1.0,0.0,0.9915,1.0,0.9793,0.5,0.8463,0.5841,0.9331,0.9738,0.9165,0.0001,0.7383,0.9999,0.0001,0.9978,0.0
fixing,qawiki,?A3∅A4,gemini-2.0-flash,0.0,1.0,0.0,0.788,0.0577,1.0,0.0392,0.2272,0.0481,0.3872,0.6047,0.2905,0.0,0.1189,0.9922,0.0,0.9102,0.0
fixing,qawiki,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.9964,0.6612,0.9867,1.0,0.9054,0.6964,0.9534,0.9827,0.9519,0.0002,0.7976,0.9998,0.0001,0.9987,0.0
fixing,qawiki,?A3∅A4,gpt-4o,0.0,1.0,0.0,0.9616,0.2706,0.8949,0.1917,1.0,0.2706,0.7597,0.8811,0.6682,0.0,0.4073,0.9963,0.0,0.998,0.0
fixing,qawiki,?A3∅A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.9941,0.5841,0.9846,0.5,0.8463,1.0,0.9423,0.9793,0.9673,0.0001,0.7383,0.9999,0.0,0.9978,0.0
fixing,qawiki,?A3∅A4,grok-3-mini,0.0,1.0,0.0,0.9102,0.1431,0.8062,0.105,0.4073,0.1316,1.0,0.7728,0.5,0.0,0.2517,0.9941,0.0,0.9805,0.0
fixing,qawiki,?A3∅A4,deepseek-chat,0.0,1.0,0.0,0.8062,0.0669,0.6047,0.0466,0.2403,0.0577,0.4018,1.0,0.3145,0.0,0.1189,0.9673,0.0,0.8867,0.0
fixing,qawiki,?A3∅A4,gemini-2.5-flash,0.0,1.0,0.0,0.9283,0.1796,0.8666,0.1189,0.5,0.1133,0.6964,0.8338,1.0,0.0,0.3238,0.9968,0.0,0.9713,0.0
fixing,qawiki,?A3∅A4,gpt-5-nano,0.1302,1.0,0.0456,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.3877,1.0,0.0083
fixing,qawiki,?A3∅A4,deepseek-reasoner,0.0,1.0,0.0,0.9793,0.4159,0.9519,0.3388,0.7597,0.4159,0.8684,0.9519,0.8204,0.0,1.0,0.9988,0.0,0.9963,0.0
fixing,qawiki,?A3∅A4,gemini-2.5-pro,0.0,1.0,0.0,0.2539,0.0012,0.0625,0.0013,0.0176,0.0012,0.0327,0.1133,0.0193,0.0,0.0064,1.0,0.0,0.3633,0.0
fixing,qawiki,?A3∅A4,gpt-5-mini,0.225,1.0,0.1003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7159,1.0,1.0,1.0,1.0,0.046
fixing,qawiki,?A3∅A4,o3,0.0,1.0,0.0,0.5,0.0096,0.2539,0.0059,0.0195,0.0096,0.0898,0.2744,0.0898,0.0,0.0176,0.8555,0.0,1.0,0.0
fixing,qawiki,?A3∅A4,gpt-5,0.7985,1.0,0.6043,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9968,1.0,1.0,0.9756,1.0,1.0
fixing,qawiki,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A4=A1|3,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,qawiki,?A4=A1|3,mistral-small:24b,0.0,1.0,0.0,1.0,0.1605,0.9985,0.993,1.0,0.9999,1.0,1.0,1.0,0.3073,0.595,0.9748,0.9544,1.0,0.8019
fixing,qawiki,?A4=A1|3,llama3.1:70b,0.0,1.0,0.0,0.8927,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,0.7371,0.9124,0.9996,0.9979,1.0,0.985
fixing,qawiki,?A4=A1|3,gemini-2.0-flash,0.0,1.0,0.0,0.0032,0.0,1.0,0.2757,0.9836,0.8042,0.9863,0.9037,1.0,0.0001,0.0034,0.1909,0.1108,0.9225,0.015
fixing,qawiki,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0136,0.0002,0.8144,1.0,0.9984,0.9405,0.9966,0.9799,1.0,0.0021,0.0207,0.4036,0.2693,0.9802,0.068
fixing,qawiki,?A4=A1|3,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.03,0.0038,1.0,0.0924,0.5531,0.1856,0.9508,0.0,0.0,0.0035,0.0004,0.2204,0.0
fixing,qawiki,?A4=A1|3,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0003,0.0,0.2841,0.1013,0.9444,1.0,0.9573,0.7478,1.0,0.0,0.0004,0.0556,0.024,0.775,0.0009
fixing,qawiki,?A4=A1|3,grok-3-mini,0.0,1.0,0.0,0.0,0.0,0.0259,0.007,0.5531,0.0762,1.0,0.1958,0.9702,0.0,0.0,0.0044,0.0004,0.2559,0.0
fixing,qawiki,?A4=A1|3,deepseek-chat,0.0,1.0,0.0,0.0,0.0,0.1488,0.038,0.8837,0.3444,0.8736,1.0,0.9961,0.0,0.0,0.0249,0.0028,0.6146,0.0002
fixing,qawiki,?A4=A1|3,gemini-2.5-flash,0.0,1.0,0.0,0.0,0.0,0.0001,0.0,0.0845,0.0002,0.0586,0.0088,1.0,0.0,0.0,0.0,0.0,0.0135,0.0
fixing,qawiki,?A4=A1|3,gpt-5-nano,0.0,1.0,0.0,0.775,0.3518,1.0,0.9991,1.0,1.0,1.0,1.0,1.0,1.0,0.8169,0.9966,0.9948,1.0,0.967
fixing,qawiki,?A4=A1|3,deepseek-reasoner,0.0,1.0,0.0,0.5,0.1339,0.9985,0.9888,1.0,0.9999,1.0,1.0,1.0,0.2595,1.0,0.9785,0.9593,1.0,0.775
fixing,qawiki,?A4=A1|3,gemini-2.5-pro,0.0,1.0,0.0,0.0423,0.0009,0.8698,0.6873,0.9983,0.9686,0.9978,0.9864,1.0,0.007,0.0385,1.0,0.405,0.9867,0.1375
fixing,qawiki,?A4=A1|3,gpt-5-mini,0.0,1.0,0.0,0.074,0.0046,0.929,0.8055,0.9998,0.9876,0.9998,0.999,1.0,0.0111,0.0704,0.6848,1.0,0.9952,0.1958
fixing,qawiki,?A4=A1|3,o3,0.0,1.0,0.0,0.0,0.0,0.1225,0.0361,0.86,0.3073,0.8209,0.5,0.9937,0.0,0.0001,0.0237,0.0093,1.0,0.0005
fixing,qawiki,?A4=A1|3,gpt-5,0.0,1.0,0.0,0.2723,0.029,0.9924,0.9592,1.0,0.9997,1.0,0.9999,1.0,0.0631,0.3073,0.9091,0.8736,0.9998,1.0
fixing,spinach,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A2,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.7693,0.0015,0.6101,0.9931,1.0,0.9997,0.9995,0.9519,0.9978,0.9799,0.0166,0.8642,0.9995,0.8341,1.0
fixing,spinach,?A1=A2,mistral-small:24b,0.0,1.0,0.3294,1.0,0.0,0.3776,0.9712,1.0,0.9992,0.998,0.8659,0.9947,0.9519,0.0023,0.7288,0.9985,0.6641,1.0
fixing,spinach,?A1=A2,llama3.1:70b,0.0,1.0,0.9993,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8942,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A2,gemini-2.0-flash,0.0,1.0,0.5,0.7336,0.0002,1.0,0.9881,1.0,0.9999,0.9999,0.9459,0.9975,0.98,0.0066,0.86,0.9988,0.8092,1.0
fixing,spinach,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0168,0.0607,0.0,0.0266,1.0,0.9998,0.9738,0.9242,0.2858,0.7634,0.566,0.0,0.1553,0.9075,0.1279,0.997
fixing,spinach,?A1=A2,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.0,0.0013,1.0,0.0946,0.0318,0.0003,0.0085,0.003,0.0,0.0001,0.0392,0.0,0.4018
fixing,spinach,?A1=A2,gpt-4.1-2025-04-14,0.0,1.0,0.0008,0.0025,0.0,0.0005,0.0669,0.9608,1.0,0.4194,0.0178,0.2366,0.0925,0.0,0.0083,0.4194,0.0083,0.9054
fixing,spinach,?A1=A2,grok-3-mini,0.0,1.0,0.0014,0.0057,0.0,0.0004,0.1537,0.9904,0.7294,1.0,0.0494,0.3506,0.2088,0.0,0.0205,0.57,0.0144,0.9461
fixing,spinach,?A1=A2,deepseek-chat,0.0,1.0,0.0871,0.2148,0.0,0.0998,0.8275,0.9999,0.9937,0.9786,1.0,0.9123,0.7743,0.0,0.3714,0.9635,0.2983,0.9998
fixing,spinach,?A1=A2,gemini-2.5-flash,0.0,1.0,0.0058,0.0147,0.0,0.0069,0.3601,0.9978,0.8595,0.779,0.1553,1.0,0.3679,0.0,0.0494,0.779,0.0401,0.9855
fixing,spinach,?A1=A2,gpt-5-nano,0.0,1.0,0.038,0.0871,0.0,0.0395,0.566,0.9992,0.9564,0.8721,0.3258,0.7502,1.0,0.0,0.1684,0.9075,0.1215,0.997
fixing,spinach,?A1=A2,deepseek-reasoner,0.0,1.0,0.9918,0.9991,0.1659,0.9972,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,0.9997,1.0
fixing,spinach,?A1=A2,gemini-2.5-pro,0.0,1.0,0.2051,0.3804,0.0,0.2204,0.9123,1.0,0.997,0.9917,0.7443,0.9786,0.9002,0.0004,1.0,0.9881,0.5,0.9999
fixing,spinach,?A1=A2,gpt-5-mini,0.0,1.0,0.0014,0.0045,0.0,0.0033,0.1725,0.9867,0.7294,0.57,0.0717,0.3506,0.1725,0.0,0.0266,1.0,0.0122,0.9608
fixing,spinach,?A1=A2,o3,0.0,1.0,0.2442,0.4439,0.0001,0.28,0.9283,1.0,0.9968,0.9943,0.8115,0.9825,0.9338,0.0009,0.6254,0.9955,1.0,0.9999
fixing,spinach,?A1=A2,gpt-5,0.0,1.0,0.0,0.0001,0.0,0.0,0.0096,0.7728,0.1917,0.1148,0.0008,0.0378,0.0096,0.0,0.0004,0.0946,0.0003,1.0
fixing,spinach,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A3+A4,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.8825,0.9822,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9535,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A3+A4,mistral-small:24b,0.0,1.0,0.1712,1.0,0.8432,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7681,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A3+A4,llama3.1:70b,0.0,1.0,0.032,0.225,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,0.4487,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A3+A4,gemini-2.0-flash,0.0,1.0,0.0,0.0,0.0,1.0,0.3746,0.7949,0.4373,0.9832,0.322,0.9931,0.3389,0.0,0.86,0.6611,0.5,0.9558
fixing,spinach,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0,0.0,0.7388,1.0,0.9033,0.6358,0.9977,0.5,0.9978,0.5,0.0,0.9281,0.8092,0.6742,0.99
fixing,spinach,?A1=A3+A4,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.2916,0.1562,1.0,0.1856,0.889,0.1163,0.9506,0.1215,0.0,0.6146,0.3776,0.2257,0.8256
fixing,spinach,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0,0.0,0.6821,0.5,0.8837,1.0,0.9974,0.434,0.996,0.4449,0.0,0.908,0.7601,0.6286,0.9756
fixing,spinach,?A1=A3+A4,grok-3-mini,0.0,1.0,0.0,0.0,0.0,0.0365,0.0068,0.1802,0.0081,1.0,0.0069,0.7294,0.0113,0.0,0.1958,0.0967,0.0121,0.4321
fixing,spinach,?A1=A3+A4,deepseek-chat,0.0,1.0,0.0,0.0,0.0001,0.7796,0.617,0.9324,0.6911,0.9975,1.0,0.9982,0.5551,0.0,0.9481,0.8169,0.7243,0.9853
fixing,spinach,?A1=A3+A4,gemini-2.5-flash,0.0,1.0,0.0,0.0,0.0,0.0168,0.0058,0.0939,0.0098,0.4194,0.0048,1.0,0.004,0.0,0.1002,0.0519,0.0119,0.2983
fixing,spinach,?A1=A3+A4,gpt-5-nano,0.0,1.0,0.0,0.0,0.0003,0.7558,0.6146,0.9281,0.6611,0.9952,0.5551,0.9986,1.0,0.0001,0.9405,0.86,0.7159,0.9974
fixing,spinach,?A1=A3+A4,deepseek-reasoner,0.0,1.0,0.0728,0.3127,0.6506,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1=A3+A4,gemini-2.5-pro,0.0,1.0,0.0,0.0,0.0,0.2204,0.1215,0.5,0.151,0.8853,0.092,0.9506,0.1013,0.0,1.0,0.3417,0.1684,0.7852
fixing,spinach,?A1=A3+A4,gpt-5-mini,0.0,1.0,0.0,0.0,0.0,0.4449,0.28,0.7336,0.3359,0.9443,0.2595,0.973,0.2204,0.0,0.7517,1.0,0.3899,0.9283
fixing,spinach,?A1=A3+A4,o3,0.0,1.0,0.0,0.0,0.0,0.6196,0.4402,0.8544,0.5,0.9959,0.383,0.9953,0.3877,0.0,0.9002,0.7121,1.0,0.9765
fixing,spinach,?A1=A3+A4,gpt-5,0.0,1.0,0.0,0.0,0.0,0.0821,0.0235,0.2664,0.0481,0.6962,0.0297,0.8115,0.0081,0.0,0.3179,0.1279,0.0494,1.0
fixing,spinach,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1>A3,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.9565,0.9648,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.705,0.9968,1.0,1.0,1.0
fixing,spinach,?A1>A3,mistral-small:24b,0.0,1.0,0.074,1.0,0.57,1.0,1.0,1.0,1.0,1.0,0.9999,0.9992,0.9981,0.1488,0.9002,1.0,1.0,1.0
fixing,spinach,?A1>A3,llama3.1:70b,0.0,1.0,0.0632,0.57,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,0.9997,0.9985,0.1403,0.9002,1.0,1.0,1.0
fixing,spinach,?A1>A3,gemini-2.0-flash,0.0,1.0,0.0,0.0001,0.0001,1.0,0.5747,0.9331,0.9867,0.8365,0.5747,0.1537,0.2434,0.0,0.0023,0.8675,0.9894,0.9793
fixing,spinach,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0001,0.0,0.5747,1.0,0.9423,0.9867,0.8684,0.5806,0.1725,0.2291,0.0,0.0038,0.8852,0.9738,0.9846
fixing,spinach,?A1>A3,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.1431,0.1316,1.0,0.8491,0.3953,0.1316,0.0113,0.0261,0.0,0.0001,0.5,0.7597,0.7597
fixing,spinach,?A1>A3,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0,0.0,0.0392,0.0392,0.3036,1.0,0.1796,0.0392,0.003,0.0047,0.0,0.0,0.2403,0.5,0.5
fixing,spinach,?A1>A3,grok-3-mini,0.0,1.0,0.0,0.0,0.0,0.2786,0.2517,0.788,0.9165,1.0,0.2517,0.032,0.061,0.0,0.0003,0.6612,0.8684,0.8684
fixing,spinach,?A1>A3,deepseek-chat,0.0,1.0,0.0,0.0003,0.0003,0.5747,0.5806,0.9423,0.9867,0.8684,1.0,0.1885,0.2498,0.0,0.0058,0.8852,0.968,0.9622
fixing,spinach,?A1>A3,gemini-2.5-flash,0.0,1.0,0.0001,0.0025,0.0011,0.9242,0.9075,0.9967,0.9992,0.9887,0.8923,1.0,0.6286,0.0001,0.0401,0.9825,0.9988,0.9974
fixing,spinach,?A1>A3,gpt-5-nano,0.0,1.0,0.0,0.0047,0.004,0.8519,0.8675,0.9904,0.9988,0.9739,0.8447,0.5,1.0,0.0001,0.0557,0.9915,0.997,0.998
fixing,spinach,?A1>A3,deepseek-reasoner,0.0,1.0,0.3939,0.9037,0.9115,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9863,1.0,1.0,1.0
fixing,spinach,?A1>A3,gemini-2.5-pro,0.0,1.0,0.0072,0.1684,0.1684,0.9993,0.9987,1.0,1.0,0.9999,0.9978,0.9825,0.9703,0.0259,1.0,0.9999,1.0,1.0
fixing,spinach,?A1>A3,gpt-5-mini,0.0,1.0,0.0,0.0,0.0,0.2291,0.2122,0.6682,0.8811,0.5,0.2122,0.0401,0.0262,0.0,0.0002,1.0,0.8338,0.8204
fixing,spinach,?A1>A3,o3,0.0,1.0,0.0,0.0,0.0,0.0384,0.0669,0.4073,0.6855,0.2517,0.0758,0.0047,0.0096,0.0,0.0,0.3145,1.0,0.5927
fixing,spinach,?A1>A3,gpt-5,0.0,1.0,0.0,0.0,0.0,0.0577,0.0481,0.4073,0.6855,0.2517,0.0843,0.0081,0.0073,0.0,0.0001,0.3238,0.5927,1.0
fixing,spinach,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1>A4,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,0.9961,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9993,1.0,1.0,1.0,1.0
fixing,spinach,?A1>A4,mistral-small:24b,0.0,1.0,0.0001,1.0,0.1553,0.9519,1.0,0.9996,0.9738,1.0,0.9993,0.9998,1.0,0.2983,0.9998,0.9693,0.9461,0.9998
fixing,spinach,?A1>A4,llama3.1:70b,0.0,1.0,0.0088,0.9123,1.0,0.9932,1.0,1.0,0.9988,1.0,0.9998,1.0,1.0,0.7336,1.0,0.9993,0.9996,1.0
fixing,spinach,?A1>A4,gemini-2.0-flash,0.0,1.0,0.0,0.1189,0.0175,1.0,0.9991,0.9846,0.788,0.9998,0.9616,0.9935,0.9995,0.0378,0.9963,0.8204,0.6682,0.9936
fixing,spinach,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0002,0.0,0.0065,1.0,0.3438,0.0193,0.6875,0.1445,0.3438,0.875,0.0,0.5,0.0327,0.0112,0.5
fixing,spinach,?A1>A4,gpt-4o,0.0,1.0,0.0,0.0022,0.0001,0.0481,0.8906,1.0,0.1051,0.8555,0.377,0.6367,0.9375,0.0005,0.7461,0.1334,0.0461,0.7734
fixing,spinach,?A1>A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0669,0.0041,0.3953,0.9968,0.9616,1.0,0.999,0.927,0.9713,0.9995,0.0113,0.9824,0.6964,0.5,0.9888
fixing,spinach,?A1>A4,grok-3-mini,0.0,1.0,0.0,0.0002,0.0,0.0032,0.6875,0.3633,0.0107,1.0,0.1445,0.3438,0.875,0.0,0.5,0.0327,0.0112,0.5
fixing,spinach,?A1>A4,deepseek-chat,0.0,1.0,0.0,0.0038,0.0007,0.1051,0.9648,0.8281,0.1938,0.9648,1.0,0.8555,0.9805,0.0004,0.8867,0.3145,0.1662,0.8867
fixing,spinach,?A1>A4,gemini-2.5-flash,0.0,1.0,0.0,0.0013,0.0001,0.0287,0.8906,0.6367,0.0898,0.8906,0.3633,1.0,0.9375,0.0003,0.7734,0.1509,0.0461,0.7461
fixing,spinach,?A1>A4,gpt-5-nano,0.0,1.0,0.0,0.0,0.0,0.0037,0.5,0.2266,0.0059,0.5,0.0898,0.2266,1.0,0.0,0.3438,0.0107,0.0065,0.3125
fixing,spinach,?A1>A4,deepseek-reasoner,0.0,1.0,0.0019,0.8115,0.3776,0.9855,1.0,0.9999,0.9967,1.0,0.9999,1.0,1.0,1.0,1.0,0.9947,0.9825,1.0
fixing,spinach,?A1>A4,gemini-2.5-pro,0.0,1.0,0.0,0.0013,0.0,0.0176,0.7734,0.5,0.0592,0.8125,0.2744,0.5,0.8906,0.0002,1.0,0.0898,0.0287,0.6367
fixing,spinach,?A1>A4,gpt-5-mini,0.0,1.0,0.0,0.068,0.0026,0.3238,0.9941,0.9539,0.5,0.9941,0.8338,0.9408,0.999,0.0147,0.9713,1.0,0.4018,0.9893
fixing,spinach,?A1>A4,o3,0.0,1.0,0.0,0.1148,0.0022,0.5,0.9983,0.9888,0.6762,0.9983,0.9283,0.9888,0.9991,0.0401,0.9935,0.7728,1.0,0.9894
fixing,spinach,?A1>A4,gpt-5,0.0,1.0,0.0,0.0013,0.0,0.0245,0.8125,0.5,0.0461,0.8125,0.2744,0.5,0.9375,0.0002,0.6367,0.0547,0.0384,1.0
fixing,spinach,?A3∅A4,llama3.1:8b,1.0,1.0,0.8661,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0
fixing,spinach,?A3∅A4,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A3∅A4,gpt-4.1-nano-2025-04-14,0.1945,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9998,1.0,1.0,0.9948,1.0,0.9989
fixing,spinach,?A3∅A4,mistral-small:24b,0.0,1.0,0.0,1.0,0.8666,0.927,0.8666,0.6964,1.0,0.9893,0.788,0.927,0.0003,0.0669,0.9805,0.0,0.8666,0.0
fixing,spinach,?A3∅A4,llama3.1:70b,0.0,1.0,0.0,0.2905,1.0,0.7461,0.623,0.3872,1.0,0.9375,0.5,0.7734,0.0,0.0064,0.8555,0.0,0.6367,0.0
fixing,spinach,?A3∅A4,gemini-2.0-flash,0.0,1.0,0.0,0.1938,0.5,1.0,0.5,0.2744,1.0,0.8906,0.377,0.6367,0.0,0.0038,0.875,0.0,0.5,0.0
fixing,spinach,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.2905,0.623,0.7461,1.0,0.3872,1.0,0.9375,0.5,0.7461,0.0,0.0096,0.8555,0.0,0.623,0.0
fixing,spinach,?A3∅A4,gpt-4o,0.0,1.0,0.0,0.5,0.8062,0.8867,0.8062,1.0,1.0,0.9922,0.7256,0.8867,0.0001,0.0392,0.9453,0.0,0.8555,0.0
fixing,spinach,?A3∅A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0039,0.0312,0.0625,0.0312,0.0078,1.0,0.25,0.0156,0.0625,0.0,0.0,0.125,0.0,0.0312,0.0
fixing,spinach,?A3∅A4,grok-3-mini,0.0,1.0,0.0,0.0547,0.2266,0.3438,0.2266,0.0625,1.0,1.0,0.1445,0.3125,0.0,0.0003,0.5,0.0,0.1875,0.0
fixing,spinach,?A3∅A4,deepseek-chat,0.0,1.0,0.0,0.3953,0.7256,0.8281,0.7461,0.5,1.0,0.9648,1.0,0.8281,0.0,0.0262,0.9102,0.0,0.7461,0.0
fixing,spinach,?A3∅A4,gemini-2.5-flash,0.0,1.0,0.0,0.1938,0.5,0.6367,0.5,0.2744,1.0,0.9375,0.377,1.0,0.0,0.0038,0.7734,0.0,0.5,0.0
fixing,spinach,?A3∅A4,gpt-5-nano,0.0,1.0,0.0005,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.99,1.0,0.0814,1.0,0.2858
fixing,spinach,?A3∅A4,deepseek-reasoner,0.0,1.0,0.0,0.9738,0.9988,0.9993,0.9978,0.9867,1.0,1.0,0.9915,0.9993,0.0235,1.0,0.9999,0.0008,0.9978,0.0058
fixing,spinach,?A3∅A4,gemini-2.5-pro,0.0,1.0,0.0,0.0898,0.3633,0.5,0.3633,0.1719,1.0,0.8125,0.2539,0.5,0.0,0.0012,1.0,0.0,0.3633,0.0
fixing,spinach,?A3∅A4,gpt-5-mini,0.0002,1.0,0.0103,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9599,0.9997,1.0,1.0,1.0,0.8761
fixing,spinach,?A3∅A4,o3,0.0,1.0,0.0,0.2905,0.6367,0.7461,0.623,0.3633,1.0,0.9688,0.5,0.7734,0.0,0.0096,0.8555,0.0,1.0,0.0
fixing,spinach,?A3∅A4,gpt-5,0.0,1.0,0.0023,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8275,0.9978,1.0,0.221,1.0,1.0
fixing,spinach,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A4=A1|3,gpt-oss:20b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,spinach,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.9992,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9995,1.0,1.0,1.0,1.0
fixing,spinach,?A4=A1|3,mistral-small:24b,0.0,1.0,0.0018,1.0,0.9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.926,0.6899,1.0,0.932,1.0,0.9982
fixing,spinach,?A4=A1|3,llama3.1:70b,0.0,1.0,0.0,0.1528,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6439,0.2559,1.0,0.6482,1.0,0.9822
fixing,spinach,?A4=A1|3,gemini-2.0-flash,0.0,1.0,0.0,0.0,0.0,1.0,0.3746,0.6556,0.5,0.9702,0.2712,0.9702,0.0001,0.0,0.86,0.0,0.3804,0.0019
fixing,spinach,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.0,0.0,0.7388,1.0,0.7949,0.6962,0.9955,0.4402,0.9902,0.0001,0.0,0.9238,0.0001,0.5585,0.0048
fixing,spinach,?A4=A1|3,gpt-4o,0.0,1.0,0.0,0.0,0.0,0.4469,0.2916,1.0,0.3854,0.9281,0.1958,0.9369,0.0,0.0,0.7601,0.0,0.2757,0.0022
fixing,spinach,?A4=A1|3,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.0,0.0,0.6254,0.4321,0.72,1.0,0.9919,0.3136,0.9756,0.0,0.0,0.8837,0.0001,0.4373,0.0033
fixing,spinach,?A4=A1|3,grok-3-mini,0.0,1.0,0.0,0.0,0.0,0.0586,0.0122,0.1215,0.0214,1.0,0.0098,0.5747,0.0,0.0,0.2434,0.0,0.01,0.0
fixing,spinach,?A4=A1|3,deepseek-chat,0.0,1.0,0.0,0.0,0.0001,0.8198,0.6742,0.8736,0.7912,0.996,1.0,0.9943,0.0002,0.0,0.9605,0.0005,0.6641,0.0164
fixing,spinach,?A4=A1|3,gemini-2.5-flash,0.0,1.0,0.0,0.0,0.0,0.0586,0.0218,0.111,0.0481,0.5747,0.0129,1.0,0.0,0.0,0.2434,0.0,0.0244,0.0
fixing,spinach,?A4=A1|3,gpt-5-nano,0.0,1.0,0.0,0.1142,0.4511,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,0.1871,1.0,0.5551,1.0,0.9702
fixing,spinach,?A4=A1|3,deepseek-reasoner,0.0,1.0,0.0012,0.4022,0.8209,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8736,1.0,1.0,0.8698,1.0,0.9977
fixing,spinach,?A4=A1|3,gemini-2.5-pro,0.0,1.0,0.0,0.0,0.0,0.2204,0.1264,0.3359,0.1856,0.8519,0.0719,0.8519,0.0,0.0,1.0,0.0,0.1055,0.0
fixing,spinach,?A4=A1|3,gpt-5-mini,0.0,1.0,0.0,0.1073,0.4495,1.0,1.0,1.0,1.0,1.0,0.9998,1.0,0.5551,0.1909,1.0,1.0,1.0,0.9573
fixing,spinach,?A4=A1|3,o3,0.0,1.0,0.0,0.0,0.0,0.7288,0.5585,0.8144,0.6821,0.9965,0.4439,0.9887,0.0001,0.0,0.9414,0.0001,1.0,0.002
fixing,spinach,?A4=A1|3,gpt-5,0.0,1.0,0.0,0.0041,0.0362,0.9993,0.9982,0.9991,0.9987,1.0,0.9923,1.0,0.0586,0.0055,1.0,0.0762,0.9994,1.0
fixing,synthetic,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A1=A2,gpt-oss:20b,0.0,1.0,0.0215,0.9663,0.6611,1.0,0.9991,0.9993,1.0,1.0,1.0,1.0,1.0,0.9733,0.646,1.0,0.9907,1.0
fixing,synthetic,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.9887,1.0,1.0,0.9985,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9923,1.0,1.0,1.0
fixing,synthetic,?A1=A2,mistral-small:24b,0.0,0.0587,0.0,1.0,0.0676,0.998,0.9179,0.9324,0.9998,1.0,1.0,0.9997,0.9999,0.5573,0.1,0.9508,0.6641,0.9996
fixing,synthetic,?A1=A2,llama3.1:70b,0.0,0.4449,0.004,0.9638,1.0,1.0,0.9988,0.9987,1.0,1.0,1.0,1.0,1.0,0.954,0.5522,0.9991,0.9733,1.0
fixing,synthetic,?A1=A2,gemini-2.0-flash,0.0,0.0,0.0,0.0057,0.0,1.0,0.1215,0.1553,0.8275,0.8852,0.8852,0.8998,0.9252,0.0098,0.0002,0.2557,0.0244,0.7878
fixing,synthetic,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0023,0.0,0.14,0.0033,0.9338,1.0,0.617,0.9967,0.998,0.9993,0.9974,0.997,0.1341,0.004,0.7159,0.2088,0.9795
fixing,synthetic,?A1=A2,gpt-4o,0.0,0.0019,0.0,0.1163,0.0033,0.9123,0.5,1.0,0.9795,0.99,0.9919,0.9947,0.9974,0.1312,0.006,0.6864,0.1856,0.9635
fixing,synthetic,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0006,0.0,0.2858,0.0113,0.0448,1.0,0.6762,0.6855,0.7383,0.8083,0.0006,0.0,0.0814,0.0011,0.5
fixing,synthetic,?A1=A2,grok-3-mini,0.0,0.0,0.0,0.0002,0.0,0.2122,0.0073,0.0251,0.5,1.0,0.5982,0.6494,0.7294,0.0003,0.0,0.0662,0.0009,0.4194
fixing,synthetic,?A1=A2,deepseek-chat,0.0,0.0,0.0,0.0002,0.0,0.2122,0.0036,0.0214,0.5,0.5982,1.0,0.6682,0.7383,0.0002,0.0,0.0607,0.0009,0.4194
fixing,synthetic,?A1=A2,gemini-2.5-flash,0.0,0.0,0.0,0.0008,0.0,0.1808,0.0081,0.0147,0.4159,0.5,0.5,1.0,0.6762,0.0003,0.0,0.0261,0.0006,0.3506
fixing,synthetic,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0004,0.0,0.1405,0.0083,0.0081,0.3318,0.4194,0.4159,0.5,1.0,0.0002,0.0,0.0145,0.0003,0.2517
fixing,synthetic,?A1=A2,deepseek-reasoner,0.0,0.0492,0.0001,0.5573,0.0804,0.996,0.9231,0.9196,0.9998,0.9999,1.0,0.9999,0.9999,1.0,0.1,0.9638,0.6742,0.9997
fixing,synthetic,?A1=A2,gemini-2.5-pro,0.0,0.4503,0.0147,0.9381,0.5522,0.9999,0.9985,0.9973,1.0,1.0,1.0,1.0,1.0,0.9381,1.0,0.9993,0.9686,1.0
fixing,synthetic,?A1=A2,gpt-5-mini,0.0,0.0001,0.0,0.0845,0.0023,0.838,0.3877,0.4357,0.9599,0.9674,0.9712,0.9904,0.9953,0.0676,0.0019,1.0,0.0939,0.9564
fixing,synthetic,?A1=A2,o3,0.0,0.02,0.0,0.4439,0.0492,0.9887,0.8721,0.8837,0.9997,0.9997,0.9997,0.9998,0.9999,0.4402,0.0556,0.9506,1.0,0.9997
fixing,synthetic,?A1=A2,gpt-5,0.0,0.0,0.0,0.0013,0.0,0.345,0.0448,0.0717,0.655,0.7294,0.7294,0.779,0.8684,0.0009,0.0,0.0925,0.0012,1.0
fixing,synthetic,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0,0.0084,0.3899,0.9997,0.996,0.962,1.0,1.0,1.0,1.0,0.98,0.383,0.9733,0.9818,1.0,1.0
fixing,synthetic,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A1=A3+A4,mistral-small:24b,0.0,0.9959,0.0004,1.0,0.9898,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9898,1.0,1.0,1.0,1.0
fixing,synthetic,?A1=A3+A4,llama3.1:70b,0.0,0.7121,0.0,0.0198,1.0,1.0,0.9992,0.9865,1.0,1.0,1.0,1.0,0.9923,0.5573,0.9876,0.9928,1.0,1.0
fixing,synthetic,?A1=A3+A4,gemini-2.0-flash,0.0,0.0009,0.0,0.0,0.0002,1.0,0.1958,0.0494,0.8275,0.8852,0.7142,0.9904,0.0871,0.0001,0.1058,0.1055,0.9749,0.9739
fixing,synthetic,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0093,0.0,0.0,0.0023,0.8853,1.0,0.2612,0.9786,0.9853,0.9449,0.9996,0.3038,0.0028,0.3389,0.3679,0.9993,0.9983
fixing,synthetic,?A1=A3+A4,gpt-4o,0.0,0.0668,0.0,0.0,0.0267,0.9765,0.8316,1.0,0.9977,0.9968,0.99,0.9999,0.617,0.0337,0.6061,0.6611,0.9999,0.9995
fixing,synthetic,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0,0.0,0.2858,0.0494,0.0068,1.0,0.6445,0.4225,0.939,0.0063,0.0,0.0244,0.0235,0.8923,0.8761
fixing,synthetic,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.2122,0.0354,0.0083,0.5,1.0,0.3506,0.9423,0.01,0.0,0.0119,0.0144,0.8761,0.8463
fixing,synthetic,?A1=A3+A4,deepseek-chat,0.0,0.0001,0.0,0.0,0.0,0.4253,0.1077,0.0235,0.7214,0.779,1.0,0.9693,0.0288,0.0,0.0481,0.0541,0.9738,0.932
fixing,synthetic,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0261,0.0017,0.0005,0.1239,0.1316,0.068,1.0,0.0005,0.0,0.0005,0.0004,0.5,0.4119
fixing,synthetic,?A1=A3+A4,gpt-5-nano,0.0,0.0395,0.0,0.0,0.0164,0.9519,0.8042,0.5,0.9981,0.9962,0.9878,0.9999,1.0,0.0147,0.5551,0.6445,0.9998,0.9998
fixing,synthetic,?A1=A3+A4,deepseek-reasoner,0.0,0.7243,0.0,0.0198,0.5573,1.0,0.999,0.9818,1.0,1.0,1.0,1.0,0.9934,1.0,0.9863,0.9953,1.0,1.0
fixing,synthetic,?A1=A3+A4,gemini-2.5-pro,0.0,0.0492,0.0,0.0,0.024,0.9368,0.7558,0.5,0.9887,0.9953,0.9756,0.9999,0.5551,0.0259,1.0,0.6146,0.9998,0.999
fixing,synthetic,?A1=A3+A4,gpt-5-mini,0.0,0.0352,0.0,0.0,0.015,0.9414,0.7502,0.4449,0.99,0.9943,0.9734,0.9999,0.5,0.0106,0.5,1.0,0.9997,0.9998
fixing,synthetic,?A1=A3+A4,o3,0.0,0.0,0.0,0.0,0.0,0.0551,0.0026,0.0005,0.1885,0.221,0.0669,0.6612,0.0008,0.0,0.0008,0.0009,1.0,0.5
fixing,synthetic,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.061,0.0053,0.0014,0.221,0.2706,0.1325,0.7483,0.0007,0.0,0.0027,0.0007,0.6682,1.0
fixing,synthetic,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A1>A3,gpt-oss:20b,0.0,1.0,0.0,0.7443,0.4415,0.9231,0.9981,0.9998,1.0,0.9996,0.9962,0.9997,0.8115,0.3258,0.5598,0.9712,1.0,1.0
fixing,synthetic,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9998,1.0,1.0,1.0,1.0
fixing,synthetic,?A1>A3,mistral-small:24b,0.0,0.3714,0.0,1.0,0.2366,0.8852,0.9904,0.9995,0.9999,0.9998,0.9967,0.9992,0.6358,0.1481,0.3776,0.932,1.0,1.0
fixing,synthetic,?A1>A3,llama3.1:70b,0.0,0.6706,0.0001,0.8595,1.0,0.9749,0.9985,1.0,1.0,1.0,0.9993,1.0,0.86,0.4388,0.6821,0.9808,1.0,1.0
fixing,synthetic,?A1>A3,gemini-2.0-flash,0.0,0.1341,0.0,0.2122,0.0551,1.0,0.9331,0.9947,0.9987,0.9936,0.9682,0.9941,0.3038,0.0251,0.1279,0.7214,0.9999,0.9998
fixing,synthetic,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.0063,0.0,0.0261,0.0045,0.1431,1.0,0.9283,0.9807,0.8338,0.7095,0.8811,0.0436,0.0011,0.01,0.2617,0.9824,0.9935
fixing,synthetic,?A1>A3,gpt-4o,0.0,0.0007,0.0,0.0019,0.0002,0.0173,0.1662,1.0,0.7095,0.4073,0.212,0.5,0.0053,0.0001,0.0003,0.0466,0.788,0.8867
fixing,synthetic,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0002,0.0,0.0005,0.0,0.0059,0.073,0.5,1.0,0.2905,0.1133,0.3953,0.0008,0.0,0.0002,0.0154,0.7256,0.8555
fixing,synthetic,?A1>A3,grok-3-mini,0.0,0.0017,0.0,0.0013,0.0002,0.0245,0.3145,0.7597,0.8666,1.0,0.3953,0.7095,0.0096,0.0001,0.003,0.105,0.927,0.9673
fixing,synthetic,?A1>A3,deepseek-chat,0.0,0.01,0.0,0.0113,0.0023,0.0835,0.5,0.9102,0.9673,0.788,1.0,0.8491,0.0354,0.0004,0.0068,0.1917,0.9807,0.9824
fixing,synthetic,?A1>A3,gemini-2.5-flash,0.0,0.0011,0.0,0.003,0.0002,0.0207,0.2403,0.6855,0.788,0.5,0.3036,1.0,0.0081,0.0,0.0005,0.0669,0.8666,0.927
fixing,synthetic,?A1>A3,gpt-5-nano,0.0,0.2983,0.0,0.5,0.2204,0.8042,0.9822,0.9983,0.9999,0.997,0.9853,0.9974,1.0,0.1279,0.3294,0.9075,0.9999,1.0
fixing,synthetic,?A1>A3,deepseek-reasoner,0.0,0.7743,0.0005,0.9186,0.678,0.99,0.9997,1.0,1.0,1.0,0.9999,1.0,0.9283,1.0,0.7743,0.9943,1.0,1.0
fixing,synthetic,?A1>A3,gemini-2.5-pro,0.0,0.5598,0.0001,0.7336,0.4373,0.9283,0.9965,0.9999,1.0,0.9991,0.9977,0.9999,0.7693,0.3258,1.0,0.9635,1.0,1.0
fixing,synthetic,?A1>A3,gpt-5-mini,0.0,0.0607,0.0,0.1325,0.0403,0.4225,0.8569,0.9827,0.9962,0.9534,0.9054,0.9738,0.1725,0.0144,0.0717,1.0,0.9978,0.9987
fixing,synthetic,?A1>A3,o3,0.0,0.0,0.0,0.0,0.0,0.0012,0.0592,0.3953,0.5,0.1938,0.073,0.2905,0.0005,0.0,0.0002,0.0096,1.0,0.7461
fixing,synthetic,?A1>A3,gpt-5,0.0,0.0001,0.0,0.0001,0.0,0.0013,0.0287,0.2744,0.3633,0.1133,0.0592,0.1938,0.0003,0.0,0.0,0.0059,0.5,1.0
fixing,synthetic,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A1>A4,gpt-oss:20b,0.0,1.0,0.0,0.9682,0.2366,0.9904,0.9963,0.9793,0.9904,1.0,0.9997,0.9894,0.9988,0.221,0.3506,0.9608,0.9962,1.0
fixing,synthetic,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A1>A4,mistral-small:24b,0.0,0.0835,0.0,1.0,0.0085,0.8062,0.8281,0.7095,0.8281,0.9961,0.9805,0.7095,0.9453,0.0085,0.032,0.6128,0.9375,0.9961
fixing,synthetic,?A1>A4,llama3.1:70b,0.0,0.8595,0.0001,0.9978,1.0,0.9992,0.9992,0.9993,0.9996,1.0,1.0,0.998,0.9999,0.5722,0.7017,0.9967,0.9998,1.0
fixing,synthetic,?A1>A4,gemini-2.0-flash,0.0,0.0318,0.0,0.3872,0.0033,1.0,0.623,0.5,0.623,0.9844,0.9375,0.5,0.8555,0.0033,0.0085,0.377,0.7461,0.9844
fixing,synthetic,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0176,0.0,0.377,0.0033,0.623,1.0,0.5,0.623,0.9844,0.9375,0.5,0.8555,0.0013,0.0085,0.3872,0.7734,1.0
fixing,synthetic,?A1>A4,gpt-4o,0.0,0.0577,0.0,0.5,0.0036,0.7256,0.7461,1.0,0.7461,0.9922,0.9648,0.6128,0.9102,0.0053,0.0133,0.5,0.8281,0.9922
fixing,synthetic,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0318,0.0,0.377,0.0022,0.623,0.623,0.5,1.0,0.9844,0.9375,0.5,0.8555,0.0022,0.0059,0.377,0.7461,0.9844
fixing,synthetic,?A1>A4,grok-3-mini,0.0,0.0005,0.0,0.0352,0.0,0.1094,0.1094,0.0625,0.1094,1.0,0.5,0.0312,0.3125,0.0,0.0001,0.0352,0.1875,0.75
fixing,synthetic,?A1>A4,deepseek-chat,0.0,0.0021,0.0,0.0898,0.0,0.2266,0.2266,0.1445,0.2266,0.875,1.0,0.1445,0.5,0.0001,0.0004,0.0898,0.3438,0.875
fixing,synthetic,?A1>A4,gemini-2.5-flash,0.0,0.0384,0.0,0.5,0.0073,0.7461,0.7461,0.6128,0.7256,1.0,0.9648,1.0,0.9102,0.0073,0.0173,0.5,0.8281,1.0
fixing,synthetic,?A1>A4,gpt-5-nano,0.0,0.0064,0.0,0.1719,0.0004,0.3633,0.3633,0.2539,0.3633,0.9375,0.8125,0.2539,1.0,0.0004,0.0007,0.1719,0.5,0.9375
fixing,synthetic,?A1>A4,deepseek-reasoner,0.0,0.8761,0.0001,0.9978,0.5722,0.9992,0.9998,0.9987,0.9996,1.0,1.0,0.998,0.9999,1.0,0.7017,0.9978,1.0,1.0
fixing,synthetic,?A1>A4,gemini-2.5-pro,0.0,0.779,0.0,0.9887,0.43,0.9978,0.9978,0.9964,0.9987,1.0,1.0,0.9947,0.9999,0.43,1.0,0.9915,0.9996,1.0
fixing,synthetic,?A1>A4,gpt-5-mini,0.0,0.0946,0.0,0.6128,0.0113,0.8281,0.8062,0.7256,0.8281,0.9961,0.9805,0.7256,0.9453,0.0085,0.0262,1.0,0.8867,0.9961
fixing,synthetic,?A1>A4,o3,0.0,0.0154,0.0,0.2266,0.0013,0.5,0.5,0.377,0.5,0.9688,0.8906,0.377,0.7734,0.0004,0.0022,0.2744,1.0,0.9688
fixing,synthetic,?A1>A4,gpt-5,0.0,0.0001,0.0,0.0352,0.0,0.1094,0.0625,0.0625,0.1094,0.75,0.5,0.0312,0.3125,0.0,0.0001,0.0352,0.1875,1.0
fixing,synthetic,?A3∅A4,llama3.1:8b,1.0,1.0,0.1981,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0,0.0352,0.1094,0.5,0.0,0.1875,0.125,0.1094,0.0625,0.1875,0.0005,0.1094,0.1875,0.0,1.0,0.0032
fixing,synthetic,?A3∅A4,gpt-4.1-nano-2025-04-14,0.8625,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A3∅A4,mistral-small:24b,0.0,0.9961,0.0,1.0,0.8281,0.9805,0.0022,0.9102,0.8867,0.8281,0.8062,0.8867,0.0946,0.8062,0.8867,0.003,1.0,0.2272
fixing,synthetic,?A3∅A4,llama3.1:70b,0.0,0.9844,0.0,0.377,1.0,0.9688,0.0004,0.7734,0.7461,0.6562,0.623,0.7461,0.0176,0.6367,0.7461,0.0002,1.0,0.1051
fixing,synthetic,?A3∅A4,gemini-2.0-flash,0.0,1.0,0.0,0.0898,0.1875,1.0,0.0,0.3438,0.3125,0.2266,0.1875,0.3438,0.0009,0.2266,0.3438,0.0,1.0,0.0112
fixing,synthetic,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0,0.9996,0.9999,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,0.939,0.9999,1.0,0.5,1.0,0.9822
fixing,synthetic,?A3∅A4,gpt-4o,0.0,0.9688,0.0,0.2539,0.5,0.8906,0.0,1.0,0.6562,0.5,0.5,0.6367,0.0154,0.5,0.6367,0.0001,1.0,0.0592
fixing,synthetic,?A3∅A4,gpt-4.1-2025-04-14,0.0,1.0,0.0,0.2744,0.5,0.9375,0.0001,0.6562,1.0,0.5,0.5,0.6367,0.0065,0.5,0.6562,0.0,1.0,0.0327
fixing,synthetic,?A3∅A4,grok-3-mini,0.0,0.9844,0.0,0.377,0.6562,0.9375,0.0004,0.7734,0.7734,1.0,0.623,0.7734,0.0112,0.623,0.7461,0.0002,1.0,0.073
fixing,synthetic,?A3∅A4,deepseek-chat,0.0,1.0,0.0,0.3872,0.623,0.9688,0.0002,0.7461,0.7734,0.623,1.0,0.7734,0.0176,0.6367,0.7461,0.0002,1.0,0.0898
fixing,synthetic,?A3∅A4,gemini-2.5-flash,0.0,0.9688,0.0,0.2744,0.5,0.8906,0.0001,0.6367,0.6367,0.5,0.5,1.0,0.0032,0.5,0.6562,0.0001,1.0,0.0195
fixing,synthetic,?A3∅A4,gpt-5-nano,0.0,1.0,0.0,0.9608,0.9963,0.9999,0.1239,0.9962,0.9991,0.9983,0.9963,0.9998,1.0,0.9936,0.9991,0.0287,1.0,0.8666
fixing,synthetic,?A3∅A4,deepseek-reasoner,0.0,0.9844,0.0,0.3872,0.6367,0.9375,0.0008,0.7461,0.7461,0.623,0.6367,0.7461,0.0245,1.0,0.7461,0.0005,1.0,0.1051
fixing,synthetic,?A3∅A4,gemini-2.5-pro,0.0,0.9688,0.0,0.2744,0.5,0.8906,0.0001,0.6367,0.6562,0.5,0.5,0.6562,0.0065,0.5,1.0,0.0001,1.0,0.0327
fixing,synthetic,?A3∅A4,gpt-5-mini,0.0,1.0,0.0,0.9992,1.0,1.0,0.6494,1.0,1.0,1.0,1.0,1.0,0.9935,0.9999,1.0,1.0,1.0,0.9988
fixing,synthetic,?A3∅A4,o3,0.0,0.5,0.0,0.0078,0.0312,0.25,0.0,0.0625,0.0625,0.0312,0.0312,0.0625,0.0001,0.0312,0.0625,0.0,1.0,0.0005
fixing,synthetic,?A3∅A4,gpt-5,0.0,0.9998,0.0,0.8949,0.9616,0.9983,0.0436,0.9824,0.9941,0.9807,0.9713,0.998,0.2905,0.9616,0.9941,0.0064,1.0,1.0
fixing,synthetic,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.0034,0.3389,0.9993,0.9573,0.962,0.9999,1.0,0.9997,1.0,0.6224,0.2757,0.9648,0.3327,1.0,0.9995
fixing,synthetic,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fixing,synthetic,?A4=A1|3,mistral-small:24b,0.0,0.9985,0.0,1.0,0.9937,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9995,0.9933,1.0,0.9977,1.0,1.0
fixing,synthetic,?A4=A1|3,llama3.1:70b,0.0,0.7558,0.0,0.0124,1.0,0.9999,0.988,0.9901,1.0,1.0,0.9999,1.0,0.8092,0.5,0.9863,0.5531,1.0,0.9999
fixing,synthetic,?A4=A1|3,gemini-2.0-flash,0.0,0.0019,0.0,0.0,0.0003,1.0,0.0607,0.0939,0.7709,0.8365,0.5747,0.9904,0.0023,0.0001,0.1312,0.0002,0.9878,0.5722
fixing,synthetic,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,0.0762,0.0,0.0,0.0244,0.9712,1.0,0.6196,0.9917,0.9931,0.9674,0.9999,0.0769,0.0129,0.6061,0.0178,1.0,0.9597
fixing,synthetic,?A4=A1|3,gpt-4o,0.0,0.0668,0.0,0.0,0.0201,0.9506,0.5,1.0,0.9878,0.9862,0.9459,0.9994,0.0885,0.0182,0.5531,0.0341,0.9999,0.9196
fixing,synthetic,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0003,0.0,0.0,0.0,0.3555,0.0205,0.0288,1.0,0.6399,0.3601,0.9564,0.0,0.0,0.0403,0.0,0.9599,0.3642
fixing,synthetic,?A4=A1|3,grok-3-mini,0.0,0.0001,0.0,0.0,0.0,0.2786,0.0168,0.0298,0.5,1.0,0.2983,0.9608,0.0,0.0,0.0266,0.0,0.9564,0.2706
fixing,synthetic,?A4=A1|3,deepseek-chat,0.0,0.001,0.0,0.0,0.0002,0.5747,0.0662,0.0998,0.7634,0.8115,1.0,0.9825,0.0018,0.0002,0.1215,0.0004,0.9953,0.5643
fixing,synthetic,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0261,0.0003,0.0018,0.0925,0.0946,0.0401,1.0,0.0,0.0,0.0008,0.0,0.655,0.0133
fixing,synthetic,?A4=A1|3,gpt-5-nano,0.0,0.5,0.0,0.0013,0.28,0.9992,0.9597,0.9476,1.0,1.0,0.9994,1.0,1.0,0.2354,0.9476,0.221,1.0,0.9999
fixing,synthetic,?A4=A1|3,deepseek-reasoner,0.0,0.8144,0.0,0.0137,0.6146,1.0,0.9943,0.9908,1.0,1.0,1.0,1.0,0.8438,1.0,0.9898,0.617,1.0,1.0
fixing,synthetic,?A4=A1|3,gemini-2.5-pro,0.0,0.0632,0.0,0.0,0.0259,0.9196,0.5,0.5531,0.9808,0.9881,0.9281,0.9998,0.0885,0.0198,1.0,0.022,0.9999,0.9324
fixing,synthetic,?A4=A1|3,gpt-5-mini,0.0,0.7646,0.0,0.0055,0.5531,0.9999,0.992,0.9808,1.0,1.0,0.9999,1.0,0.8761,0.5,0.9889,1.0,1.0,1.0
fixing,synthetic,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0288,0.0002,0.0005,0.0814,0.0925,0.0145,0.5,0.0,0.0,0.0005,0.0,1.0,0.0178
fixing,synthetic,?A4=A1|3,gpt-5,0.0,0.0015,0.0,0.0,0.0003,0.5722,0.0769,0.1312,0.7566,0.8463,0.5643,0.9964,0.0003,0.0002,0.1163,0.0,0.9937,1.0
wikidata,LC-QuAD,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,1.0,0.0047,1.0,1.0,1.0,1.0,0.6911,1.0,1.0,1.0,1.0,1.0,1.0,0.9977,1.0
wikidata,LC-QuAD,?A1=A2,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,mistral-small:24b,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9675,1.0,1.0,0.9238,0.0,0.3015
wikidata,LC-QuAD,?A1=A2,llama3.1:70b,0.0,0.9983,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gpt-4o,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
wikidata,LC-QuAD,?A1=A2,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,grok-3-mini,0.0,0.434,1.0,1.0,0.0003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9894,1.0
wikidata,LC-QuAD,?A1=A2,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gpt-5-nano,0.0,0.0,1.0,0.0595,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.3679,0.0,0.0166
wikidata,LC-QuAD,?A1=A2,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gpt-5-mini,0.0,0.0,1.0,0.1264,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.7502,1.0,1.0,1.0,0.0,0.0297
wikidata,LC-QuAD,?A1=A2,o3,0.0,0.0057,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0222,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A2,gpt-5,0.0,0.0,1.0,0.7825,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9918,1.0,1.0,0.9853,0.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-oss:20b,0.0001,1.0,1.0,1.0,0.1316,1.0,1.0,1.0,1.0,0.9989,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,mistral-small:24b,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.4495,1.0,1.0,0.7906,0.0,0.0941
wikidata,LC-QuAD,?A1=A3+A4,llama3.1:70b,0.0039,0.9423,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-4o,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,grok-3-mini,0.0,0.0035,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8316,1.0
wikidata,LC-QuAD,?A1=A3+A4,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-5-nano,0.0,0.0,1.0,0.6482,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.9123,0.0,0.0967
wikidata,LC-QuAD,?A1=A3+A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-5-mini,0.0,0.0,1.0,0.295,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.1553,1.0,1.0,1.0,0.0,0.012
wikidata,LC-QuAD,?A1=A3+A4,o3,0.0,0.0005,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.2612,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1=A3+A4,gpt-5,0.0,0.0,1.0,0.9402,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9443,1.0,1.0,0.9945,0.0,1.0
wikidata,LC-QuAD,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gpt-oss:20b,0.0,1.0,1.0,1.0,0.1102,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,1.0
wikidata,LC-QuAD,?A1>A3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,mistral-small:24b,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0092,1.0,1.0,0.7906,1.0,1.0,0.8942,0.0036,0.3974
wikidata,LC-QuAD,?A1>A3,llama3.1:70b,0.0,0.9332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gpt-4o,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
wikidata,LC-QuAD,?A1>A3,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,grok-3-mini,0.0,0.0004,1.0,0.9952,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,0.9998,0.3854,0.9937
wikidata,LC-QuAD,?A1>A3,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gpt-5-nano,0.0,0.0,1.0,0.295,0.0,1.0,1.0,1.0,1.0,0.0008,1.0,1.0,1.0,1.0,1.0,0.72,0.0001,0.14
wikidata,LC-QuAD,?A1>A3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A3,gpt-5-mini,0.0,0.0,1.0,0.1659,0.0,1.0,1.0,1.0,1.0,0.0005,1.0,1.0,0.3854,1.0,1.0,1.0,0.0,0.0719
wikidata,LC-QuAD,?A1>A3,o3,0.0,0.0009,1.0,0.9982,0.0,1.0,1.0,1.0,1.0,0.72,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9987
wikidata,LC-QuAD,?A1>A3,gpt-5,0.0,0.0,1.0,0.6985,0.0,1.0,1.0,1.0,1.0,0.0124,1.0,1.0,0.9179,1.0,1.0,0.9605,0.0031,1.0
wikidata,LC-QuAD,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gpt-oss:20b,0.0,1.0,1.0,1.0,0.0182,1.0,1.0,1.0,1.0,0.9196,1.0,1.0,1.0,1.0,1.0,1.0,0.9923,1.0
wikidata,LC-QuAD,?A1>A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,mistral-small:24b,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.9155,1.0,1.0,0.9853,0.0,0.926
wikidata,LC-QuAD,?A1>A4,llama3.1:70b,0.0,0.9912,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gpt-4o,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
wikidata,LC-QuAD,?A1>A4,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,grok-3-mini,0.0,0.1312,1.0,1.0,0.0002,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8785,1.0
wikidata,LC-QuAD,?A1>A4,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gpt-5-nano,0.0,0.0,1.0,0.1358,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.8595,0.0,0.6123
wikidata,LC-QuAD,?A1>A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gpt-5-mini,0.0,0.0,1.0,0.0297,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.2366,1.0,1.0,1.0,0.0,0.3327
wikidata,LC-QuAD,?A1>A4,o3,0.0,0.0164,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.1908,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A1>A4,gpt-5,0.0,0.0,1.0,0.1185,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,1.0,1.0,0.7646,0.0,1.0
wikidata,LC-QuAD,?A3∅A4,llama3.1:8b,1.0,0.9973,1.0,0.0,0.2916,1.0,1.0,0.0,1.0,0.9968,1.0,1.0,0.0006,1.0,1.0,0.0,1.0,0.4495
wikidata,LC-QuAD,?A3∅A4,gpt-oss:20b,0.0063,1.0,1.0,0.0,0.0005,1.0,1.0,0.0,1.0,0.57,1.0,1.0,0.0,1.0,1.0,0.0,0.9987,0.001
wikidata,LC-QuAD,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9991,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,llama3.1:70b,0.7949,0.9998,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.9998,1.0,1.0,0.008,1.0,1.0,0.0003,1.0,0.7248
wikidata,LC-QuAD,?A3∅A4,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gpt-4o,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,grok-3-mini,0.0072,0.57,1.0,0.0,0.0005,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.9988,0.001
wikidata,LC-QuAD,?A3∅A4,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gpt-5-nano,0.9997,1.0,1.0,0.0001,0.9956,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0551,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,gpt-5-mini,1.0,1.0,1.0,0.0023,0.9999,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.9749,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A3∅A4,o3,0.0,0.0053,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0047,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
wikidata,LC-QuAD,?A3∅A4,gpt-5,0.6482,0.9997,1.0,0.0,0.3601,1.0,1.0,0.0,1.0,0.9997,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-oss:20b,0.0312,1.0,1.0,0.1094,0.1094,1.0,1.0,0.0312,1.0,1.0,1.0,1.0,0.9991,1.0,1.0,0.9963,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,mistral-small:24b,0.5,0.9844,1.0,1.0,0.75,1.0,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,llama3.1:70b,0.5,0.9844,1.0,0.75,1.0,1.0,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gemini-2.0-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-4o,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-4.1-2025-04-14,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,grok-3-mini,0.0,0.0002,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0592,1.0,1.0,0.0287,0.9564,0.9616
wikidata,LC-QuAD,?A4=A1|3,deepseek-chat,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gemini-2.5-flash,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-5-nano,0.0,0.0065,1.0,0.0001,0.0003,1.0,1.0,0.0,1.0,0.9824,1.0,1.0,1.0,1.0,1.0,0.5,0.9988,0.9993
wikidata,LC-QuAD,?A4=A1|3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gemini-2.5-pro,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,LC-QuAD,?A4=A1|3,gpt-5-mini,0.0001,0.0176,1.0,0.0005,0.0001,1.0,1.0,0.0001,1.0,0.9935,1.0,1.0,0.6964,1.0,1.0,1.0,0.9995,0.9999
wikidata,LC-QuAD,?A4=A1|3,o3,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0925,1.0,1.0,0.0041,1.0,1.0,0.002,1.0,0.4119
wikidata,LC-QuAD,?A4=A1|3,gpt-5,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.1051,1.0,1.0,0.0036,1.0,1.0,0.0007,0.7483,1.0
wikidata,overall,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-oss:20b,0.0,1.0,0.9912,1.0,0.0,0.999,0.0163,1.0,1.0,0.9994,1.0,0.9964,1.0,1.0,0.9489,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0107,1.0,1.0,0.0,0.7271,0.0001,1.0,1.0,0.6572,0.9731,0.6012,1.0,1.0,0.288,1.0,1.0,1.0
wikidata,overall,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.9645,1.0,0.0,0.9941,0.0,0.7395
wikidata,overall,?A1=A2,llama3.1:70b,0.0,1.0,1.0,1.0,1.0,1.0,0.9918,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A2,gemini-2.0-flash,0.0,0.0013,0.3023,1.0,0.0,1.0,0.0,1.0,1.0,0.4111,0.9292,0.3792,1.0,1.0,0.1236,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.9866,1.0,1.0,0.0102,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
wikidata,overall,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0126,1.0
wikidata,overall,?A1=A2,grok-3-mini,0.0,0.0008,0.3702,1.0,0.0,0.6177,0.0,1.0,1.0,1.0,0.97,0.4549,1.0,1.0,0.1434,1.0,1.0,1.0
wikidata,overall,?A1=A2,deepseek-chat,0.0,0.0,0.0325,1.0,0.0,0.0836,0.0,1.0,1.0,0.0355,1.0,0.0411,1.0,1.0,0.0053,1.0,0.9998,1.0
wikidata,overall,?A1=A2,gemini-2.5-flash,0.0,0.0044,0.4321,1.0,0.0,0.6537,0.0,1.0,1.0,0.5748,0.9661,1.0,1.0,1.0,0.2143,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0411,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.7765,0.0,0.1252
wikidata,overall,?A1=A2,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A2,gemini-2.5-pro,0.0,0.0594,0.7407,1.0,0.0,0.8936,0.0004,1.0,1.0,0.8731,0.9959,0.8105,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0071,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.2447,1.0,0.0,1.0,0.0,0.0284
wikidata,overall,?A1=A2,o3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.9896,0.0,0.0003,0.0,1.0,1.0,0.0,1.0,1.0,1.0
wikidata,overall,?A1=A2,gpt-5,0.0,0.0,0.0,0.2829,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.8882,1.0,0.0,0.9758,0.0,1.0
wikidata,overall,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0,1.0,0.0,0.252,0.0,1.0,0.9995,1.0,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,1.0,0.8144,1.0,0.7653,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.7062,1.0,0.0,1.0,0.0,0.9998
wikidata,overall,?A1=A3+A4,llama3.1:70b,0.0,1.0,0.2217,1.0,1.0,1.0,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gemini-2.0-flash,0.0,0.7774,0.0,1.0,0.0,1.0,0.0,1.0,0.9998,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.2814,1.0,0.5491,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
wikidata,overall,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0007,0.0,1.0,0.0,0.0003,0.0,1.0,1.0,0.9738,0.6724,0.923,1.0,1.0,0.9999,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0318,1.0,0.0709,0.3676,1.0,1.0,0.9887,1.0,0.9978,1.0
wikidata,overall,?A1=A3+A4,deepseek-chat,0.0,0.0002,0.0,1.0,0.0,0.0001,0.0,1.0,0.3642,0.9398,1.0,0.8538,1.0,1.0,0.9994,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0922,0.6638,0.1705,1.0,1.0,1.0,0.9898,1.0,0.9979,1.0
wikidata,overall,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.3148,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.999
wikidata,overall,?A1=A3+A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0002,0.014,0.0008,0.0131,1.0,1.0,1.0,1.0,0.6107,1.0
wikidata,overall,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0033
wikidata,overall,?A1=A3+A4,o3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0027,0.0001,0.0027,1.0,1.0,0.4204,1.0,1.0,1.0
wikidata,overall,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0012,1.0,0.0,0.9973,0.0,1.0
wikidata,overall,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A3,gpt-oss:20b,0.0,1.0,0.0158,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.9867,1.0,1.0,0.0,1.0,0.0332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A3,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.9883,1.0,0.0,1.0,0.0,1.0
wikidata,overall,?A1>A3,llama3.1:70b,0.0,1.0,1.0,1.0,1.0,1.0,0.9841,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A3,gemini-2.0-flash,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.999,0.9995,0.9995,1.0,1.0,1.0,0.8926,1.0,0.9994,1.0
wikidata,overall,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.9725,1.0,0.0192,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A3,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
wikidata,overall,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0,0.0,1.0,0.0,0.0013,0.0,1.0,1.0,0.3879,0.5971,0.9061,1.0,1.0,0.0332,1.0,0.3318,1.0
wikidata,overall,?A1>A3,grok-3-mini,0.0,0.0,0.0,1.0,0.0,0.0007,0.0,1.0,0.6391,1.0,0.6761,0.9535,1.0,1.0,0.0312,1.0,0.4629,1.0
wikidata,overall,?A1>A3,deepseek-chat,0.0,0.0,0.0,1.0,0.0,0.0006,0.0,1.0,0.4349,0.3496,1.0,0.8621,1.0,1.0,0.0239,1.0,0.3117,1.0
wikidata,overall,?A1>A3,gemini-2.5-flash,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.1089,0.0537,0.1572,1.0,1.0,1.0,0.0009,1.0,0.041,1.0
wikidata,overall,?A1>A3,gpt-5-nano,0.0,0.0,0.0,0.0137,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.9901,0.0,0.9968
wikidata,overall,?A1>A3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A3,gemini-2.5-pro,0.0,0.0,0.0,1.0,0.0,0.1235,0.0,1.0,0.9725,0.9735,0.9803,0.9993,1.0,1.0,1.0,1.0,0.9645,1.0
wikidata,overall,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0117,1.0,0.0,1.0,0.0,0.6578
wikidata,overall,?A1>A3,o3,0.0,0.0,0.0,1.0,0.0,0.0008,0.0,1.0,0.6941,0.5616,0.7126,0.9649,1.0,1.0,0.0415,1.0,1.0,1.0
wikidata,overall,?A1>A3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0039,1.0,0.0,0.3674,0.0,1.0
wikidata,overall,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A4,gpt-oss:20b,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,1.0,0.0,0.0106,0.0,1.0,0.0093,0.0044,0.9597,0.9572,1.0,1.0,0.0075,1.0,0.0,1.0
wikidata,overall,?A1>A4,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0
wikidata,overall,?A1>A4,llama3.1:70b,0.0,1.0,1.0,1.0,1.0,1.0,0.9996,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A4,gemini-2.0-flash,0.0,0.0,0.9914,1.0,0.0,1.0,0.0,1.0,0.532,0.5688,1.0,1.0,1.0,1.0,0.4678,1.0,0.0576,1.0
wikidata,overall,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,1.0,1.0,1.0,0.0005,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
wikidata,overall,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.9926,1.0,0.0,0.5,0.0,1.0,1.0,0.5708,1.0,1.0,1.0,1.0,0.4508,1.0,0.0569,1.0
wikidata,overall,?A1>A4,grok-3-mini,0.0,0.0,0.9964,1.0,0.0,0.4586,0.0,1.0,0.4574,1.0,1.0,1.0,1.0,1.0,0.4033,1.0,0.0218,1.0
wikidata,overall,?A1>A4,deepseek-chat,0.0,0.0,0.0479,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.5162,1.0,1.0,0.0,1.0,0.0,1.0
wikidata,overall,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0506,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.5162,1.0,1.0,1.0,0.0,1.0,0.0,1.0
wikidata,overall,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.9929,0.0,0.9935
wikidata,overall,?A1>A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A1>A4,gemini-2.5-pro,0.0,0.0,0.994,1.0,0.0,0.5642,0.0,1.0,0.5817,0.6235,1.0,1.0,1.0,1.0,1.0,1.0,0.0667,1.0
wikidata,overall,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0086,1.0,0.0,1.0,0.0,0.555
wikidata,overall,?A1>A4,o3,0.0,0.0,1.0,1.0,0.0,0.9501,0.0,1.0,0.9507,0.9812,1.0,1.0,1.0,1.0,0.9421,1.0,1.0,1.0
wikidata,overall,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0078,1.0,0.0,0.4724,0.0,1.0
wikidata,overall,?A3∅A4,llama3.1:8b,1.0,1.0,1.0,0.0,0.0005,0.0,1.0,0.0,0.0001,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0855
wikidata,overall,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.9999,0.0,0.9994,0.0,1.0,0.0003,0.0,1.0,0.0
wikidata,overall,?A3∅A4,gpt-4.1-nano-2025-04-14,0.0,0.9901,1.0,0.0,0.0,0.0,0.0391,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.2089,0.0,1.0,0.0
wikidata,overall,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A3∅A4,llama3.1:70b,0.9996,1.0,1.0,0.0,1.0,0.001,1.0,0.0,0.1208,1.0,0.0089,1.0,0.0003,1.0,1.0,0.0,1.0,0.9646
wikidata,overall,?A3∅A4,gemini-2.0-flash,1.0,1.0,1.0,0.0,0.9992,1.0,1.0,0.0,0.9585,1.0,0.7558,1.0,0.404,1.0,1.0,0.0,1.0,1.0
wikidata,overall,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0,1.0,0.9689,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.8048,0.0,1.0,0.0
wikidata,overall,?A3∅A4,gpt-4o,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A3∅A4,gpt-4.1-2025-04-14,1.0,1.0,1.0,0.0,0.8933,0.0498,1.0,0.0,1.0,1.0,0.1677,1.0,0.0187,1.0,1.0,0.0,1.0,0.9924
wikidata,overall,?A3∅A4,grok-3-mini,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5632,0.0,1.0,0.0,0.0,0.9862,0.0
wikidata,overall,?A3∅A4,deepseek-chat,1.0,1.0,1.0,0.0,0.9928,0.2721,1.0,0.0,0.8525,1.0,1.0,1.0,0.1881,1.0,1.0,0.0,1.0,0.9999
wikidata,overall,?A3∅A4,gemini-2.5-flash,0.0,0.0008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4789,0.0,1.0,0.0,1.0,0.0,0.0,0.9626,0.0
wikidata,overall,?A3∅A4,gpt-5-nano,1.0,1.0,1.0,0.0,0.9998,0.6226,1.0,0.0,0.9843,1.0,0.8297,1.0,1.0,1.0,1.0,0.0,1.0,1.0
wikidata,overall,?A3∅A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A3∅A4,gemini-2.5-pro,0.0,0.9998,0.8175,0.0,0.0,0.0,0.2226,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
wikidata,overall,?A3∅A4,gpt-5-mini,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A3∅A4,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0176,0.0,0.0475,0.0,1.0,0.0,0.0,1.0,0.0
wikidata,overall,?A3∅A4,gpt-5,0.9238,1.0,1.0,0.0,0.0404,0.0,1.0,0.0,0.0093,1.0,0.0002,1.0,0.0,1.0,1.0,0.0,1.0,1.0
wikidata,overall,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.0,0.0,0.0046,0.0,0.0,0.9629,1.0,0.9394,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.0,0.0008,1.0,0.5796,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,mistral-small:24b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8316,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,llama3.1:70b,0.0,1.0,0.9996,0.0001,1.0,1.0,0.9997,0.0005,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gemini-2.0-flash,0.0,0.9967,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.5,0.0,0.0007,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gpt-4o,0.0,1.0,1.0,0.2612,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.0465,0.0,0.0,0.0,0.0001,0.0,0.0,1.0,1.0,0.4762,1.0,0.9939,1.0,1.0,0.9519,1.0,1.0
wikidata,overall,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.7305,0.0,1.0,0.9972,0.0,0.9167,1.0
wikidata,overall,?A4=A1|3,deepseek-chat,0.0,0.0741,0.0,0.0,0.0,0.0001,0.0,0.0,0.5711,1.0,1.0,1.0,0.9956,1.0,1.0,0.9682,1.0,1.0
wikidata,overall,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2992,0.0,1.0,0.0,1.0,0.9727,0.0,0.7384,0.9981
wikidata,overall,?A4=A1|3,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0081,1.0,0.0058,1.0,1.0,1.0,1.0,0.1967,1.0,1.0
wikidata,overall,?A4=A1|3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0036,0.0,0.0341,0.0,1.0,1.0,0.0,0.0642,0.7841
wikidata,overall,?A4=A1|3,gpt-5-mini,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0591,1.0,0.0396,1.0,0.8261,1.0,1.0,1.0,1.0,1.0
wikidata,overall,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0954,0.0,0.29,0.0,1.0,0.9457,0.0,1.0,0.9953
wikidata,overall,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0025,0.0,1.0,0.2409,0.0,0.0059,1.0
wikidata,qawiki,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1=A2,gpt-oss:20b,0.0,1.0,0.6673,1.0,0.0015,0.027,0.0821,1.0,0.2916,0.4415,0.5,0.6641,1.0,1.0,0.0967,1.0,0.9558,1.0
wikidata,qawiki,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.4427,1.0,1.0,0.0001,0.0035,0.0403,1.0,0.1908,0.3179,0.3804,0.554,1.0,1.0,0.0326,1.0,0.8987,1.0
wikidata,qawiki,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9999,0.0,0.0,0.0,0.0,0.5,1.0,0.0,0.9597,0.0,0.86
wikidata,qawiki,?A1=A2,llama3.1:70b,0.0001,0.9995,1.0,1.0,1.0,0.9461,0.9879,1.0,0.9993,0.9998,0.9998,0.9998,1.0,1.0,0.9927,1.0,1.0,1.0
wikidata,qawiki,?A1=A2,gemini-2.0-flash,0.0,0.9871,0.9989,1.0,0.1148,1.0,0.8192,1.0,0.9646,0.9937,0.9947,0.996,1.0,1.0,0.8365,1.0,0.9999,1.0
wikidata,qawiki,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.9558,0.9808,1.0,0.0307,0.2923,1.0,1.0,0.838,0.9283,0.9599,0.9808,1.0,1.0,0.5722,1.0,0.9992,1.0
wikidata,qawiki,?A1=A2,gpt-4o,0.0,0.0,0.0,0.0004,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0001,1.0,0.0,0.0354,0.0,0.01
wikidata,qawiki,?A1=A2,gpt-4.1-2025-04-14,0.0,0.7949,0.8785,1.0,0.0026,0.0748,0.2557,1.0,1.0,0.7634,0.7912,0.8945,1.0,1.0,0.2366,1.0,0.988,1.0
wikidata,qawiki,?A1=A2,grok-3-mini,0.0,0.6706,0.7852,1.0,0.0007,0.0178,0.1279,1.0,0.3601,1.0,0.6286,0.7912,1.0,1.0,0.0758,1.0,0.9878,1.0
wikidata,qawiki,?A1=A2,deepseek-chat,0.0,0.6101,0.7288,1.0,0.0006,0.0147,0.0814,1.0,0.3136,0.5,1.0,0.72,1.0,1.0,0.0939,1.0,0.9508,1.0
wikidata,qawiki,?A1=A2,gemini-2.5-flash,0.0,0.4439,0.554,1.0,0.0007,0.0098,0.0403,1.0,0.1744,0.3136,0.3854,1.0,1.0,1.0,0.0481,1.0,0.8987,1.0
wikidata,qawiki,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.6123,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.9795,0.0,0.9061
wikidata,qawiki,?A1=A2,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1=A2,gemini-2.5-pro,0.0,0.9443,0.9856,1.0,0.0216,0.2786,0.5722,1.0,0.8595,0.968,0.9506,0.9756,1.0,1.0,1.0,1.0,0.9997,1.0
wikidata,qawiki,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0769,0.0,0.0,0.0,0.9853,0.0,0.0,0.0,0.0,0.0448,1.0,0.0,1.0,0.0,0.2403
wikidata,qawiki,?A1=A2,o3,0.0,0.0821,0.1611,1.0,0.0,0.0002,0.0023,1.0,0.0244,0.0288,0.0845,0.1611,1.0,1.0,0.0011,1.0,1.0,1.0
wikidata,qawiki,?A1=A2,gpt-5,0.0,0.0,0.0,0.2204,0.0,0.0,0.0,0.9962,0.0,0.0,0.0,0.0,0.162,1.0,0.0,0.8811,0.0,1.0
wikidata,qawiki,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0007,1.0,0.0,0.0178,0.0002,1.0,0.0607,0.7142,0.8447,0.3038,1.0,1.0,0.434,1.0,0.8042,1.0
wikidata,qawiki,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.002,0.9998,1.0,1.0,0.2905,0.9534,0.5982,1.0,0.9936,1.0,1.0,0.9988,1.0,1.0,0.9997,1.0,1.0,1.0
wikidata,qawiki,?A1=A3+A4,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.2879,1.0,0.0,0.9558,0.0,0.6026
wikidata,qawiki,?A1=A3+A4,llama3.1:70b,0.0078,1.0,0.8666,1.0,1.0,0.9979,0.8666,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1=A3+A4,gemini-2.0-flash,0.0,0.9937,0.105,1.0,0.0106,1.0,0.0592,1.0,0.7142,0.9974,0.9997,0.9564,1.0,1.0,0.9749,1.0,0.9989,1.0
wikidata,qawiki,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.002,1.0,0.5982,1.0,0.2905,0.9824,1.0,1.0,0.9904,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0144,0.0,0.0011
wikidata,qawiki,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.9712,0.0245,1.0,0.0021,0.4253,0.0318,1.0,1.0,0.9937,0.9962,0.9157,1.0,1.0,0.9622,1.0,0.9965,1.0
wikidata,qawiki,?A1=A3+A4,grok-3-mini,0.0,0.4253,0.0001,1.0,0.0,0.0081,0.0,1.0,0.0178,1.0,0.7566,0.1635,1.0,1.0,0.2786,1.0,0.7017,1.0
wikidata,qawiki,?A1=A3+A4,deepseek-chat,0.0,0.2498,0.0001,1.0,0.0,0.0012,0.0,1.0,0.01,0.3642,1.0,0.0877,1.0,1.0,0.1553,1.0,0.5,1.0
wikidata,qawiki,?A1=A3+A4,gemini-2.5-flash,0.0,0.8042,0.0041,1.0,0.0001,0.0925,0.0007,1.0,0.1635,0.9157,0.9552,1.0,1.0,1.0,0.7077,1.0,0.9622,1.0
wikidata,qawiki,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.7995,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.997,0.0,0.8544
wikidata,qawiki,?A1=A3+A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1=A3+A4,gemini-2.5-pro,0.0,0.6911,0.0012,1.0,0.0,0.0551,0.0001,1.0,0.0843,0.8365,0.9123,0.4278,1.0,1.0,1.0,1.0,0.8998,1.0
wikidata,qawiki,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0821,0.0,0.0,0.0,0.9943,0.0,0.0,0.0,0.0,0.0083,1.0,0.0,1.0,0.0,0.0814
wikidata,qawiki,?A1=A3+A4,o3,0.0,0.3038,0.0001,1.0,0.0,0.0035,0.0,1.0,0.01,0.43,0.6196,0.0843,1.0,1.0,0.1808,1.0,1.0,1.0
wikidata,qawiki,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.9996,0.0,0.0,0.0,0.0,0.2257,1.0,0.0,0.9599,0.0,1.0
wikidata,qawiki,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1>A3,gpt-oss:20b,0.0,1.0,0.0876,1.0,0.0,0.5,0.0047,1.0,0.6101,0.962,0.9998,0.9982,1.0,1.0,0.5,1.0,0.9876,1.0
wikidata,qawiki,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.9456,1.0,1.0,0.0005,0.9381,0.1958,1.0,0.9756,0.9992,1.0,1.0,1.0,1.0,0.9444,1.0,0.9999,1.0
wikidata,qawiki,?A1>A3,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9993,0.0,0.0003,0.0124,0.0084,0.6706,1.0,0.0,0.9782,0.0015,0.9296
wikidata,qawiki,?A1>A3,llama3.1:70b,0.0012,1.0,0.9998,1.0,1.0,1.0,0.9975,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1>A3,gemini-2.0-flash,0.0,0.6061,0.1,1.0,0.0,1.0,0.0066,1.0,0.6641,0.9686,0.9999,0.9997,1.0,1.0,0.5551,1.0,0.9953,1.0
wikidata,qawiki,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.9981,0.8736,1.0,0.0069,0.9972,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,0.9986,1.0,1.0,1.0
wikidata,qawiki,?A1>A3,gpt-4o,0.0,0.0,0.0,0.0023,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0158,1.0,0.0,0.2557,0.0,0.1802
wikidata,qawiki,?A1>A3,gpt-4.1-2025-04-14,0.0,0.5,0.046,1.0,0.0,0.4439,0.0011,1.0,1.0,0.954,0.9996,0.9987,1.0,1.0,0.4388,1.0,0.9894,1.0
wikidata,qawiki,?A1>A3,grok-3-mini,0.0,0.0668,0.0018,0.9999,0.0,0.0556,0.0,1.0,0.0804,1.0,0.9592,0.926,0.9999,1.0,0.0395,1.0,0.7558,1.0
wikidata,qawiki,?A1>A3,deepseek-chat,0.0,0.0005,0.0,0.9937,0.0,0.0002,0.0,1.0,0.0011,0.068,1.0,0.3939,0.9978,1.0,0.0009,1.0,0.1568,1.0
wikidata,qawiki,?A1>A3,gemini-2.5-flash,0.0,0.0041,0.0001,0.9956,0.0,0.0009,0.0,1.0,0.0032,0.1185,0.705,1.0,0.9989,1.0,0.0013,1.0,0.2442,1.0
wikidata,qawiki,?A1>A3,gpt-5-nano,0.0,0.0,0.0,0.4415,0.0,0.0,0.0,0.9931,0.0,0.0001,0.0049,0.0025,1.0,1.0,0.0,0.9558,0.0003,0.9231
wikidata,qawiki,?A1>A3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1>A3,gemini-2.5-pro,0.0,0.6061,0.0924,1.0,0.0,0.5551,0.004,1.0,0.678,0.98,0.9996,0.9995,1.0,1.0,1.0,1.0,0.9967,1.0
wikidata,qawiki,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0442,0.0,0.0,0.0,0.838,0.0,0.0,0.0,0.0,0.0821,1.0,0.0,1.0,0.0,0.4278
wikidata,qawiki,?A1>A3,o3,0.0,0.024,0.0003,0.9993,0.0,0.0106,0.0,1.0,0.0222,0.3389,0.8963,0.8341,0.9999,1.0,0.008,1.0,1.0,1.0
wikidata,qawiki,?A1>A3,gpt-5,0.0,0.0,0.0,0.1144,0.0,0.0,0.0,0.889,0.0,0.0,0.0001,0.0,0.1341,1.0,0.0,0.7077,0.0,1.0
wikidata,qawiki,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1>A4,gpt-oss:20b,0.0,1.0,0.9923,1.0,0.0,0.7405,0.0704,1.0,0.6556,0.7906,0.9984,0.9887,1.0,1.0,0.2879,1.0,0.2983,1.0
wikidata,qawiki,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.0147,1.0,0.9999,0.0,0.0385,0.0,1.0,0.0106,0.0571,0.6418,0.4503,1.0,1.0,0.0021,1.0,0.0035,1.0
wikidata,qawiki,?A1>A4,mistral-small:24b,0.0,0.0,0.0002,1.0,0.0,0.0,0.0,0.9999,0.0,0.0,0.0002,0.0,0.954,1.0,0.0,0.9856,0.0,0.8296
wikidata,qawiki,?A1>A4,llama3.1:70b,0.0012,1.0,1.0,1.0,1.0,1.0,0.9987,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1>A4,gemini-2.0-flash,0.0,0.3494,0.9785,1.0,0.0,1.0,0.0147,1.0,0.4439,0.6043,0.9945,0.9639,1.0,1.0,0.1264,1.0,0.1358,1.0
wikidata,qawiki,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.9593,1.0,1.0,0.0033,0.9934,1.0,1.0,0.9836,0.9918,1.0,1.0,1.0,1.0,0.8642,1.0,0.8554,1.0
wikidata,qawiki,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0005,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0586,1.0,0.0,0.0662,0.0,0.0147
wikidata,qawiki,?A1>A4,gpt-4.1-2025-04-14,0.0,0.4469,0.9953,1.0,0.0,0.6641,0.0325,1.0,1.0,0.7336,0.9951,0.985,1.0,1.0,0.1744,1.0,0.1908,1.0
wikidata,qawiki,?A1>A4,grok-3-mini,0.0,0.295,0.9659,1.0,0.0,0.5,0.0166,1.0,0.3776,1.0,0.9822,0.9703,1.0,1.0,0.092,1.0,0.1144,1.0
wikidata,qawiki,?A1>A4,deepseek-chat,0.0,0.0038,0.4518,0.9999,0.0,0.012,0.0,1.0,0.0102,0.032,1.0,0.3638,1.0,1.0,0.0003,1.0,0.0008,1.0
wikidata,qawiki,?A1>A4,gemini-2.5-flash,0.0,0.0215,0.646,1.0,0.0,0.0619,0.0001,1.0,0.029,0.0557,0.7193,1.0,1.0,1.0,0.0023,1.0,0.0031,1.0
wikidata,qawiki,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0804,0.0,0.0,0.0,0.9702,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.6358,0.0,0.2664
wikidata,qawiki,?A1>A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A1>A4,gemini-2.5-pro,0.0,0.7995,0.9991,1.0,0.0,0.9238,0.2051,1.0,0.8945,0.9481,0.9999,0.9991,1.0,1.0,1.0,1.0,0.5612,1.0
wikidata,qawiki,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0326,0.0,0.0,0.0,0.9674,0.0,0.0,0.0,0.0,0.5,1.0,0.0,1.0,0.0,0.1725
wikidata,qawiki,?A1>A4,o3,0.0,0.7865,0.9983,1.0,0.0,0.9155,0.2135,1.0,0.8785,0.9296,0.9997,0.9987,1.0,1.0,0.5612,1.0,1.0,1.0
wikidata,qawiki,?A1>A4,gpt-5,0.0,0.0,0.0,0.2483,0.0,0.0,0.0,0.9934,0.0,0.0,0.0,0.0,0.8256,1.0,0.0,0.9075,0.0,1.0
wikidata,qawiki,?A3∅A4,llama3.1:8b,1.0,0.8785,0.9997,0.0,0.0011,0.0003,0.9878,0.0,0.0022,0.996,0.0015,0.9902,0.0,1.0,0.3974,0.0,0.9702,0.0
wikidata,qawiki,?A3∅A4,gpt-oss:20b,0.1908,1.0,0.9552,0.0,0.0007,0.0,0.8316,0.0,0.0,0.9693,0.0002,0.932,0.0,1.0,0.0769,0.0,0.8275,0.0
wikidata,qawiki,?A3∅A4,gpt-4.1-nano-2025-04-14,0.0012,0.0877,1.0,0.0,0.0,0.0,0.2923,0.0,0.0,0.5722,0.0,0.4278,0.0,1.0,0.0047,0.0,0.2434,0.0
wikidata,qawiki,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0069,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9934,1.0,1.0
wikidata,qawiki,?A3∅A4,llama3.1:70b,0.9997,0.9997,1.0,0.0,1.0,0.2399,1.0,0.0,0.3991,1.0,0.5,1.0,0.0058,1.0,0.9887,0.0,1.0,0.0099
wikidata,qawiki,?A3∅A4,gemini-2.0-flash,0.9999,1.0,1.0,0.0,0.8389,1.0,1.0,0.0,0.7159,1.0,0.7865,1.0,0.0207,1.0,0.9994,0.0001,1.0,0.0423
wikidata,qawiki,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0288,0.2612,0.8192,0.0,0.0,0.0,1.0,0.0,0.0,0.8192,0.0,0.6962,0.0,1.0,0.0178,0.0,0.5,0.0
wikidata,qawiki,?A3∅A4,gpt-4o,1.0,1.0,1.0,0.9975,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A3∅A4,gpt-4.1-2025-04-14,0.9991,1.0,1.0,0.0,0.6955,0.3877,1.0,0.0,1.0,1.0,0.6482,1.0,0.0128,1.0,0.9981,0.0,1.0,0.0265
wikidata,qawiki,?A3∅A4,grok-3-mini,0.0098,0.068,0.5722,0.0,0.0,0.0,0.2923,0.0,0.0,1.0,0.0,0.4194,0.0,1.0,0.0009,0.0,0.2122,0.0
wikidata,qawiki,?A3∅A4,deepseek-chat,0.9995,0.9999,1.0,0.0,0.6224,0.2983,1.0,0.0,0.4495,1.0,1.0,1.0,0.0125,1.0,0.9955,0.0,1.0,0.0209
wikidata,qawiki,?A3∅A4,gemini-2.5-flash,0.0218,0.1325,0.7077,0.0,0.0,0.0,0.4321,0.0,0.0,0.7294,0.0,1.0,0.0,1.0,0.0012,0.0,0.3601,0.0
wikidata,qawiki,?A3∅A4,gpt-5-nano,1.0,1.0,1.0,0.0,0.997,0.9888,1.0,0.0,0.993,1.0,0.9928,1.0,1.0,1.0,1.0,0.0008,1.0,0.7142
wikidata,qawiki,?A3∅A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A3∅A4,gemini-2.5-pro,0.6985,0.9597,0.9981,0.0,0.0215,0.0016,0.992,0.0,0.0045,0.9997,0.0099,0.9997,0.0,1.0,1.0,0.0,0.9931,0.0
wikidata,qawiki,?A3∅A4,gpt-5-mini,1.0,1.0,1.0,0.0147,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A3∅A4,o3,0.0586,0.2858,0.8519,0.0,0.0,0.0,0.6321,0.0,0.0,0.8852,0.0,0.7634,0.0,1.0,0.0168,0.0,1.0,0.0
wikidata,qawiki,?A3∅A4,gpt-5,1.0,1.0,1.0,0.0,0.9947,0.9748,1.0,0.0,0.9843,1.0,0.9875,1.0,0.4253,1.0,1.0,0.0001,1.0,1.0
wikidata,qawiki,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0262,0.0001,0.0001,0.0835,0.0021,0.0,0.1796,0.9978,0.9252,0.9622,0.5881,1.0,0.9738,0.5,0.895,0.9423
wikidata,qawiki,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0156,0.9915,1.0,0.0625,0.1445,0.8491,0.3633,0.0156,0.9539,1.0,0.9998,0.9999,0.9962,1.0,1.0,0.9867,0.9998,0.9999
wikidata,qawiki,?A4=A1|3,mistral-small:24b,0.5,1.0,0.9922,1.0,0.875,0.999,0.9688,0.5,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A4=A1|3,llama3.1:70b,0.25,1.0,0.9648,0.5,1.0,0.998,0.8906,0.25,0.9995,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A4=A1|3,gemini-2.0-flash,0.002,0.9682,0.3036,0.0107,0.0195,1.0,0.1133,0.002,0.7728,0.9999,0.9998,0.9992,0.9608,1.0,0.9983,0.9519,0.9967,0.997
wikidata,qawiki,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0625,0.9997,0.8555,0.1875,0.3438,0.9673,1.0,0.0625,0.9941,1.0,1.0,1.0,0.9993,1.0,1.0,0.9995,1.0,1.0
wikidata,qawiki,?A4=A1|3,gpt-4o,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A4=A1|3,gpt-4.1-2025-04-14,0.0005,0.9165,0.1334,0.0032,0.0059,0.4018,0.0327,0.0005,1.0,0.9999,0.9919,0.9987,0.9408,1.0,0.998,0.8811,0.9915,0.9947
wikidata,qawiki,?A4=A1|3,grok-3-mini,0.0,0.0096,0.0001,0.0,0.0,0.0005,0.0,0.0,0.0008,1.0,0.3136,0.345,0.0133,1.0,0.3388,0.0038,0.1537,0.1662
wikidata,qawiki,?A4=A1|3,deepseek-chat,0.0,0.1405,0.0008,0.0,0.0,0.0013,0.0001,0.0,0.0214,0.7912,1.0,0.6321,0.1405,1.0,0.6254,0.0925,0.434,0.5
wikidata,qawiki,?A4=A1|3,gemini-2.5-flash,0.0,0.0843,0.0005,0.0,0.0,0.003,0.0,0.0,0.0053,0.7878,0.5,1.0,0.0669,1.0,0.5747,0.0176,0.3318,0.4194
wikidata,qawiki,?A4=A1|3,gpt-5-nano,0.0,0.5881,0.0154,0.0001,0.0003,0.0946,0.0038,0.0,0.1509,0.9964,0.9252,0.9738,1.0,1.0,0.9793,0.5,0.9283,0.9423
wikidata,qawiki,?A4=A1|3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,qawiki,?A4=A1|3,gemini-2.5-pro,0.0,0.0669,0.0003,0.0,0.0,0.0053,0.0,0.0,0.0073,0.7976,0.5,0.5747,0.0577,1.0,1.0,0.0318,0.3238,0.3872
wikidata,qawiki,?A4=A1|3,gpt-5-mini,0.0,0.6762,0.0392,0.0003,0.0005,0.1189,0.0037,0.0,0.2403,0.9993,0.9564,0.9963,0.7095,1.0,0.9904,1.0,0.9807,0.9888
wikidata,qawiki,?A4=A1|3,o3,0.0,0.2024,0.0013,0.0,0.0,0.0113,0.0002,0.0,0.0262,0.9242,0.6911,0.8083,0.1662,1.0,0.8204,0.073,1.0,0.6964
wikidata,qawiki,?A4=A1|3,gpt-5,0.0,0.1316,0.0008,0.0,0.0,0.0096,0.0,0.0,0.0173,0.9283,0.6286,0.7294,0.1316,1.0,0.8062,0.0461,0.5,1.0
wikidata,spinach,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A2,gpt-oss:20b,0.0,1.0,0.4469,1.0,0.0924,0.554,0.029,1.0,0.9993,0.6821,0.4439,0.7601,1.0,1.0,0.3327,1.0,0.9703,1.0
wikidata,spinach,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.6556,1.0,1.0,0.2307,0.6673,0.0492,1.0,0.9998,0.7743,0.5573,0.8252,1.0,1.0,0.4427,1.0,0.9721,1.0
wikidata,spinach,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9902,0.0001,0.0,0.0,0.0,0.7084,1.0,0.0,0.9369,0.0,0.7646
wikidata,spinach,?A1=A2,llama3.1:70b,0.0,0.9444,0.849,1.0,1.0,0.9564,0.1908,1.0,1.0,0.9481,0.9075,0.9756,1.0,1.0,0.8275,1.0,0.9995,1.0
wikidata,spinach,?A1=A2,gemini-2.0-flash,0.0,0.554,0.4427,1.0,0.0925,1.0,0.0178,1.0,0.9999,0.6821,0.43,0.7975,1.0,1.0,0.2983,1.0,0.9856,1.0
wikidata,spinach,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.985,0.9733,1.0,0.8785,0.992,1.0,1.0,1.0,0.9953,0.9822,0.9988,1.0,1.0,0.9638,1.0,1.0,1.0
wikidata,spinach,?A1=A2,gpt-4o,0.0,0.0,0.0,0.0218,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0494,1.0,0.0,0.2557,0.0,0.0871
wikidata,spinach,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0019,0.0006,1.0,0.0,0.0002,0.0,1.0,1.0,0.0028,0.0,0.0032,1.0,1.0,0.0,1.0,0.0769,1.0
wikidata,spinach,?A1=A2,grok-3-mini,0.0,0.4373,0.3258,1.0,0.092,0.4373,0.0106,1.0,0.999,1.0,0.3136,0.6673,1.0,1.0,0.2204,1.0,0.9674,1.0
wikidata,spinach,?A1=A2,deepseek-chat,0.0,0.6641,0.5573,1.0,0.1725,0.7017,0.0362,1.0,1.0,0.7912,1.0,0.8923,1.0,1.0,0.434,1.0,0.9955,1.0
wikidata,spinach,?A1=A2,gemini-2.5-flash,0.0,0.3359,0.2522,1.0,0.0481,0.3089,0.0033,1.0,0.9989,0.4427,0.1885,1.0,1.0,1.0,0.1341,1.0,0.9179,1.0
wikidata,spinach,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.3919,0.0,0.0,0.0,0.9765,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.8853,0.0,0.6146
wikidata,spinach,?A1=A2,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A2,gemini-2.5-pro,0.0,0.7646,0.6673,1.0,0.2858,0.8115,0.0676,1.0,1.0,0.86,0.6911,0.9231,1.0,1.0,1.0,1.0,0.9995,1.0
wikidata,spinach,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.111,0.0,0.0,0.0,0.838,0.0,0.0,0.0,0.0,0.1958,1.0,0.0,1.0,0.0,0.2557
wikidata,spinach,?A1=A2,o3,0.0,0.0557,0.049,1.0,0.0014,0.0326,0.0001,1.0,0.9597,0.0662,0.0122,0.14,1.0,1.0,0.0019,1.0,1.0,1.0
wikidata,spinach,?A1=A2,gpt-5,0.0,0.0,0.0,0.3327,0.0,0.0,0.0,0.9519,0.0,0.0,0.0,0.0,0.5,1.0,0.0,0.838,0.0,1.0
wikidata,spinach,?A1=A3+A4,llama3.1:8b,1.0,1.0,0.9807,1.0,0.9999,1.0,0.9988,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0022,1.0,0.1481,0.7077,0.0748,1.0,0.939,0.9786,0.889,0.996,1.0,1.0,0.9999,1.0,0.9995,1.0
wikidata,spinach,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.073,0.9996,1.0,1.0,0.9793,0.9999,0.9054,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A3+A4,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0001,0.0002,0.0,0.0022,0.8987,1.0,0.0247,1.0,0.008,0.9924
wikidata,spinach,?A1=A3+A4,llama3.1:70b,0.0007,0.9186,0.0577,1.0,1.0,0.9738,0.345,1.0,0.998,0.9985,0.9947,0.9999,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A3+A4,gemini-2.0-flash,0.0,0.4278,0.0004,1.0,0.0669,1.0,0.0216,1.0,0.9054,0.9622,0.8595,0.9953,1.0,1.0,0.9999,1.0,0.9996,1.0
wikidata,spinach,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0064,0.9646,0.1917,1.0,0.7878,0.9927,1.0,1.0,0.9993,0.9998,0.9989,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0031,1.0,0.0,0.3899,0.0,0.0876
wikidata,spinach,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.1239,0.0,1.0,0.0073,0.1917,0.0026,1.0,1.0,0.779,0.5643,0.9635,1.0,1.0,0.9989,1.0,0.9953,1.0
wikidata,spinach,?A1=A3+A4,grok-3-mini,0.0,0.0494,0.0,0.9999,0.0045,0.0843,0.0007,1.0,0.3506,1.0,0.3642,0.9123,1.0,1.0,0.9947,1.0,0.9646,1.0
wikidata,spinach,?A1=A3+A4,deepseek-chat,0.0,0.1802,0.0003,1.0,0.0147,0.2366,0.0035,1.0,0.5643,0.7566,1.0,0.9674,1.0,1.0,0.9952,1.0,0.99,1.0
wikidata,spinach,?A1=A3+A4,gemini-2.5-flash,0.0,0.0093,0.0,0.999,0.0002,0.0119,0.0,1.0,0.0717,0.1553,0.0662,1.0,1.0,1.0,0.8659,1.0,0.7077,1.0
wikidata,spinach,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.1611,0.0,0.0,0.0,0.9987,0.0,0.0,0.0,0.0,1.0,1.0,0.0006,0.9998,0.0001,0.9481
wikidata,spinach,?A1=A3+A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1=A3+A4,gemini-2.5-pro,0.0,0.0004,0.0,0.9851,0.0,0.0003,0.0,1.0,0.0035,0.0147,0.0113,0.2148,0.9997,1.0,1.0,1.0,0.2858,1.0
wikidata,spinach,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,0.7121,0.0,0.0,0.0,0.0,0.0007,1.0,0.0,1.0,0.0,0.0998
wikidata,spinach,?A1=A3+A4,o3,0.0,0.0017,0.0,0.9956,0.0,0.0017,0.0,1.0,0.0145,0.0748,0.0251,0.4278,1.0,1.0,0.8275,1.0,1.0,1.0
wikidata,spinach,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.015,0.0,0.0,0.0,0.9456,0.0,0.0,0.0,0.0,0.092,1.0,0.0,0.9459,0.0,1.0
wikidata,spinach,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A3,gpt-oss:20b,0.0,1.0,0.1108,1.0,0.1568,0.9964,0.2983,1.0,0.9999,1.0,0.9951,0.9995,1.0,1.0,0.9997,1.0,0.9978,1.0
wikidata,spinach,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.929,1.0,1.0,0.8688,1.0,0.8389,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A3,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0444,0.0,0.9836,0.0973,0.2319,0.0284,0.0432,0.8341,1.0,0.1063,0.9953,0.0619,0.9865
wikidata,spinach,?A1>A3,llama3.1:70b,0.0,0.8963,0.2005,1.0,1.0,0.9999,0.5,1.0,1.0,1.0,0.9995,0.9997,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A3,gemini-2.0-flash,0.0,0.0076,0.0,0.9732,0.0003,1.0,0.0006,0.9996,0.7693,0.9369,0.5,0.6146,0.9982,1.0,0.7796,1.0,0.6196,1.0
wikidata,spinach,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.7865,0.2399,1.0,0.6321,0.9998,1.0,1.0,1.0,1.0,0.9996,0.9998,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A3,gpt-4o,0.0,0.0,0.0,0.0325,0.0,0.0008,0.0,1.0,0.0037,0.0128,0.0005,0.0007,0.1791,1.0,0.003,0.7288,0.0016,0.6061
wikidata,spinach,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0004,0.0,0.9375,0.0,0.3294,0.0,0.9981,1.0,0.8316,0.2757,0.3877,0.9888,1.0,0.5679,0.9999,0.3679,0.9997
wikidata,spinach,?A1>A3,grok-3-mini,0.0,0.0001,0.0,0.8358,0.0,0.111,0.0,0.993,0.2612,1.0,0.0871,0.1611,0.9568,1.0,0.28,0.9992,0.1611,0.9989
wikidata,spinach,?A1>A3,deepseek-chat,0.0,0.0102,0.0,0.984,0.0013,0.6224,0.0011,0.9998,0.8144,0.9519,1.0,0.6641,0.9985,1.0,0.8042,1.0,0.6641,1.0
wikidata,spinach,?A1>A3,gemini-2.5-flash,0.0,0.0015,0.0,0.9751,0.0008,0.5,0.0004,0.9997,0.7159,0.8987,0.4439,1.0,0.9995,1.0,0.705,1.0,0.554,1.0
wikidata,spinach,?A1>A3,gpt-5-nano,0.0,0.0,0.0,0.2442,0.0,0.0041,0.0,0.8815,0.0207,0.071,0.0034,0.0015,1.0,1.0,0.0207,0.9795,0.0077,0.9414
wikidata,spinach,?A1>A3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A3,gemini-2.5-pro,0.0,0.0009,0.0,0.9297,0.0001,0.322,0.0001,0.9985,0.5679,0.8092,0.2841,0.3939,0.9888,1.0,1.0,0.9999,0.3601,0.9998
wikidata,spinach,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0106,0.0,0.0,0.0,0.3804,0.0003,0.0018,0.0001,0.0,0.0448,1.0,0.0003,1.0,0.0001,0.4373
wikidata,spinach,?A1>A3,o3,0.0,0.0049,0.0,0.9608,0.0002,0.5,0.0001,0.9992,0.7502,0.8987,0.4439,0.554,0.9962,1.0,0.7634,1.0,1.0,1.0
wikidata,spinach,?A1>A3,gpt-5,0.0,0.0,0.0,0.0267,0.0,0.0001,0.0,0.5,0.0007,0.0027,0.0,0.0,0.1055,1.0,0.0005,0.6821,0.0001,1.0
wikidata,spinach,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A4,gpt-oss:20b,0.0,1.0,0.9996,1.0,0.0124,0.9898,0.046,1.0,0.9476,0.9991,0.9998,1.0,1.0,1.0,0.9918,1.0,0.9955,1.0
wikidata,spinach,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.0011,1.0,0.9639,0.0,0.1958,0.0,1.0,0.0427,0.5,0.7865,0.9639,1.0,1.0,0.2135,1.0,0.2442,1.0
wikidata,spinach,?A1>A4,mistral-small:24b,0.0,0.0,0.0598,1.0,0.0,0.011,0.0,0.9997,0.0031,0.0596,0.1488,0.5469,0.9987,1.0,0.0177,1.0,0.0199,0.9968
wikidata,spinach,?A1>A4,llama3.1:70b,0.0,0.994,1.0,1.0,1.0,1.0,0.72,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A4,gemini-2.0-flash,0.0,0.0198,0.8736,0.9942,0.0,1.0,0.0,1.0,0.2307,0.8252,0.9593,0.9984,1.0,1.0,0.5522,1.0,0.6123,1.0
wikidata,spinach,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.9756,1.0,1.0,0.3854,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0009,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0044,0.3294,1.0,0.0,0.7388,0.0,0.28
wikidata,spinach,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0885,0.9778,0.9984,0.0,0.849,0.0004,1.0,1.0,0.9703,0.9897,0.9999,1.0,1.0,0.8659,1.0,0.8785,1.0
wikidata,spinach,?A1>A4,grok-3-mini,0.0,0.0022,0.6081,0.9626,0.0,0.2522,0.0,1.0,0.0557,1.0,0.8091,0.988,1.0,1.0,0.2399,1.0,0.2879,1.0
wikidata,spinach,?A1>A4,deepseek-chat,0.0,0.0005,0.2983,0.9037,0.0,0.0704,0.0,1.0,0.0192,0.2662,1.0,0.9076,1.0,1.0,0.0973,1.0,0.1142,0.9999
wikidata,spinach,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0598,0.5469,0.0,0.0038,0.0,0.9978,0.0002,0.0244,0.1446,1.0,0.9964,1.0,0.0032,0.9999,0.0039,0.9981
wikidata,spinach,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0033,0.0,0.0,0.0,0.7693,0.0,0.0,0.0001,0.0076,1.0,1.0,0.0,0.932,0.0,0.5
wikidata,spinach,?A1>A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A1>A4,gemini-2.5-pro,0.0,0.0166,0.8554,0.9897,0.0,0.5522,0.0001,1.0,0.2148,0.8389,0.9375,0.9987,1.0,1.0,1.0,1.0,0.6445,1.0
wikidata,spinach,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3746,0.0,0.0,0.0,0.0004,0.1325,1.0,0.0,1.0,0.0,0.1341
wikidata,spinach,?A1>A4,o3,0.0,0.0099,0.8341,0.9885,0.0,0.5,0.0,1.0,0.1908,0.7995,0.926,0.9984,1.0,1.0,0.5,1.0,1.0,1.0
wikidata,spinach,?A1>A4,gpt-5,0.0,0.0,0.0,0.0072,0.0,0.0,0.0,0.8092,0.0,0.0,0.0002,0.0047,0.6196,1.0,0.0,0.9231,0.0,1.0
wikidata,spinach,?A3∅A4,llama3.1:8b,1.0,1.0,0.996,0.0,0.5612,0.3327,0.9997,0.0,0.4487,1.0,0.3294,1.0,0.181,1.0,0.9996,0.0092,1.0,0.3177
wikidata,spinach,?A3∅A4,gpt-oss:20b,0.0,1.0,0.0427,0.0,0.0,0.0,0.14,0.0,0.0,0.7878,0.0,0.9075,0.0,1.0,0.1725,0.0,0.9904,0.0
wikidata,spinach,?A3∅A4,gpt-4.1-nano-2025-04-14,0.0093,0.9778,1.0,0.0,0.0093,0.0032,0.8519,0.0,0.0102,0.9943,0.001,0.999,0.0033,1.0,0.8597,0.0,1.0,0.0092
wikidata,spinach,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.004,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9985,1.0,1.0
wikidata,spinach,?A3∅A4,llama3.1:70b,0.5612,1.0,0.996,0.0,1.0,0.3294,0.9999,0.0,0.4449,1.0,0.322,1.0,0.1841,1.0,0.9994,0.0092,1.0,0.3222
wikidata,spinach,?A3∅A4,gemini-2.0-flash,0.7646,1.0,0.9987,0.0,0.7693,1.0,1.0,0.0,0.6673,1.0,0.5627,1.0,0.3222,1.0,0.9999,0.0192,1.0,0.5
wikidata,spinach,?A3∅A4,gpt-4.1-mini-2025-04-14,0.0008,0.9179,0.2434,0.0,0.0005,0.0002,1.0,0.0,0.0004,0.9734,0.0,0.9955,0.0002,1.0,0.6641,0.0,0.9997,0.0012
wikidata,spinach,?A3∅A4,gpt-4o,1.0,1.0,1.0,0.9986,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A3∅A4,gpt-4.1-2025-04-14,0.6506,1.0,0.9951,0.0,0.6611,0.4427,0.9999,0.0,1.0,1.0,0.4478,1.0,0.2135,1.0,0.9999,0.006,1.0,0.3939
wikidata,spinach,?A3∅A4,grok-3-mini,0.0,0.345,0.0129,0.0,0.0,0.0,0.0541,0.0,0.0,1.0,0.0,0.8083,0.0,1.0,0.061,0.0,0.9793,0.0
wikidata,spinach,?A3∅A4,deepseek-chat,0.7693,1.0,0.9997,0.0,0.7796,0.5627,1.0,0.0,0.653,1.0,1.0,1.0,0.3323,1.0,0.9999,0.0347,1.0,0.5
wikidata,spinach,?A3∅A4,gemini-2.5-flash,0.0,0.1725,0.0027,0.0,0.0,0.0,0.0122,0.0,0.0,0.3318,0.0,1.0,0.0,1.0,0.0214,0.0,0.9283,0.0
wikidata,spinach,?A3∅A4,gpt-5-nano,0.8728,1.0,0.9983,0.0,0.8698,0.7557,0.9999,0.0,0.8554,1.0,0.7423,1.0,1.0,1.0,1.0,0.0121,1.0,0.8365
wikidata,spinach,?A3∅A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A3∅A4,gemini-2.5-pro,0.0011,0.9075,0.2094,0.0,0.0016,0.0002,0.4439,0.0,0.0002,0.9739,0.0002,0.9919,0.0,1.0,1.0,0.0,0.9999,0.0
wikidata,spinach,?A3∅A4,gpt-5-mini,0.9952,1.0,1.0,0.004,0.9952,0.9897,1.0,0.0,0.9973,1.0,0.9789,1.0,0.9959,1.0,1.0,1.0,1.0,0.9997
wikidata,spinach,?A3∅A4,o3,0.0,0.0261,0.0001,0.0,0.0,0.0,0.0009,0.0,0.0,0.0577,0.0,0.1662,0.0,1.0,0.0005,0.0,1.0,0.0
wikidata,spinach,?A3∅A4,gpt-5,0.7617,1.0,0.9952,0.0,0.7557,0.595,0.9995,0.0,0.705,1.0,0.5868,1.0,0.2786,1.0,1.0,0.0012,1.0,1.0
wikidata,spinach,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0002,0.0,0.0002,0.2706,0.0096,0.0,0.7294,0.9953,0.6321,0.9997,0.9616,1.0,1.0,0.968,0.9999,0.9993
wikidata,spinach,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.125,1.0,1.0,0.125,0.6562,0.9999,0.9648,0.6562,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,llama3.1:70b,0.125,1.0,0.6562,0.125,1.0,1.0,1.0,0.6875,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,gemini-2.0-flash,0.0,0.8463,0.0012,0.0,0.0005,1.0,0.0112,0.0001,0.9713,0.9997,0.9054,1.0,0.9887,1.0,1.0,0.9967,1.0,1.0
wikidata,spinach,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0078,0.997,0.1445,0.0078,0.0625,0.9983,1.0,0.1445,0.9999,1.0,0.9998,1.0,0.9999,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,gpt-4o,0.125,1.0,0.6562,0.125,0.6875,1.0,0.9648,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.4194,0.0,0.0,0.0,0.0898,0.0007,0.0,1.0,0.9855,0.5,0.9996,0.8811,1.0,1.0,0.9242,0.9998,0.9992
wikidata,spinach,?A4=A1|3,grok-3-mini,0.0,0.0145,0.0,0.0,0.0,0.0012,0.0,0.0,0.0378,1.0,0.0307,0.9599,0.1189,1.0,0.99,0.2403,0.932,0.8675
wikidata,spinach,?A4=A1|3,deepseek-chat,0.0,0.5,0.0001,0.0,0.0,0.1917,0.0013,0.0,0.6445,0.9879,1.0,0.9995,0.8519,1.0,0.9998,0.9186,0.9989,0.9968
wikidata,spinach,?A4=A1|3,gemini-2.5-flash,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0013,0.0814,0.0014,1.0,0.0041,1.0,0.7443,0.0096,0.4225,0.2983
wikidata,spinach,?A4=A1|3,gpt-5-nano,0.0,0.1051,0.0,0.0,0.0,0.032,0.0003,0.0,0.2403,0.9519,0.2434,0.9988,1.0,1.0,1.0,0.8062,0.9999,0.9964
wikidata,spinach,?A4=A1|3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,spinach,?A4=A1|3,gemini-2.5-pro,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.0251,0.0005,0.3714,0.0003,1.0,1.0,0.0012,0.221,0.105
wikidata,spinach,?A4=A1|3,gpt-5-mini,0.0,0.0758,0.0,0.0,0.0,0.0113,0.0001,0.0,0.1537,0.8811,0.1481,0.997,0.3872,1.0,0.9997,1.0,0.9988,0.9936
wikidata,spinach,?A4=A1|3,o3,0.0,0.0004,0.0,0.0,0.0,0.0,0.0,0.0,0.0008,0.1325,0.0032,0.7214,0.0012,1.0,0.8761,0.0064,1.0,0.377
wikidata,spinach,?A4=A1|3,gpt-5,0.0,0.0023,0.0,0.0,0.0,0.0001,0.0,0.0,0.003,0.2291,0.0083,0.8115,0.0133,1.0,0.9534,0.0245,0.8281,1.0
wikidata,synthetic,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A2,gpt-oss:20b,0.0,1.0,0.9405,1.0,0.9129,1.0,0.8316,1.0,1.0,0.9998,1.0,0.849,1.0,1.0,0.9973,1.0,1.0,1.0
wikidata,synthetic,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.1013,1.0,1.0,0.4373,0.9981,0.2879,1.0,1.0,0.9686,0.9957,0.3294,1.0,1.0,0.9196,1.0,0.9998,1.0
wikidata,synthetic,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0017,0.0,1.0,0.2693,0.0001,0.0017,0.0,0.6043,1.0,0.0,0.1871,0.0172,0.6026
wikidata,synthetic,?A1=A2,llama3.1:70b,0.0,0.1456,0.6821,1.0,1.0,1.0,0.3877,1.0,1.0,0.9953,0.9999,0.434,1.0,1.0,0.9734,1.0,1.0,1.0
wikidata,synthetic,?A1=A2,gemini-2.0-flash,0.0,0.0001,0.0047,0.9992,0.0002,1.0,0.0003,1.0,0.9994,0.1958,0.5679,0.0001,0.9997,1.0,0.0662,0.9812,0.8945,0.9999
wikidata,synthetic,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.2612,0.7995,1.0,0.7159,0.9999,1.0,1.0,1.0,0.9982,0.9999,0.6224,1.0,1.0,0.9887,1.0,1.0,1.0
wikidata,synthetic,?A1=A2,gpt-4o,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0002,1.0,0.0,0.0,0.0,0.0003
wikidata,synthetic,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.8055,0.0,0.0018,0.0,1.0,1.0,0.0002,0.0018,0.0,0.8395,1.0,0.0,0.4566,0.0395,0.8432
wikidata,synthetic,?A1=A2,grok-3-mini,0.0,0.0005,0.0556,1.0,0.0119,0.8853,0.0048,1.0,1.0,1.0,0.8853,0.0047,1.0,1.0,0.322,0.9978,0.9822,1.0
wikidata,synthetic,?A1=A2,deepseek-chat,0.0,0.0001,0.0092,0.9992,0.0005,0.5679,0.0003,1.0,0.9994,0.1958,1.0,0.0004,0.9997,1.0,0.0821,0.9824,0.8736,0.9999
wikidata,synthetic,?A1=A2,gemini-2.5-flash,0.0,0.2307,0.7693,1.0,0.6911,1.0,0.5,1.0,1.0,0.9983,0.9999,1.0,1.0,1.0,0.9862,1.0,1.0,1.0
wikidata,synthetic,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.5,0.0,0.0007,0.0,0.9999,0.2285,0.0,0.0007,0.0,1.0,1.0,0.0,0.1312,0.0093,0.5573
wikidata,synthetic,?A1=A2,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A2,gemini-2.5-pro,0.0,0.0063,0.1312,1.0,0.0541,0.9674,0.0244,1.0,1.0,0.7796,0.9558,0.0298,1.0,1.0,1.0,0.9996,0.9967,1.0
wikidata,synthetic,?A1=A2,gpt-5-mini,0.0,0.0,0.0001,0.8736,0.0,0.0315,0.0,1.0,0.6282,0.0044,0.0299,0.0,0.9196,1.0,0.0009,1.0,0.1175,0.9
wikidata,synthetic,?A1=A2,o3,0.0,0.0,0.0004,0.9907,0.0,0.1744,0.0,1.0,0.98,0.0362,0.1958,0.0,0.9954,1.0,0.008,0.923,1.0,0.999
wikidata,synthetic,?A1=A2,gpt-5,0.0,0.0,0.0,0.5,0.0,0.0002,0.0,0.9999,0.225,0.0,0.0003,0.0,0.5573,1.0,0.0,0.1528,0.0028,1.0
wikidata,synthetic,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0004,0.9994,0.0015,0.2434,0.0,1.0,0.9519,0.9919,0.3714,0.7502,1.0,1.0,0.9825,1.0,0.9978,1.0
wikidata,synthetic,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0001,0.9999,1.0,1.0,0.7383,0.9977,0.4119,1.0,1.0,1.0,0.9991,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A3+A4,mistral-small:24b,0.0,0.0016,0.0,1.0,0.0,0.0003,0.0,1.0,0.0703,0.1034,0.0007,0.0093,0.7277,1.0,0.0855,0.926,0.2443,0.9956
wikidata,synthetic,?A1=A3+A4,llama3.1:70b,0.0,0.9996,0.4159,1.0,1.0,0.997,0.2617,1.0,1.0,1.0,0.9988,0.9999,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A3+A4,gemini-2.0-flash,0.0,0.8519,0.0068,0.9999,0.0096,1.0,0.0017,1.0,0.9977,0.9987,0.6864,0.9179,1.0,1.0,0.9975,1.0,0.9999,1.0
wikidata,synthetic,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0002,1.0,0.7483,1.0,0.8569,0.9996,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0002,1.0,0.0,0.001,0.0,0.0546
wikidata,synthetic,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.0871,0.0,0.9556,0.0,0.0068,0.0,1.0,1.0,0.678,0.0266,0.162,0.994,1.0,0.6224,0.9992,0.9002,1.0
wikidata,synthetic,?A1=A3+A4,grok-3-mini,0.0,0.0214,0.0,0.9323,0.0,0.0038,0.0,1.0,0.4388,1.0,0.0178,0.0814,0.9901,1.0,0.5,0.9989,0.8852,1.0
wikidata,synthetic,?A1=A3+A4,deepseek-chat,0.0,0.7443,0.003,0.9997,0.0041,0.4357,0.0003,1.0,0.9881,0.992,1.0,0.8721,1.0,1.0,0.9917,1.0,0.9992,1.0
wikidata,synthetic,?A1=A3+A4,gemini-2.5-flash,0.0,0.3679,0.0,0.9952,0.0004,0.14,0.0,1.0,0.9061,0.9599,0.2088,1.0,0.9998,1.0,0.9283,1.0,0.9943,1.0
wikidata,synthetic,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.3582,0.0,0.0,0.0,0.9999,0.0124,0.0201,0.0,0.0005,1.0,1.0,0.012,0.889,0.0885,0.9944
wikidata,synthetic,?A1=A3+A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1=A3+A4,gemini-2.5-pro,0.0,0.0401,0.0,0.945,0.0,0.0069,0.0,1.0,0.5,0.6358,0.0192,0.1279,0.9945,1.0,1.0,0.9996,0.8853,1.0
wikidata,synthetic,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.1142,0.0,0.0,0.0,0.9995,0.0018,0.0026,0.0,0.0001,0.1802,1.0,0.0011,1.0,0.015,0.9456
wikidata,synthetic,?A1=A3+A4,o3,0.0,0.0058,0.0,0.8222,0.0,0.0005,0.0,1.0,0.1684,0.2122,0.0023,0.0144,0.9476,1.0,0.1958,0.9924,1.0,1.0
wikidata,synthetic,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0083,0.0,0.0,0.0,0.9653,0.0,0.0,0.0,0.0,0.0113,1.0,0.0,0.0876,0.0,1.0
wikidata,synthetic,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1>A3,gpt-oss:20b,0.0,1.0,0.3015,0.9659,0.1748,0.8042,0.0047,0.9967,0.9994,0.9778,0.8341,0.9923,0.9977,1.0,0.9296,0.9577,0.9984,1.0
wikidata,synthetic,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.7825,1.0,0.9915,0.3974,0.9225,0.0462,0.9996,1.0,0.9924,0.935,0.9987,0.9992,1.0,0.9834,0.989,0.9989,1.0
wikidata,synthetic,?A1>A3,mistral-small:24b,0.0,0.0571,0.016,1.0,0.0064,0.1678,0.0002,0.935,0.9124,0.5,0.2111,0.6418,0.8019,1.0,0.3638,0.5469,0.792,0.9853
wikidata,synthetic,?A1>A3,llama3.1:70b,0.0,0.8856,0.6985,0.9968,1.0,0.9756,0.0762,0.9998,1.0,0.9991,0.978,0.9992,0.9999,1.0,0.994,0.9968,0.9999,1.0
wikidata,synthetic,?A1>A3,gemini-2.0-flash,0.0,0.2841,0.1225,0.8858,0.046,1.0,0.0007,0.9877,0.9977,0.8942,0.6081,0.9413,0.9716,1.0,0.7995,0.8825,0.992,0.9998
wikidata,synthetic,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.9981,0.9741,0.9999,0.9573,0.9997,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,0.9999,1.0,1.0
wikidata,synthetic,?A1>A3,gpt-4o,0.0,0.0062,0.0008,0.1037,0.0005,0.0211,0.0,1.0,0.5,0.1204,0.0238,0.19,0.3222,1.0,0.0619,0.1073,0.3244,0.792
wikidata,synthetic,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0016,0.0001,0.1339,0.0,0.0055,0.0,0.5901,1.0,0.0676,0.0066,0.1562,0.354,1.0,0.0352,0.1339,0.3359,0.8642
wikidata,synthetic,?A1>A3,grok-3-mini,0.0,0.0427,0.015,0.5937,0.0023,0.1659,0.0,0.9173,0.9638,1.0,0.2005,0.7243,0.8554,1.0,0.3776,0.5978,0.8785,0.9948
wikidata,synthetic,?A1>A3,deepseek-chat,0.0,0.2442,0.1037,0.8491,0.0407,0.5,0.0008,0.9862,0.9972,0.8688,1.0,0.9405,0.9721,1.0,0.7517,0.8523,0.978,0.9996
wikidata,synthetic,?A1>A3,gemini-2.5-flash,0.0,0.0164,0.0032,0.4518,0.0018,0.0963,0.0,0.8639,0.9033,0.383,0.1013,1.0,0.7517,1.0,0.2483,0.4518,0.7517,0.9865
wikidata,synthetic,?A1>A3,gpt-5-nano,0.0,0.0052,0.0018,0.2723,0.0004,0.048,0.0,0.7557,0.7338,0.2135,0.049,0.3417,1.0,1.0,0.1102,0.2399,0.554,0.954
wikidata,synthetic,?A1>A3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1>A3,gemini-2.5-pro,0.0,0.1144,0.0314,0.7193,0.0124,0.2879,0.0001,0.9608,0.9818,0.7336,0.3417,0.8296,0.9332,1.0,1.0,0.7441,0.9481,0.9995
wikidata,synthetic,?A1>A3,gpt-5-mini,0.0,0.0677,0.0198,0.5469,0.0064,0.1712,0.0001,0.932,0.9124,0.5,0.208,0.6418,0.8389,1.0,0.347,1.0,0.8209,0.9937
wikidata,synthetic,?A1>A3,o3,0.0,0.0039,0.0023,0.2807,0.0003,0.0178,0.0,0.7528,0.7601,0.1908,0.0407,0.3417,0.554,1.0,0.092,0.2559,1.0,0.9734
wikidata,synthetic,?A1>A3,gpt-5,0.0,0.0001,0.0,0.0266,0.0,0.0004,0.0,0.2807,0.2051,0.0111,0.0009,0.0267,0.0804,1.0,0.0015,0.0135,0.0541,1.0
wikidata,synthetic,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1>A4,gpt-oss:20b,0.0,1.0,0.9768,0.9915,0.2629,0.9864,0.0361,1.0,0.9996,1.0,0.9989,0.9863,1.0,1.0,0.9998,1.0,0.9957,1.0
wikidata,synthetic,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.0408,1.0,0.722,0.0043,0.653,0.0001,1.0,0.9405,0.9756,0.8472,0.6009,0.993,1.0,0.926,0.9974,0.7558,1.0
wikidata,synthetic,?A1>A4,mistral-small:24b,0.0,0.016,0.362,1.0,0.002,0.455,0.0001,1.0,0.792,0.8582,0.68,0.4122,0.9751,1.0,0.7801,0.993,0.545,1.0
wikidata,synthetic,?A1>A4,llama3.1:70b,0.0,0.8129,0.9981,0.9991,1.0,0.9994,0.1215,1.0,1.0,1.0,1.0,0.9987,1.0,1.0,0.9999,1.0,0.9998,1.0
wikidata,synthetic,?A1>A4,gemini-2.0-flash,0.0,0.0249,0.4478,0.6328,0.0016,1.0,0.0,1.0,0.9129,0.9413,0.8042,0.5,0.984,1.0,0.9033,0.9952,0.6506,1.0
wikidata,synthetic,?A1>A4,gpt-4.1-mini-2025-04-14,0.0001,0.9802,0.9999,1.0,0.9281,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1>A4,gpt-4o,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0001,1.0,0.0,0.0001,0.0,0.1312
wikidata,synthetic,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0009,0.1013,0.2807,0.0,0.1456,0.0,1.0,1.0,0.7243,0.383,0.1488,0.8927,1.0,0.5627,0.9568,0.2483,1.0
wikidata,synthetic,?A1>A4,grok-3-mini,0.0,0.0,0.046,0.1956,0.0,0.0963,0.0,1.0,0.383,1.0,0.2595,0.0632,0.8209,1.0,0.3899,0.9157,0.111,1.0
wikidata,synthetic,?A1>A4,deepseek-chat,0.0,0.0027,0.2213,0.4076,0.0001,0.2841,0.0,1.0,0.7243,0.8169,1.0,0.2442,0.951,1.0,0.7084,0.9793,0.4007,1.0
wikidata,synthetic,?A1>A4,gemini-2.5-flash,0.0,0.0259,0.5,0.6715,0.0031,0.6061,0.0001,1.0,0.9037,0.9648,0.8341,1.0,0.9907,1.0,0.9196,0.9956,0.7017,1.0
wikidata,synthetic,?A1>A4,gpt-5-nano,0.0,0.0,0.0136,0.0432,0.0,0.0284,0.0,1.0,0.1605,0.2559,0.0809,0.0178,1.0,1.0,0.1642,0.7796,0.0385,0.9987
wikidata,synthetic,?A1>A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A1>A4,gemini-2.5-pro,0.0,0.0006,0.1185,0.2906,0.0002,0.1562,0.0,1.0,0.5627,0.7121,0.3919,0.1312,0.8892,1.0,1.0,0.9568,0.2257,1.0
wikidata,synthetic,?A1>A4,gpt-5-mini,0.0,0.0,0.0053,0.0136,0.0,0.0093,0.0,1.0,0.071,0.1302,0.0361,0.0084,0.322,1.0,0.071,1.0,0.016,0.994
wikidata,synthetic,?A1>A4,o3,0.0,0.0092,0.3389,0.545,0.0006,0.4487,0.0,1.0,0.8296,0.9369,0.6927,0.3957,0.9785,1.0,0.8544,0.9915,1.0,1.0
wikidata,synthetic,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9196,0.0001,0.0001,0.0,0.0,0.0031,1.0,0.0,0.0124,0.0,1.0
wikidata,synthetic,?A3∅A4,llama3.1:8b,1.0,1.0,0.9987,0.0001,0.5,0.2285,0.8858,0.0,0.5937,1.0,0.2135,1.0,0.9998,1.0,0.9999,0.9544,1.0,1.0
wikidata,synthetic,?A3∅A4,gpt-oss:20b,0.0001,1.0,0.0769,0.0,0.0,0.0,0.0007,0.0,0.0,0.9506,0.0,0.7017,0.2557,1.0,0.3089,0.0047,0.6962,0.8447
wikidata,synthetic,?A3∅A4,gpt-4.1-nano-2025-04-14,0.0031,0.9597,1.0,0.0,0.0016,0.0002,0.0266,0.0,0.007,0.9977,0.0001,0.9782,0.7949,1.0,0.8296,0.1568,0.973,0.9894
wikidata,synthetic,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,0.9995,1.0,0.0098,1.0,1.0,0.9984,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A3∅A4,llama3.1:70b,0.6043,1.0,0.9994,0.0,1.0,0.2522,0.926,0.0,0.6583,1.0,0.2559,1.0,0.9999,1.0,1.0,0.9799,1.0,1.0
wikidata,synthetic,?A3∅A4,gemini-2.0-flash,0.8395,1.0,0.9999,0.0013,0.8252,1.0,0.9889,0.0,0.9231,1.0,0.5561,1.0,1.0,1.0,1.0,0.9981,1.0,1.0
wikidata,synthetic,?A3∅A4,gpt-4.1-mini-2025-04-14,0.1678,0.9998,0.9881,0.0,0.1185,0.022,1.0,0.0,0.1909,1.0,0.024,0.9998,0.9933,1.0,0.9968,0.7338,0.9999,1.0
wikidata,synthetic,?A3∅A4,gpt-4o,1.0,1.0,1.0,0.996,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A3∅A4,gpt-4.1-2025-04-14,0.5,1.0,0.9966,0.0,0.446,0.1341,0.8698,0.0,1.0,1.0,0.1748,1.0,1.0,1.0,1.0,0.9648,1.0,1.0
wikidata,synthetic,?A3∅A4,grok-3-mini,0.0,0.1002,0.0057,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.1316,0.0147,1.0,0.0113,0.0,0.1635,0.3601
wikidata,synthetic,?A3∅A4,deepseek-chat,0.8554,1.0,1.0,0.0033,0.8209,0.5561,0.9876,0.0,0.8856,1.0,1.0,1.0,1.0,1.0,1.0,0.9954,1.0,1.0
wikidata,synthetic,?A3∅A4,gemini-2.5-flash,0.0,0.43,0.0442,0.0,0.0,0.0,0.0005,0.0,0.0,0.9423,0.0,1.0,0.1325,1.0,0.1431,0.0004,0.5775,0.7566
wikidata,synthetic,?A3∅A4,gpt-5-nano,0.0004,0.838,0.2916,0.0,0.0002,0.0,0.0137,0.0,0.0002,0.9947,0.0,0.932,1.0,1.0,0.6494,0.01,0.9186,0.9822
wikidata,synthetic,?A3∅A4,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A3∅A4,gemini-2.5-pro,0.0003,0.7975,0.2483,0.0,0.0,0.0,0.0072,0.0,0.0,0.9967,0.0,0.9331,0.5,1.0,1.0,0.0068,0.9331,0.9552
wikidata,synthetic,?A3∅A4,gpt-5-mini,0.074,0.9981,0.8963,0.0,0.038,0.0045,0.354,0.0,0.0632,1.0,0.0093,0.9999,0.9965,1.0,0.9977,1.0,0.9999,1.0
wikidata,synthetic,?A3∅A4,o3,0.0,0.4321,0.0519,0.0,0.0,0.0,0.0002,0.0,0.0,0.9157,0.0,0.5775,0.1481,1.0,0.1431,0.0005,1.0,0.7502
wikidata,synthetic,?A3∅A4,gpt-5,0.0,0.2498,0.0222,0.0,0.0,0.0,0.0001,0.0,0.0,0.7634,0.0,0.3642,0.0436,1.0,0.0877,0.0001,0.3679,1.0
wikidata,synthetic,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0002,0.0,0.0,0.0063,0.0,0.0,0.4321,0.9989,0.01,0.7502,0.9825,1.0,0.9947,0.8519,0.9983,1.0
wikidata,synthetic,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.002,1.0,1.0,0.0547,0.377,0.9423,0.3872,0.0547,0.9998,1.0,0.9423,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,mistral-small:24b,0.125,1.0,0.9893,1.0,0.9648,0.9997,0.9648,0.75,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,llama3.1:70b,0.0078,1.0,0.8281,0.1445,1.0,0.999,0.623,0.1094,1.0,1.0,0.9968,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,gemini-2.0-flash,0.0,0.9981,0.1316,0.0021,0.0107,1.0,0.0384,0.0009,0.9978,1.0,0.6047,0.9987,1.0,1.0,1.0,0.9996,1.0,1.0
wikidata,synthetic,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0078,1.0,0.8062,0.1445,0.623,0.9894,1.0,0.1094,1.0,1.0,0.9935,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,gpt-4o,0.125,1.0,0.9893,0.75,0.9844,0.9999,0.9844,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.6962,0.0007,0.0,0.0,0.0085,0.0,0.0,1.0,0.9994,0.0085,0.8761,0.9932,1.0,0.997,0.9252,0.9998,1.0
wikidata,synthetic,?A4=A1|3,grok-3-mini,0.0,0.0035,0.0,0.0,0.0,0.0,0.0,0.0,0.002,1.0,0.0,0.0175,0.2557,1.0,0.3714,0.0307,0.655,0.9998
wikidata,synthetic,?A4=A1|3,deepseek-chat,0.0,0.9965,0.1316,0.0021,0.0193,0.6047,0.0287,0.0009,0.9978,1.0,1.0,0.9991,1.0,1.0,1.0,0.9997,1.0,1.0
wikidata,synthetic,?A4=A1|3,gemini-2.5-flash,0.0,0.3679,0.0,0.0,0.0,0.0038,0.0,0.0,0.221,0.9932,0.003,1.0,0.9338,1.0,0.9635,0.7017,0.9943,1.0
wikidata,synthetic,?A4=A1|3,gpt-5-nano,0.0,0.0401,0.0,0.0,0.0,0.0001,0.0,0.0,0.0175,0.838,0.0,0.1215,1.0,1.0,0.7142,0.1635,0.8853,1.0
wikidata,synthetic,?A4=A1|3,deepseek-reasoner,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
wikidata,synthetic,?A4=A1|3,gemini-2.5-pro,0.0,0.0147,0.0,0.0,0.0,0.0,0.0,0.0,0.0083,0.7443,0.0,0.0717,0.4253,1.0,1.0,0.0758,0.8115,1.0
wikidata,synthetic,?A4=A1|3,gpt-5-mini,0.0,0.2434,0.0,0.0,0.0,0.0013,0.0,0.0,0.1405,0.9879,0.0009,0.43,0.9157,1.0,0.968,1.0,0.99,1.0
wikidata,synthetic,?A4=A1|3,o3,0.0,0.0047,0.0,0.0,0.0,0.0,0.0,0.0,0.0007,0.5,0.0,0.0144,0.1958,1.0,0.2983,0.0251,1.0,0.9998
wikidata,synthetic,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0007,0.0,0.0,0.0001,1.0,0.0001,0.0,0.0006,1.0
zero-shot,LC-QuAD,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-oss:20b,0.0,1.0,0.7142,1.0,0.0925,0.9638,0.7709,1.0,0.9597,0.9975,0.9995,0.9934,1.0,0.2983,0.9765,1.0,0.9999,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.4253,1.0,1.0,0.032,0.9369,0.6494,1.0,0.9283,0.9931,0.9986,0.9917,1.0,0.1808,0.9324,1.0,0.9989,1.0
zero-shot,LC-QuAD,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0007,0.0,0.8042,0.0003,0.0057,0.0158,0.0063,0.9999,0.0,0.0019,1.0,0.0352,0.9987
zero-shot,LC-QuAD,?A1=A2,llama3.1:70b,0.0001,0.9564,0.9887,1.0,1.0,0.9997,0.9947,1.0,0.9997,1.0,1.0,1.0,1.0,0.8463,0.9998,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1=A2,gemini-2.0-flash,0.0,0.0676,0.111,0.9997,0.0009,1.0,0.14,0.9999,0.5,0.8316,0.9506,0.8256,1.0,0.0053,0.5643,1.0,0.9519,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.3555,0.5,1.0,0.0173,0.9179,1.0,1.0,0.9186,0.9959,0.9978,0.9917,1.0,0.1481,0.9338,1.0,0.9992,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-4o,0.0,0.0,0.0,0.2841,0.0,0.0003,0.0,1.0,0.0001,0.0025,0.0028,0.0035,0.9999,0.0,0.0004,1.0,0.0092,0.9981
zero-shot,LC-QuAD,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0769,0.1279,0.9999,0.0011,0.6358,0.1481,1.0,1.0,0.8785,0.9822,0.8785,1.0,0.0063,0.6254,1.0,0.9765,1.0
zero-shot,LC-QuAD,?A1=A2,grok-3-mini,0.0,0.0069,0.0168,0.9977,0.0,0.2612,0.0121,0.9989,0.2025,1.0,0.8115,0.5627,1.0,0.0011,0.2664,1.0,0.7796,1.0
zero-shot,LC-QuAD,?A1=A2,deepseek-chat,0.0,0.0014,0.004,0.9931,0.0,0.0939,0.0058,0.999,0.0436,0.2983,1.0,0.322,1.0,0.0001,0.1163,1.0,0.6321,1.0
zero-shot,LC-QuAD,?A1=A2,gemini-2.5-flash,0.0,0.0147,0.0192,0.9973,0.0001,0.2664,0.0205,0.9983,0.2025,0.5627,0.7796,1.0,1.0,0.0003,0.2434,1.0,0.8256,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,0.0004,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.8519,0.0,0.2005
zero-shot,LC-QuAD,?A1=A2,deepseek-reasoner,0.0,0.8115,0.8998,1.0,0.2706,0.9983,0.9186,1.0,0.9981,0.9997,1.0,0.9999,1.0,1.0,0.9983,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1=A2,gemini-2.5-pro,0.0,0.0494,0.1163,0.9992,0.0007,0.5643,0.1215,0.9998,0.5,0.8256,0.9324,0.8519,1.0,0.0053,1.0,1.0,0.9324,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2434,0.0,0.0,1.0,0.0,0.0704
zero-shot,LC-QuAD,?A1=A2,o3,0.0,0.0003,0.0032,0.9818,0.0,0.0871,0.0025,0.9957,0.0494,0.322,0.5,0.2664,1.0,0.0001,0.1163,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1=A2,gpt-5,0.0,0.0,0.0,0.0028,0.0,0.0,0.0,0.0045,0.0,0.0,0.0,0.0,0.8688,0.0,0.0,0.9593,0.0,1.0
zero-shot,LC-QuAD,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.6494,0.9977,0.0466,0.2024,0.1316,0.9999,0.2706,0.9919,0.8998,0.9564,1.0,0.5,0.9599,1.0,0.8761,1.0
zero-shot,LC-QuAD,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.5,1.0,0.9983,0.0262,0.1635,0.0835,1.0,0.2122,0.9765,0.8675,0.9061,1.0,0.4278,0.9179,1.0,0.7852,1.0
zero-shot,LC-QuAD,?A1=A3+A4,mistral-small:24b,0.0,0.0057,0.0047,1.0,0.0,0.0001,0.0001,0.7975,0.0005,0.2399,0.0365,0.1058,1.0,0.0055,0.1312,1.0,0.0395,0.9238
zero-shot,LC-QuAD,?A1=A3+A4,llama3.1:70b,0.0039,0.9827,0.9915,1.0,1.0,0.9102,0.8867,1.0,0.9165,1.0,0.9999,0.9997,1.0,0.9846,0.9999,1.0,0.9988,1.0
zero-shot,LC-QuAD,?A1=A3+A4,gemini-2.0-flash,0.0002,0.895,0.9157,1.0,0.212,1.0,0.5,1.0,0.7095,0.9997,0.9904,0.9959,1.0,0.8684,0.9981,1.0,0.9855,1.0
zero-shot,LC-QuAD,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0005,0.9423,0.9682,1.0,0.2744,0.6612,1.0,1.0,0.7383,0.9998,0.9978,0.9974,1.0,0.9165,0.9988,1.0,0.9879,1.0
zero-shot,LC-QuAD,?A1=A3+A4,gpt-4o,0.0,0.0005,0.0002,0.3089,0.0,0.0001,0.0,1.0,0.0002,0.1185,0.0129,0.0462,0.9999,0.0009,0.0619,1.0,0.0135,0.8544
zero-shot,LC-QuAD,?A1=A3+A4,gpt-4.1-2025-04-14,0.0001,0.8463,0.8852,0.9999,0.1796,0.5,0.4159,1.0,1.0,0.9995,0.9855,0.9919,1.0,0.7878,0.9959,1.0,0.9739,1.0
zero-shot,LC-QuAD,?A1=A3+A4,grok-3-mini,0.0,0.0214,0.0494,0.8389,0.0002,0.0012,0.0007,0.926,0.0019,1.0,0.1885,0.2923,1.0,0.0175,0.3555,1.0,0.1405,0.9934
zero-shot,LC-QuAD,?A1=A3+A4,deepseek-chat,0.0,0.1808,0.2291,0.9832,0.0007,0.0261,0.0085,0.9943,0.0378,0.8923,1.0,0.6962,1.0,0.1405,0.7566,1.0,0.5,0.9997
zero-shot,LC-QuAD,?A1=A3+A4,gemini-2.5-flash,0.0,0.0925,0.162,0.9368,0.0012,0.0121,0.0081,0.9741,0.0214,0.8192,0.4321,1.0,1.0,0.0392,0.655,1.0,0.3679,0.9987
zero-shot,LC-QuAD,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.8721,0.0,0.0018
zero-shot,LC-QuAD,?A1=A3+A4,deepseek-reasoner,0.0,0.6682,0.7077,0.9977,0.0481,0.2517,0.1796,0.9997,0.345,0.9932,0.9252,0.9867,1.0,1.0,0.9887,1.0,0.9157,1.0
zero-shot,LC-QuAD,?A1=A3+A4,gemini-2.5-pro,0.0,0.0814,0.14,0.9196,0.0005,0.0063,0.0041,0.9639,0.0121,0.7709,0.3642,0.5,1.0,0.032,1.0,1.0,0.2617,0.9985
zero-shot,LC-QuAD,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2088,0.0,0.0,1.0,0.0,0.0002
zero-shot,LC-QuAD,?A1=A3+A4,o3,0.0,0.221,0.3179,0.98,0.0047,0.0378,0.0307,0.9937,0.061,0.9252,0.6321,0.7502,1.0,0.1635,0.8569,1.0,1.0,0.9998
zero-shot,LC-QuAD,?A1=A3+A4,gpt-5,0.0,0.0,0.0001,0.1264,0.0,0.0,0.0,0.2257,0.0,0.0147,0.0008,0.0033,0.9994,0.0,0.004,1.0,0.0006,1.0
zero-shot,LC-QuAD,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1>A3,gpt-oss:20b,0.0,1.0,0.9444,0.9998,0.2712,0.967,0.9887,1.0,0.9934,0.9998,0.9998,0.9999,1.0,0.988,0.9973,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.0924,1.0,0.9876,0.0201,0.5497,0.6955,0.9996,0.7586,0.9663,0.951,0.9876,1.0,0.7405,0.8736,1.0,0.9888,0.9999
zero-shot,LC-QuAD,?A1>A3,mistral-small:24b,0.0,0.0005,0.024,1.0,0.0,0.0249,0.0517,0.9155,0.0909,0.405,0.3073,0.545,0.9997,0.1063,0.1712,0.9997,0.6418,0.951
zero-shot,LC-QuAD,?A1>A3,llama3.1:70b,0.0,0.8198,0.9901,1.0,1.0,0.9934,0.9981,1.0,0.9991,1.0,1.0,1.0,1.0,0.9994,0.9998,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1>A3,gemini-2.0-flash,0.0,0.0631,0.5497,0.9864,0.0147,1.0,0.7243,0.9984,0.8092,0.9756,0.9648,0.9908,1.0,0.7558,0.8856,1.0,0.9957,0.9999
zero-shot,LC-QuAD,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.0244,0.3991,0.97,0.0047,0.383,1.0,0.9979,0.678,0.9519,0.9155,0.9818,1.0,0.6146,0.8042,1.0,0.9912,0.9998
zero-shot,LC-QuAD,?A1>A3,gpt-4o,0.0,0.0,0.0011,0.1358,0.0,0.0033,0.0046,1.0,0.0133,0.0843,0.0619,0.2111,0.9901,0.0085,0.0302,0.9951,0.2319,0.775
zero-shot,LC-QuAD,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0147,0.32,0.9429,0.0023,0.28,0.4388,0.993,1.0,0.8942,0.8688,0.9593,1.0,0.5,0.7159,1.0,0.978,0.9988
zero-shot,LC-QuAD,?A1>A3,grok-3-mini,0.0,0.0006,0.0587,0.6848,0.0001,0.046,0.0871,0.9483,0.1659,1.0,0.5,0.7693,0.9998,0.1403,0.2712,1.0,0.8042,0.9864
zero-shot,LC-QuAD,?A1>A3,deepseek-chat,0.0,0.0007,0.0809,0.775,0.0001,0.0632,0.1358,0.9639,0.2005,0.6026,1.0,0.7865,0.9998,0.1456,0.3417,1.0,0.8358,0.9897
zero-shot,LC-QuAD,?A1>A3,gemini-2.5-flash,0.0,0.0002,0.0232,0.545,0.0,0.0182,0.0352,0.8491,0.0704,0.3294,0.2983,1.0,0.9978,0.0395,0.1055,0.9995,0.6556,0.9497
zero-shot,LC-QuAD,?A1>A3,gpt-5-nano,0.0,0.0,0.0,0.0008,0.0,0.0,0.0,0.0201,0.0,0.0005,0.0004,0.0044,1.0,0.0,0.0,0.7243,0.003,0.0885
zero-shot,LC-QuAD,?A1>A3,deepseek-reasoner,0.0,0.0244,0.3494,0.9297,0.0018,0.3389,0.5,0.9957,0.6061,0.9115,0.9129,0.98,1.0,1.0,0.7852,1.0,0.9853,0.9998
zero-shot,LC-QuAD,?A1>A3,gemini-2.5-pro,0.0,0.0063,0.1871,0.8825,0.0005,0.1748,0.2841,0.9828,0.3877,0.8198,0.7517,0.9414,1.0,0.3179,1.0,1.0,0.9605,0.9989
zero-shot,LC-QuAD,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0008,0.0,0.0,0.0,0.0102,0.0,0.0001,0.0001,0.0012,0.383,0.0,0.0,1.0,0.0023,0.0435
zero-shot,LC-QuAD,?A1>A3,o3,0.0,0.0,0.0207,0.4518,0.0,0.0092,0.0182,0.8358,0.0407,0.2841,0.2319,0.4469,0.9987,0.0297,0.0719,0.9989,1.0,0.9332
zero-shot,LC-QuAD,?A1>A3,gpt-5,0.0,0.0,0.0002,0.0809,0.0,0.0003,0.0005,0.3073,0.0026,0.0249,0.0192,0.0799,0.9476,0.0006,0.0027,0.976,0.1102,1.0
zero-shot,LC-QuAD,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1>A4,gpt-oss:20b,0.0,1.0,0.4439,0.9987,0.02,0.5,0.2148,0.9998,0.2442,0.9238,0.7558,0.5,1.0,0.3899,0.8898,1.0,0.5,1.0
zero-shot,LC-QuAD,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.6641,1.0,0.9985,0.0427,0.6061,0.3258,0.9999,0.3389,0.9508,0.8341,0.6101,1.0,0.5,0.9332,1.0,0.653,1.0
zero-shot,LC-QuAD,?A1>A4,mistral-small:24b,0.0,0.0032,0.0032,1.0,0.0,0.0056,0.0013,0.8252,0.0006,0.1208,0.022,0.0085,0.9999,0.0043,0.0876,1.0,0.0036,0.9887
zero-shot,LC-QuAD,?A1>A4,llama3.1:70b,0.0,0.9907,0.9778,1.0,1.0,0.9931,0.9599,1.0,0.9599,0.9999,0.9996,0.9902,1.0,0.973,0.9998,1.0,0.98,1.0
zero-shot,LC-QuAD,?A1>A4,gemini-2.0-flash,0.0,0.6043,0.5,0.9974,0.0168,1.0,0.2664,0.9997,0.2498,0.9296,0.7995,0.5561,1.0,0.4415,0.9369,1.0,0.5,1.0
zero-shot,LC-QuAD,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.8659,0.7743,0.9995,0.0814,0.8256,1.0,1.0,0.5598,0.9953,0.9368,0.8092,1.0,0.72,0.9871,1.0,0.7796,1.0
zero-shot,LC-QuAD,?A1>A4,gpt-4o,0.0,0.0006,0.0002,0.2522,0.0,0.0007,0.0001,1.0,0.0,0.0284,0.0019,0.0009,0.9985,0.0005,0.0237,1.0,0.0006,0.9323
zero-shot,LC-QuAD,?A1>A4,gpt-4.1-2025-04-14,0.0,0.8341,0.7558,0.9998,0.0814,0.8447,0.5598,1.0,1.0,0.9894,0.9519,0.8316,1.0,0.7288,0.9943,1.0,0.7743,1.0
zero-shot,LC-QuAD,?A1>A4,grok-3-mini,0.0,0.1264,0.0845,0.9201,0.0003,0.1144,0.0119,0.984,0.0222,1.0,0.2916,0.0871,1.0,0.0557,0.5,1.0,0.0762,0.9998
zero-shot,LC-QuAD,?A1>A4,deepseek-chat,0.0,0.3389,0.2442,0.9889,0.0013,0.2879,0.1058,0.9993,0.0871,0.7949,1.0,0.28,1.0,0.1908,0.7478,1.0,0.2442,1.0
zero-shot,LC-QuAD,?A1>A4,gemini-2.5-flash,0.0,0.6081,0.5,0.9957,0.0218,0.5561,0.28,0.9996,0.2612,0.9519,0.8092,1.0,1.0,0.43,0.9552,1.0,0.5,1.0
zero-shot,LC-QuAD,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0034,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9283,0.0,0.0963
zero-shot,LC-QuAD,?A1>A4,deepseek-reasoner,0.0,0.7121,0.6061,0.998,0.0519,0.6706,0.3854,0.9998,0.3804,0.9703,0.8785,0.7017,1.0,1.0,0.9702,1.0,0.6101,1.0
zero-shot,LC-QuAD,?A1>A4,gemini-2.5-pro,0.0,0.1704,0.1102,0.9456,0.0005,0.111,0.027,0.9867,0.0144,0.6123,0.3444,0.0877,1.0,0.0586,1.0,1.0,0.092,0.9999
zero-shot,LC-QuAD,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.1279,0.0,0.0,1.0,0.0,0.0082
zero-shot,LC-QuAD,?A1>A4,o3,0.0,0.617,0.4478,0.9984,0.0395,0.6146,0.322,0.9997,0.3258,0.9573,0.8341,0.617,1.0,0.5,0.9481,1.0,1.0,1.0
zero-shot,LC-QuAD,?A1>A4,gpt-5,0.0,0.0,0.0,0.0215,0.0,0.0,0.0,0.1034,0.0,0.0005,0.0,0.0,0.9413,0.0,0.0003,0.9962,0.0,1.0
zero-shot,LC-QuAD,?A3∅A4,llama3.1:8b,1.0,0.061,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0748,0.0,0.0748,0.0,0.0925,0.068,0.0,0.5775,0.0017
zero-shot,LC-QuAD,?A3∅A4,gpt-oss:20b,0.9739,1.0,0.0003,0.0,0.0057,0.0,0.0001,0.0,0.0,0.5775,0.0,0.5747,0.0,0.6286,0.5747,0.0,0.9564,0.0551
zero-shot,LC-QuAD,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,0.9999,1.0,0.0019,0.8775,0.3073,0.554,0.0063,0.5513,1.0,0.0011,0.9999,0.0432,0.9999,0.9998,0.0192,1.0,0.9923
zero-shot,LC-QuAD,?A3∅A4,mistral-small:24b,1.0,1.0,0.9992,1.0,1.0,0.9924,0.9992,0.6506,0.9981,1.0,0.405,1.0,0.8556,1.0,1.0,0.7528,1.0,1.0
zero-shot,LC-QuAD,?A3∅A4,llama3.1:70b,1.0,0.9977,0.1831,0.0001,1.0,0.0619,0.1562,0.0002,0.1562,0.9982,0.0,0.9986,0.0053,0.9995,0.9982,0.0015,1.0,0.8856
zero-shot,LC-QuAD,?A3∅A4,gemini-2.0-flash,1.0,1.0,0.775,0.015,0.9639,1.0,0.7787,0.0361,0.8144,1.0,0.0027,1.0,0.1444,1.0,1.0,0.074,1.0,0.9978
zero-shot,LC-QuAD,?A3∅A4,gpt-4.1-mini-2025-04-14,1.0,1.0,0.554,0.0018,0.9033,0.3045,1.0,0.0085,0.5531,1.0,0.0002,1.0,0.0573,1.0,0.9999,0.0237,1.0,0.9901
zero-shot,LC-QuAD,?A3∅A4,gpt-4o,1.0,1.0,0.997,0.4487,0.9999,0.9793,0.9957,1.0,0.9978,1.0,0.2833,1.0,0.8389,1.0,1.0,0.705,1.0,1.0
zero-shot,LC-QuAD,?A3∅A4,gpt-4.1-2025-04-14,1.0,1.0,0.5513,0.004,0.9033,0.2757,0.5531,0.0049,1.0,1.0,0.0004,1.0,0.0385,1.0,1.0,0.0164,1.0,0.9889
zero-shot,LC-QuAD,?A3∅A4,grok-3-mini,0.9646,0.5775,0.0001,0.0,0.0048,0.0,0.0002,0.0,0.0,1.0,0.0,0.5722,0.0,0.6399,0.5747,0.0,0.9599,0.0607
zero-shot,LC-QuAD,?A3∅A4,deepseek-chat,1.0,1.0,0.9995,0.6848,1.0,0.9989,0.9999,0.7889,0.9998,1.0,1.0,1.0,0.9247,1.0,1.0,0.8792,1.0,1.0
zero-shot,LC-QuAD,?A3∅A4,gemini-2.5-flash,0.9646,0.5747,0.0003,0.0,0.004,0.0,0.0002,0.0,0.0001,0.5722,0.0,1.0,0.0,0.6682,0.5982,0.0,0.9738,0.0551
zero-shot,LC-QuAD,?A3∅A4,gpt-5-nano,1.0,1.0,0.9751,0.2048,0.9974,0.9027,0.9644,0.2399,0.9785,1.0,0.1121,1.0,1.0,1.0,1.0,0.3714,1.0,1.0
zero-shot,LC-QuAD,?A3∅A4,deepseek-reasoner,0.9564,0.5,0.0004,0.0,0.0017,0.0,0.0001,0.0,0.0001,0.5,0.0,0.5,0.0,1.0,0.5,0.0,0.9506,0.0541
zero-shot,LC-QuAD,?A3∅A4,gemini-2.5-pro,0.9693,0.5747,0.0005,0.0,0.0048,0.0,0.0002,0.0,0.0,0.5747,0.0,0.5982,0.0,0.6494,1.0,0.0,0.968,0.0662
zero-shot,LC-QuAD,?A3∅A4,gpt-5-mini,1.0,1.0,0.9897,0.3244,0.9994,0.9544,0.9867,0.3939,0.9916,1.0,0.1746,1.0,0.7443,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A3∅A4,o3,0.5775,0.0925,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.0814,0.0,0.0669,0.0,0.1002,0.0758,0.0,1.0,0.0007
zero-shot,LC-QuAD,?A3∅A4,gpt-5,0.9995,0.9749,0.0164,0.0,0.1748,0.0049,0.0201,0.0,0.022,0.9712,0.0,0.9749,0.0,0.9734,0.9674,0.0,0.9998,1.0
zero-shot,LC-QuAD,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A4=A1|3,gpt-oss:20b,0.001,1.0,0.212,0.001,0.002,0.1133,0.073,0.0193,0.4018,0.9996,0.073,0.9996,0.8949,0.7256,0.9947,0.9102,0.8811,0.9682
zero-shot,LC-QuAD,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0156,0.9102,1.0,0.0156,0.0625,0.5,0.377,0.1445,0.8062,0.9999,0.3633,0.9998,0.9793,0.9408,0.9992,0.9793,0.9793,0.9947
zero-shot,LC-QuAD,?A4=A1|3,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A4=A1|3,llama3.1:70b,0.5,1.0,0.9922,0.5,1.0,0.9844,1.0,0.875,0.998,1.0,0.9688,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,LC-QuAD,?A4=A1|3,gemini-2.0-flash,0.0312,0.9673,0.7461,0.0312,0.1094,1.0,0.5,0.2266,0.9375,1.0,0.5,1.0,0.9983,0.9807,1.0,0.9983,0.9983,0.9993
zero-shot,LC-QuAD,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0625,0.9807,0.8281,0.0625,0.125,0.7461,1.0,0.3438,0.927,1.0,0.6367,1.0,0.9979,0.9888,1.0,0.9979,0.9962,0.9996
zero-shot,LC-QuAD,?A4=A1|3,gpt-4o,0.25,0.9968,0.9648,0.25,0.5,0.9375,0.8906,1.0,0.9893,1.0,0.9375,1.0,0.9997,0.9983,1.0,0.9999,0.9997,1.0
zero-shot,LC-QuAD,?A4=A1|3,gpt-4.1-2025-04-14,0.0039,0.7728,0.3872,0.0039,0.0195,0.2266,0.1938,0.0547,1.0,0.9999,0.1719,0.9995,0.9616,0.8491,0.9987,0.9519,0.9713,0.9827
zero-shot,LC-QuAD,?A4=A1|3,grok-3-mini,0.0,0.0022,0.0005,0.0,0.0,0.0,0.0001,0.0,0.0004,1.0,0.0,0.5,0.0207,0.0073,0.345,0.0207,0.0207,0.1148
zero-shot,LC-QuAD,?A4=A1|3,deepseek-chat,0.0625,0.9807,0.8555,0.0625,0.1875,0.7734,0.6367,0.3125,0.9453,1.0,1.0,1.0,0.9979,0.9888,0.9999,0.9962,0.9979,0.9993
zero-shot,LC-QuAD,?A4=A1|3,gemini-2.5-flash,0.0,0.0022,0.0008,0.0,0.0,0.0001,0.0001,0.0,0.002,0.655,0.0001,1.0,0.0392,0.0038,0.4159,0.0539,0.061,0.1537
zero-shot,LC-QuAD,?A4=A1|3,gpt-5-nano,0.0001,0.2272,0.0577,0.0001,0.0001,0.0112,0.0106,0.0021,0.1051,0.9941,0.0106,0.9867,1.0,0.3036,0.9888,0.6128,0.623,0.8491
zero-shot,LC-QuAD,?A4=A1|3,deepseek-reasoner,0.0005,0.5,0.1509,0.0005,0.001,0.073,0.0461,0.0112,0.3036,0.998,0.0461,0.9993,0.8491,1.0,0.9915,0.8338,0.8338,0.9331
zero-shot,LC-QuAD,?A4=A1|3,gemini-2.5-pro,0.0,0.0173,0.003,0.0,0.0,0.0002,0.0002,0.0,0.0053,0.7878,0.0005,0.7383,0.0461,0.0262,1.0,0.0835,0.0461,0.2403
zero-shot,LC-QuAD,?A4=A1|3,gpt-5-mini,0.0001,0.212,0.0577,0.0001,0.0001,0.0112,0.0106,0.0009,0.1189,0.9941,0.0154,0.9784,0.6128,0.3145,0.9682,1.0,0.5982,0.8204
zero-shot,LC-QuAD,?A4=A1|3,o3,0.0001,0.2403,0.0577,0.0001,0.0005,0.0112,0.0154,0.0021,0.0898,0.9941,0.0106,0.9739,0.623,0.3145,0.9888,0.5982,1.0,0.7728
zero-shot,LC-QuAD,?A4=A1|3,gpt-5,0.0,0.0835,0.0173,0.0,0.0001,0.0038,0.0022,0.0004,0.0466,0.9461,0.0036,0.9242,0.3036,0.1431,0.8811,0.3238,0.4018,1.0
zero-shot,overall,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A2,gpt-oss:20b,0.0,1.0,1.0,1.0,0.1986,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0036,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,1.0,0.0,0.9999,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,0.0,0.9936,1.0,1.0,1.0
zero-shot,overall,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.8417,0.0011,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
zero-shot,overall,?A1=A2,llama3.1:70b,0.0,0.8213,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0367,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A2,gemini-2.0-flash,0.0,0.0,0.0001,1.0,0.0,1.0,0.7896,1.0,1.0,0.9055,0.4605,0.7524,1.0,0.0,0.1038,1.0,0.8913,1.0
zero-shot,overall,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0,1.0,0.0,0.2294,1.0,1.0,1.0,0.7084,0.1854,0.4614,1.0,0.0,0.0204,1.0,0.7262,1.0
zero-shot,overall,?A1=A2,gpt-4o,0.0,0.0,0.0,0.1734,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
zero-shot,overall,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.9991,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
zero-shot,overall,?A1=A2,grok-3-mini,0.0,0.0,0.0,1.0,0.0,0.1057,0.3142,1.0,1.0,1.0,0.0801,0.2726,1.0,0.0,0.0057,1.0,0.5254,1.0
zero-shot,overall,?A1=A2,deepseek-chat,0.0,0.0,0.0001,1.0,0.0,0.5657,0.8318,1.0,1.0,0.9291,1.0,0.7918,1.0,0.0,0.1248,1.0,0.9334,1.0
zero-shot,overall,?A1=A2,gemini-2.5-flash,0.0,0.0,0.0,1.0,0.0,0.2686,0.5642,1.0,1.0,0.7482,0.2273,1.0,1.0,0.0,0.026,1.0,0.7692,1.0
zero-shot,overall,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.9328
zero-shot,overall,?A1=A2,deepseek-reasoner,0.0,0.9971,1.0,1.0,0.969,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A2,gemini-2.5-pro,0.0,0.0,0.0076,1.0,0.0,0.9074,0.9826,1.0,1.0,0.9953,0.8882,0.9778,1.0,0.0,1.0,1.0,0.9961,1.0
zero-shot,overall,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0006
zero-shot,overall,?A1=A2,o3,0.0,0.0,0.0,1.0,0.0,0.121,0.2953,1.0,1.0,0.5,0.0753,0.2507,1.0,0.0,0.0047,1.0,1.0,1.0
zero-shot,overall,?A1=A2,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0756,0.0,0.0,0.9995,0.0,1.0
zero-shot,overall,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0002,1.0,0.0,0.0006,0.0935,1.0,0.8787,1.0,0.8806,1.0,1.0,0.0291,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.9999,1.0,1.0,0.0,0.6572,0.9905,1.0,1.0,1.0,1.0,1.0,1.0,0.9656,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9121,0.0,0.9587,0.0,0.7174,1.0,0.0,0.0813,1.0,0.1404,1.0
zero-shot,overall,?A1=A3+A4,llama3.1:70b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gemini-2.0-flash,0.0,0.9996,0.3764,1.0,0.0,1.0,0.978,1.0,1.0,1.0,1.0,1.0,1.0,0.9296,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.92,0.0121,1.0,0.0,0.0272,1.0,1.0,0.9956,1.0,0.9953,1.0,1.0,0.3158,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gpt-4o,0.0,0.0,0.0,0.0995,0.0,0.0,0.0,1.0,0.0,0.7139,0.0,0.2626,1.0,0.0,0.0049,1.0,0.0132,1.0
zero-shot,overall,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.1383,0.0,1.0,0.0,0.0,0.0056,1.0,1.0,1.0,0.5164,1.0,1.0,0.0016,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0,0.0477,0.0,0.0,0.0,0.3091,0.0,1.0,0.0,0.1068,1.0,0.0,0.0007,1.0,0.003,1.0
zero-shot,overall,?A1=A3+A4,deepseek-chat,0.0,0.1364,0.0,1.0,0.0,0.0,0.006,1.0,0.5164,1.0,1.0,1.0,1.0,0.0014,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gemini-2.5-flash,0.0,0.0,0.0,0.306,0.0,0.0,0.0,0.7588,0.0,0.9057,0.0,1.0,1.0,0.0,0.0233,1.0,0.0625,1.0
zero-shot,overall,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9989,0.0,0.3702
zero-shot,overall,?A1=A3+A4,deepseek-reasoner,0.0,0.9763,0.0417,1.0,0.0,0.0832,0.7146,1.0,0.9988,1.0,0.999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gemini-2.5-pro,0.0,0.0,0.0,0.9287,0.0,0.0,0.0,0.996,0.0,0.9994,0.0,0.9804,1.0,0.0,1.0,1.0,0.6965,1.0
zero-shot,overall,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0013,0.0,0.0,1.0,0.0,0.0003
zero-shot,overall,?A1=A3+A4,o3,0.0,0.0,0.0,0.8746,0.0,0.0,0.0,0.9889,0.0,0.9976,0.0,0.9458,1.0,0.0,0.3297,1.0,1.0,1.0
zero-shot,overall,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6523,0.0,0.0,0.9997,0.0,1.0
zero-shot,overall,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gpt-oss:20b,0.0,1.0,1.0,1.0,0.0,0.9956,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9481,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.0,1.0,0.9993,0.0,0.0014,0.9677,1.0,0.9985,1.0,0.4641,0.9971,1.0,0.0,0.0814,1.0,0.9997,1.0
zero-shot,overall,?A1>A3,mistral-small:24b,0.0,0.0,0.0009,1.0,0.0,0.0,0.0699,0.9902,0.3736,0.9261,0.0004,0.2873,1.0,0.0,0.0,1.0,0.5236,1.0
zero-shot,overall,?A1>A3,llama3.1:70b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gemini-2.0-flash,0.0,0.0052,0.9988,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.9985,1.0,1.0,0.171,0.9439,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0369,0.9377,0.0,0.0,1.0,0.9999,0.8961,0.999,0.0294,0.8426,1.0,0.0,0.0007,1.0,0.9644,1.0
zero-shot,overall,?A1>A3,gpt-4o,0.0,0.0,0.0,0.0115,0.0,0.0,0.0001,1.0,0.0067,0.2484,0.0,0.004,1.0,0.0,0.0,1.0,0.023,1.0
zero-shot,overall,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0,0.0018,0.6484,0.0,0.0,0.1155,0.9943,1.0,0.9679,0.001,0.4159,1.0,0.0,0.0,1.0,0.7357,1.0
zero-shot,overall,?A1>A3,grok-3-mini,0.0,0.0,0.0,0.0825,0.0,0.0,0.0012,0.7699,0.0367,1.0,0.0,0.0196,1.0,0.0,0.0,1.0,0.113,1.0
zero-shot,overall,?A1>A3,deepseek-chat,0.0,0.0,0.5597,0.9996,0.0,0.0018,0.9744,1.0,0.9992,1.0,1.0,0.9984,1.0,0.0001,0.0974,1.0,0.9999,1.0
zero-shot,overall,?A1>A3,gemini-2.5-flash,0.0,0.0,0.0035,0.7325,0.0,0.0,0.1725,0.9966,0.6076,0.9832,0.0019,1.0,1.0,0.0,0.0,1.0,0.811,1.0
zero-shot,overall,?A1>A3,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.4628,0.0,0.5608
zero-shot,overall,?A1>A3,deepseek-reasoner,0.0,0.0588,1.0,1.0,0.0,0.8442,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9952,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gemini-2.5-pro,0.0,0.0,0.9272,1.0,0.0,0.0632,0.9994,1.0,1.0,1.0,0.9126,1.0,1.0,0.0057,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5618,0.0,0.0,1.0,0.0,0.6095
zero-shot,overall,?A1>A3,o3,0.0,0.0,0.0004,0.5,0.0,0.0,0.0406,0.9801,0.2843,0.8984,0.0002,0.2059,1.0,0.0,0.0,1.0,1.0,1.0
zero-shot,overall,?A1>A3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4634,0.0,0.0,0.4144,0.0,1.0
zero-shot,overall,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A4,gpt-oss:20b,0.0,1.0,0.9998,1.0,0.0,0.9851,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0371,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.0002,1.0,1.0,0.0,0.0906,0.8171,1.0,0.8714,1.0,0.9868,0.9994,1.0,0.0,0.8935,1.0,0.8739,1.0
zero-shot,overall,?A1>A4,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.8904,0.0,0.1774,0.0001,0.0046,1.0,0.0,0.0,1.0,0.0,1.0
zero-shot,overall,?A1>A4,llama3.1:70b,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A4,gemini-2.0-flash,0.0,0.0175,0.9192,1.0,0.0,1.0,0.9891,1.0,0.9947,1.0,0.9999,1.0,1.0,0.0,0.9961,1.0,0.9955,1.0
zero-shot,overall,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0,0.1999,1.0,0.0,0.0129,1.0,1.0,0.6087,1.0,0.9152,0.9912,1.0,0.0,0.6583,1.0,0.5967,1.0
zero-shot,overall,?A1>A4,gpt-4o,0.0,0.0,0.0,0.1213,0.0,0.0,0.0,1.0,0.0,0.0135,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
zero-shot,overall,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0,0.1419,1.0,0.0,0.0064,0.415,1.0,1.0,0.9999,0.8719,0.9824,1.0,0.0,0.5613,1.0,0.5,1.0
zero-shot,overall,?A1>A4,grok-3-mini,0.0,0.0,0.0,0.8377,0.0,0.0,0.0,0.9884,0.0001,1.0,0.0047,0.0466,1.0,0.0,0.0001,1.0,0.0001,1.0
zero-shot,overall,?A1>A4,deepseek-chat,0.0,0.0,0.0154,0.9999,0.0,0.0002,0.0947,1.0,0.1414,0.996,1.0,0.8421,1.0,0.0,0.1719,1.0,0.1592,1.0
zero-shot,overall,?A1>A4,gemini-2.5-flash,0.0,0.0,0.0008,0.9962,0.0,0.0,0.0104,1.0,0.0204,0.959,0.173,1.0,1.0,0.0,0.0238,1.0,0.0211,1.0
zero-shot,overall,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9999,0.0,0.9138
zero-shot,overall,?A1>A4,deepseek-reasoner,0.0,0.9678,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A1>A4,gemini-2.5-pro,0.0,0.0,0.1181,1.0,0.0,0.0047,0.365,1.0,0.4632,0.9999,0.8432,0.9795,1.0,0.0,1.0,1.0,0.463,1.0
zero-shot,overall,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,0.0,1.0,0.0,0.0087
zero-shot,overall,?A1>A4,o3,0.0,0.0,0.1394,1.0,0.0,0.0055,0.4271,1.0,0.5248,0.9999,0.855,0.9819,1.0,0.0,0.5615,1.0,1.0,1.0
zero-shot,overall,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0962,0.0,0.0,0.9927,0.0,1.0
zero-shot,overall,?A3∅A4,llama3.1:8b,1.0,0.1116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9997,0.0,0.5806,0.0,0.0156,0.0045,0.0,0.9913,0.0
zero-shot,overall,?A3∅A4,gpt-oss:20b,0.9026,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9308,0.0,0.1868,0.0849,0.0,0.9998,0.0
zero-shot,overall,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,0.0,1.0,0.6941,0.0,0.7346,0.8388,1.0,0.0,1.0,1.0,1.0,1.0,0.3709,1.0,1.0
zero-shot,overall,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A3∅A4,llama3.1:70b,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0063,1.0,1.0,0.0,1.0,0.9754
zero-shot,overall,?A3∅A4,gemini-2.0-flash,1.0,1.0,0.3272,0.0,1.0,1.0,0.0,0.5592,0.7049,1.0,0.0,1.0,0.9999,1.0,1.0,0.2152,1.0,1.0
zero-shot,overall,?A3∅A4,gpt-4.1-mini-2025-04-14,1.0,1.0,1.0,0.0001,1.0,1.0,1.0,1.0,1.0,1.0,0.4172,1.0,1.0,1.0,1.0,0.9999,1.0,1.0
zero-shot,overall,?A3∅A4,gpt-4o,1.0,1.0,0.2853,0.0,1.0,0.4644,0.0,1.0,0.6538,1.0,0.0,1.0,0.9999,1.0,1.0,0.1714,1.0,1.0
zero-shot,overall,?A3∅A4,gpt-4.1-2025-04-14,1.0,1.0,0.1763,0.0,1.0,0.3173,0.0,0.3689,1.0,1.0,0.0,1.0,0.9994,1.0,1.0,0.098,1.0,1.0
zero-shot,overall,?A3∅A4,grok-3-mini,0.0005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0006,0.0,0.0,0.0,0.0,0.1408,0.0
zero-shot,overall,?A3∅A4,deepseek-chat,1.0,1.0,1.0,0.0002,1.0,1.0,0.606,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0
zero-shot,overall,?A3∅A4,gemini-2.5-flash,0.4514,0.0806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9996,0.0,1.0,0.0,0.0083,0.0016,0.0,0.9874,0.0
zero-shot,overall,?A3∅A4,gpt-5-nano,1.0,1.0,0.0,0.0,0.9947,0.0002,0.0,0.0002,0.0008,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
zero-shot,overall,?A3∅A4,deepseek-reasoner,0.9872,0.8333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9933,0.0,1.0,0.3347,0.0,1.0,0.0
zero-shot,overall,?A3∅A4,gemini-2.5-pro,0.9964,0.9267,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9988,0.0,0.693,1.0,0.0,1.0,0.0
zero-shot,overall,?A3∅A4,gpt-5-mini,1.0,1.0,0.6515,0.0,1.0,0.8015,0.0002,0.8437,0.912,1.0,0.0001,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A3∅A4,o3,0.0109,0.0003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8783,0.0,0.0157,0.0,0.0,0.0,0.0,1.0,0.0
zero-shot,overall,?A3∅A4,gpt-5,1.0,1.0,0.0,0.0,0.0285,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0
zero-shot,overall,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5961,1.0,0.0004,1.0,1.0,0.8562,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0,1.0,1.0,0.0,0.0146,0.9638,0.9949,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,mistral-small:24b,0.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,llama3.1:70b,0.0,1.0,0.9904,0.0006,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gemini-2.0-flash,0.0,1.0,0.0492,0.0,0.0,1.0,0.8085,0.969,1.0,1.0,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0,1.0,0.0075,0.0,0.0,0.2302,1.0,0.8608,1.0,1.0,0.9973,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gpt-4o,0.0,1.0,0.0003,0.0,0.0,0.0412,0.1695,1.0,1.0,1.0,0.958,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.442,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0002,1.0,1.0,0.7938,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.1605,0.0,0.0,0.0011,0.0,0.0004,0.1295
zero-shot,overall,?A4=A1|3,deepseek-chat,0.0,0.9997,0.0,0.0,0.0,0.0003,0.0039,0.0539,0.9999,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8568,0.0,1.0,0.0,0.0,0.0188,0.0,0.0085,0.4561
zero-shot,overall,?A4=A1|3,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.9814,0.0013,0.9656,1.0
zero-shot,overall,?A4=A1|3,deepseek-reasoner,0.0,0.168,0.0,0.0,0.0,0.0,0.0,0.0,0.2347,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,gemini-2.5-pro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9992,0.0,0.9844,0.0225,0.0,1.0,0.0,0.4075,0.9765
zero-shot,overall,?A4=A1|3,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.999,0.0001,1.0,1.0,1.0,1.0
zero-shot,overall,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9997,0.0,0.993,0.0408,0.0,0.6225,0.0,1.0,0.9883
zero-shot,overall,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8853,0.0,0.5729,0.0,0.0,0.0279,0.0,0.0142,1.0
zero-shot,qawiki,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A1=A2,gpt-oss:20b,0.0,1.0,0.8544,1.0,0.4278,0.9002,0.9597,1.0,0.98,0.9972,0.6962,0.9605,1.0,0.0494,0.8544,1.0,0.9808,1.0
zero-shot,qawiki,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.2257,1.0,1.0,0.0925,0.655,0.8569,1.0,0.9252,0.9832,0.2923,0.8519,1.0,0.0026,0.566,1.0,0.8837,1.0
zero-shot,qawiki,?A1=A2,mistral-small:24b,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.1958,0.0001,0.0013,0.0,0.0,0.9639,0.0,0.0,1.0,0.0002,1.0
zero-shot,qawiki,?A1=A2,llama3.1:70b,0.0,0.7077,0.9564,1.0,1.0,0.9784,0.9967,1.0,0.9992,0.9997,0.8365,0.9917,1.0,0.0577,0.9449,1.0,0.997,1.0
zero-shot,qawiki,?A1=A2,gemini-2.0-flash,0.0,0.1684,0.5,1.0,0.0539,1.0,0.7878,0.9999,0.8923,0.9734,0.221,0.8192,1.0,0.0012,0.5,1.0,0.8544,1.0
zero-shot,qawiki,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0769,0.2617,1.0,0.0113,0.345,1.0,0.9997,0.7709,0.9231,0.0758,0.6358,1.0,0.0002,0.3089,1.0,0.7443,1.0
zero-shot,qawiki,?A1=A2,gpt-4o,0.0,0.0,0.0001,0.8736,0.0,0.0002,0.0009,1.0,0.0038,0.0182,0.0,0.0009,0.9989,0.0,0.0001,1.0,0.001,1.0
zero-shot,qawiki,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0395,0.1405,1.0,0.003,0.1885,0.3555,0.9984,1.0,0.8256,0.0216,0.4278,1.0,0.0,0.162,1.0,0.5643,1.0
zero-shot,qawiki,?A1=A2,grok-3-mini,0.0,0.0066,0.0365,0.9995,0.0011,0.0541,0.1341,0.9912,0.2664,1.0,0.0069,0.1481,1.0,0.0,0.0403,1.0,0.2712,1.0
zero-shot,qawiki,?A1=A2,deepseek-chat,0.0,0.4321,0.8192,1.0,0.2786,0.8761,0.968,1.0,0.9927,0.9975,1.0,0.9693,1.0,0.0021,0.8275,1.0,0.9825,1.0
zero-shot,qawiki,?A1=A2,gemini-2.5-flash,0.0,0.0719,0.2434,1.0,0.0205,0.2923,0.5,0.9997,0.7077,0.9186,0.068,1.0,1.0,0.0001,0.221,1.0,0.7017,1.0
zero-shot,qawiki,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0619,0.0,0.0,0.0,0.0027,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9999,0.0,0.9972
zero-shot,qawiki,?A1=A2,deepseek-reasoner,0.0,0.9786,0.9993,1.0,0.9793,0.9997,1.0,1.0,1.0,1.0,0.9997,1.0,1.0,1.0,0.9995,1.0,1.0,1.0
zero-shot,qawiki,?A1=A2,gemini-2.5-pro,0.0,0.2257,0.566,1.0,0.1077,0.6321,0.7975,1.0,0.9061,0.9808,0.2858,0.8761,1.0,0.0019,1.0,1.0,0.8785,1.0
zero-shot,qawiki,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0003,0.0,0.0,1.0,0.0,0.1958
zero-shot,qawiki,?A1=A2,o3,0.0,0.0403,0.1856,0.9999,0.0083,0.2257,0.3714,0.9997,0.5643,0.8198,0.0401,0.43,1.0,0.0001,0.2025,1.0,1.0,1.0
zero-shot,qawiki,?A1=A2,gpt-5,0.0,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0066,0.0,0.0,0.8853,0.0,1.0
zero-shot,qawiki,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9999,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0307,0.9982,0.0002,0.1725,0.0843,0.9998,0.0216,0.9749,0.3555,0.9393,1.0,0.0073,0.3714,1.0,0.5722,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0001,0.9879,1.0,1.0,0.0898,0.8852,0.7878,1.0,0.5982,0.9999,0.9564,0.9998,1.0,0.4119,0.9622,1.0,0.9825,1.0
zero-shot,qawiki,?A1=A3+A4,mistral-small:24b,0.0,0.0048,0.0,1.0,0.0,0.0003,0.0001,0.8544,0.0,0.1611,0.0012,0.1312,0.9999,0.0,0.0045,1.0,0.0048,1.0
zero-shot,qawiki,?A1=A3+A4,llama3.1:70b,0.0078,1.0,0.9713,1.0,1.0,0.9978,0.9867,1.0,0.9713,1.0,0.9996,1.0,1.0,0.9102,0.9992,1.0,0.9998,1.0
zero-shot,qawiki,?A1=A3+A4,gemini-2.0-flash,0.0,0.9075,0.2122,0.9999,0.0096,1.0,0.4073,1.0,0.1662,0.9993,0.8338,0.9988,1.0,0.0592,0.779,1.0,0.9075,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0001,0.9622,0.345,1.0,0.0392,0.7597,1.0,1.0,0.3036,0.9997,0.9165,0.9997,1.0,0.1509,0.8761,1.0,0.9622,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-4o,0.0,0.0005,0.0,0.2257,0.0,0.0,0.0,1.0,0.0,0.0524,0.0002,0.0314,0.9999,0.0,0.0003,1.0,0.0003,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-4.1-2025-04-14,0.0005,0.9927,0.5982,1.0,0.0898,0.9283,0.8491,1.0,1.0,1.0,0.9738,0.9999,1.0,0.4018,0.968,1.0,0.9879,1.0
zero-shot,qawiki,?A1=A3+A4,grok-3-mini,0.0,0.0551,0.0003,0.8987,0.0,0.0026,0.0011,0.971,0.0,1.0,0.0235,0.4159,1.0,0.0,0.0096,1.0,0.0436,1.0
zero-shot,qawiki,?A1=A3+A4,deepseek-chat,0.0,0.7709,0.0925,0.9996,0.0022,0.3145,0.1796,1.0,0.0669,0.99,1.0,0.9765,1.0,0.0154,0.57,1.0,0.7634,1.0
zero-shot,qawiki,?A1=A3+A4,gemini-2.5-flash,0.0,0.1147,0.0007,0.9196,0.0,0.0047,0.0012,0.9834,0.0003,0.7383,0.0494,1.0,1.0,0.0,0.0216,1.0,0.068,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0003,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9994,0.0,0.992
zero-shot,qawiki,?A1=A3+A4,deepseek-reasoner,0.0017,0.998,0.7483,1.0,0.212,0.9824,0.9408,1.0,0.7728,1.0,0.9962,1.0,1.0,1.0,0.9887,1.0,0.998,1.0
zero-shot,qawiki,?A1=A3+A4,gemini-2.5-pro,0.0,0.7443,0.0843,0.9981,0.0033,0.3506,0.221,0.9999,0.0758,0.997,0.57,0.9927,1.0,0.032,1.0,1.0,0.7214,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.0,0.0,1.0,0.0,0.3642
zero-shot,qawiki,?A1=A3+A4,o3,0.0,0.5722,0.0401,0.9982,0.0008,0.1725,0.0843,0.9999,0.0307,0.9822,0.3601,0.9693,1.0,0.0073,0.4225,1.0,1.0,1.0
zero-shot,qawiki,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0178,0.0,0.0,0.7566,0.0,1.0
zero-shot,qawiki,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A1>A3,gpt-oss:20b,0.0,1.0,0.7825,0.97,0.0,0.3015,0.9238,0.9751,0.6556,0.9778,0.7646,0.9196,1.0,0.0267,0.0845,1.0,0.8898,1.0
zero-shot,qawiki,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.3015,1.0,0.9,0.0,0.0871,0.7693,0.9124,0.3776,0.908,0.5,0.7517,1.0,0.0039,0.0182,1.0,0.7121,1.0
zero-shot,qawiki,?A1>A3,mistral-small:24b,0.0,0.0517,0.1528,1.0,0.0,0.0082,0.3045,0.6101,0.0587,0.5,0.1264,0.3045,0.9995,0.0002,0.0035,1.0,0.2662,1.0
zero-shot,qawiki,?A1>A3,llama3.1:70b,0.0001,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9968,0.9995,1.0,1.0,1.0
zero-shot,qawiki,?A1>A3,gemini-2.0-flash,0.0,0.7825,0.9519,0.9962,0.0001,1.0,0.9902,0.9973,0.8837,0.9967,0.9281,0.9818,1.0,0.0967,0.2354,1.0,0.9663,1.0
zero-shot,qawiki,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.1264,0.3294,0.7787,0.0,0.0218,1.0,0.8019,0.162,0.7693,0.2879,0.5573,1.0,0.0003,0.0033,1.0,0.5,1.0
zero-shot,qawiki,?A1>A3,gpt-4o,0.0,0.0432,0.1339,0.5,0.0,0.006,0.2723,1.0,0.0556,0.4487,0.0963,0.2693,0.9996,0.0001,0.0009,1.0,0.2094,1.0
zero-shot,qawiki,?A1>A3,gpt-4.1-2025-04-14,0.0,0.4469,0.7336,0.9663,0.0,0.1856,0.9061,0.9686,1.0,0.9605,0.6641,0.8785,1.0,0.003,0.033,1.0,0.8688,1.0
zero-shot,qawiki,?A1>A3,grok-3-mini,0.0,0.0427,0.151,0.5964,0.0,0.0077,0.3294,0.6506,0.0719,1.0,0.1163,0.322,0.9997,0.0,0.0002,1.0,0.2557,1.0
zero-shot,qawiki,?A1>A3,deepseek-chat,0.0,0.3327,0.6026,0.9191,0.0,0.1215,0.7995,0.9413,0.4439,0.9324,1.0,0.8042,1.0,0.004,0.0222,1.0,0.7558,1.0
zero-shot,qawiki,?A1>A3,gemini-2.5-flash,0.0,0.1312,0.3417,0.7787,0.0,0.0352,0.5573,0.8055,0.1908,0.7796,0.2841,1.0,0.9999,0.0004,0.0018,1.0,0.5,1.0
zero-shot,qawiki,?A1>A3,gpt-5-nano,0.0,0.0,0.0,0.0011,0.0,0.0,0.0001,0.0011,0.0,0.0007,0.0,0.0002,1.0,0.0,0.0,0.967,0.0,0.9648
zero-shot,qawiki,?A1>A3,deepseek-reasoner,0.0,0.9865,0.9984,0.9999,0.0083,0.9443,0.9999,1.0,0.9991,1.0,0.9985,0.9999,1.0,1.0,0.7912,1.0,0.9999,1.0
zero-shot,qawiki,?A1>A3,gemini-2.5-pro,0.0,0.9508,0.9912,0.9983,0.0014,0.8438,0.9987,0.9996,0.9842,0.9999,0.9894,0.9994,1.0,0.3136,1.0,1.0,0.9977,1.0
zero-shot,qawiki,?A1>A3,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0631,0.0,0.0,1.0,0.0,0.6196
zero-shot,qawiki,?A1>A3,o3,0.0,0.1704,0.3899,0.8091,0.0,0.0587,0.6101,0.8597,0.2005,0.838,0.3389,0.6196,1.0,0.0003,0.0057,1.0,1.0,1.0
zero-shot,qawiki,?A1>A3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0632,0.0,0.0,0.5,0.0,1.0
zero-shot,qawiki,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A1>A4,gpt-oss:20b,0.0,1.0,0.5489,0.9991,0.0019,0.7478,0.6101,0.9937,0.446,0.8296,0.7159,0.5573,1.0,0.0003,0.1264,1.0,0.2005,1.0
zero-shot,qawiki,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.5489,1.0,0.9981,0.0019,0.7601,0.6146,0.9907,0.4402,0.8129,0.6955,0.5522,1.0,0.001,0.1358,1.0,0.2213,1.0
zero-shot,qawiki,?A1>A4,mistral-small:24b,0.0,0.0021,0.004,1.0,0.0,0.0082,0.0058,0.3045,0.0018,0.0166,0.0067,0.0019,0.9998,0.0,0.0001,1.0,0.0,1.0
zero-shot,qawiki,?A1>A4,llama3.1:70b,0.0,0.9993,0.9993,1.0,1.0,1.0,0.9997,1.0,0.9997,1.0,0.9998,0.9994,1.0,0.5643,0.9862,1.0,0.9887,1.0
zero-shot,qawiki,?A1>A4,gemini-2.0-flash,0.0,0.3444,0.3359,0.9962,0.0001,1.0,0.3746,0.9818,0.2148,0.6641,0.5,0.3294,1.0,0.0,0.0427,1.0,0.0595,1.0
zero-shot,qawiki,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.5,0.5,0.9972,0.0009,0.7388,1.0,0.9907,0.3601,0.8042,0.6641,0.5,1.0,0.0001,0.0662,1.0,0.1403,1.0
zero-shot,qawiki,?A1>A4,gpt-4o,0.0,0.0124,0.0172,0.7787,0.0,0.0337,0.0178,1.0,0.0043,0.071,0.0279,0.0113,1.0,0.0,0.0005,1.0,0.0006,1.0
zero-shot,qawiki,?A1>A4,gpt-4.1-2025-04-14,0.0,0.6583,0.6742,0.9992,0.0011,0.8659,0.7634,0.9981,1.0,0.9129,0.8042,0.6821,1.0,0.0006,0.1481,1.0,0.2204,1.0
zero-shot,qawiki,?A1>A4,grok-3-mini,0.0,0.2483,0.2629,0.9918,0.0,0.4439,0.2841,0.9568,0.1456,1.0,0.4022,0.2148,1.0,0.0,0.0158,1.0,0.038,1.0
zero-shot,qawiki,?A1>A4,deepseek-chat,0.0,0.3877,0.3991,0.9969,0.0005,0.6146,0.4439,0.985,0.2841,0.6899,1.0,0.3899,1.0,0.0003,0.0595,1.0,0.1,1.0
zero-shot,qawiki,?A1>A4,gemini-2.5-flash,0.0,0.5573,0.5522,0.9992,0.0016,0.7693,0.6224,0.9944,0.4373,0.8659,0.7121,1.0,1.0,0.0002,0.0748,1.0,0.1341,1.0
zero-shot,qawiki,?A1>A4,gpt-5-nano,0.0,0.0,0.0,0.0005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.9552,0.0,0.8785
zero-shot,qawiki,?A1>A4,deepseek-reasoner,0.0,0.9999,0.9996,1.0,0.5643,1.0,1.0,1.0,0.9998,1.0,0.9999,1.0,1.0,1.0,0.9917,1.0,0.9931,1.0
zero-shot,qawiki,?A1>A4,gemini-2.5-pro,0.0,0.9238,0.9155,1.0,0.0298,0.9778,0.9674,0.9998,0.9186,0.9931,0.9675,0.9646,1.0,0.0205,1.0,1.0,0.6196,1.0
zero-shot,qawiki,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0877,0.0,0.0,1.0,0.0,0.434
zero-shot,qawiki,?A1>A4,o3,0.0,0.8688,0.8472,1.0,0.0244,0.9675,0.9115,0.9998,0.86,0.9799,0.9381,0.9231,1.0,0.0168,0.5,1.0,1.0,1.0
zero-shot,qawiki,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1908,0.0,0.0,0.6911,0.0,1.0
zero-shot,qawiki,?A3∅A4,llama3.1:8b,1.0,0.2612,0.0,0.0,0.0005,0.0,0.0,0.0,0.0,0.8115,0.0,0.2291,0.0,0.1002,0.033,0.0,0.0266,0.0
zero-shot,qawiki,?A3∅A4,gpt-oss:20b,0.8316,1.0,0.0,0.0,0.0083,0.0003,0.0,0.0,0.0001,0.9693,0.0,0.5612,0.0,0.3776,0.1147,0.0,0.1147,0.0
zero-shot,qawiki,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,0.0,0.9818,0.6399,0.0668,0.362,0.4525,1.0,0.4007,1.0,0.2443,1.0,0.9979,0.0207,0.9984,0.1778
zero-shot,qawiki,?A3∅A4,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,0.9974,1.0,1.0,1.0,1.0,1.0,0.9993,1.0,1.0,0.9828,1.0,0.9991
zero-shot,qawiki,?A3∅A4,llama3.1:70b,0.9999,0.9968,0.0352,0.0,1.0,0.0668,0.0009,0.0124,0.024,1.0,0.0058,0.996,0.0049,0.98,0.9179,0.0002,0.9393,0.0043
zero-shot,qawiki,?A3∅A4,gemini-2.0-flash,1.0,0.9999,0.4525,0.0,0.962,1.0,0.0407,0.2595,0.3494,1.0,0.2916,1.0,0.181,0.9997,0.9973,0.0199,0.9974,0.1208
zero-shot,qawiki,?A3∅A4,gpt-4.1-mini-2025-04-14,1.0,1.0,0.962,0.0053,0.9996,0.978,1.0,0.8556,0.9481,1.0,0.9115,1.0,0.7474,1.0,1.0,0.2906,1.0,0.6735
zero-shot,qawiki,?A3∅A4,gpt-4o,1.0,1.0,0.722,0.0,0.994,0.8169,0.2048,1.0,0.646,1.0,0.5937,1.0,0.3854,1.0,0.9998,0.0352,0.9999,0.2664
zero-shot,qawiki,?A3∅A4,gpt-4.1-2025-04-14,1.0,1.0,0.6399,0.0001,0.9876,0.7405,0.092,0.4503,1.0,1.0,0.5,1.0,0.3222,1.0,0.9997,0.0423,0.9997,0.2414
zero-shot,qawiki,?A3∅A4,grok-3-mini,0.2983,0.068,0.0,0.0,0.0001,0.0,0.0,0.0,0.0,1.0,0.0,0.061,0.0,0.0288,0.0038,0.0,0.0019,0.0
zero-shot,qawiki,?A3∅A4,deepseek-chat,1.0,1.0,0.6927,0.0001,0.9978,0.7949,0.1403,0.5,0.6061,1.0,1.0,1.0,0.3638,1.0,0.9998,0.0642,0.9998,0.2858
zero-shot,qawiki,?A3∅A4,gemini-2.5-flash,0.8675,0.5612,0.0001,0.0,0.0098,0.0,0.0,0.0,0.0,0.9739,0.0,1.0,0.0,0.3555,0.1147,0.0,0.1405,0.0
zero-shot,qawiki,?A3∅A4,gpt-5-nano,1.0,1.0,0.8222,0.0016,0.9978,0.8728,0.3285,0.72,0.7557,1.0,0.7193,1.0,1.0,1.0,1.0,0.0261,1.0,0.434
zero-shot,qawiki,?A3∅A4,deepseek-reasoner,0.9506,0.7336,0.0001,0.0,0.0395,0.0008,0.0,0.0001,0.0001,0.9878,0.0,0.7709,0.0,1.0,0.2757,0.0,0.2612,0.0
zero-shot,qawiki,?A3∅A4,gemini-2.5-pro,0.9842,0.9393,0.0046,0.0,0.14,0.006,0.0,0.0004,0.0009,0.9987,0.0006,0.9393,0.0001,0.8144,1.0,0.0,0.6358,0.0
zero-shot,qawiki,?A3∅A4,gpt-5-mini,1.0,1.0,0.9888,0.0302,0.9999,0.9885,0.7801,0.9818,0.9748,1.0,0.959,1.0,0.9904,1.0,1.0,1.0,1.0,0.9693
zero-shot,qawiki,?A3∅A4,o3,0.9881,0.9393,0.0038,0.0,0.1147,0.0056,0.0,0.0003,0.0007,0.9995,0.0004,0.9252,0.0,0.8316,0.5,0.0,1.0,0.0
zero-shot,qawiki,?A3∅A4,gpt-5,1.0,1.0,0.876,0.0019,0.998,0.9201,0.4111,0.8256,0.8254,1.0,0.7859,1.0,0.6911,1.0,1.0,0.068,1.0,1.0
zero-shot,qawiki,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A4=A1|3,gpt-oss:20b,0.0002,1.0,0.0112,0.0005,0.0032,0.073,0.073,0.2272,0.2744,0.9999,0.2905,0.9999,0.9408,0.5,0.9784,0.9616,0.9519,0.9999
zero-shot,qawiki,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.125,0.9983,1.0,0.125,0.5,0.9102,0.9102,0.9805,0.9893,1.0,0.9807,1.0,0.9999,0.9935,1.0,1.0,0.9998,1.0
zero-shot,qawiki,?A4=A1|3,mistral-small:24b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A4=A1|3,llama3.1:70b,0.25,0.9998,0.8125,0.25,1.0,1.0,0.9844,0.9961,1.0,1.0,0.998,1.0,1.0,0.9995,1.0,1.0,1.0,1.0
zero-shot,qawiki,?A4=A1|3,gemini-2.0-flash,0.0156,0.9807,0.2539,0.0312,0.0625,1.0,0.6367,0.8062,0.9102,1.0,0.9102,1.0,0.9988,0.9673,0.9999,0.9993,0.9997,1.0
zero-shot,qawiki,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0156,0.9807,0.2539,0.0312,0.1094,0.6367,1.0,0.8062,0.9375,1.0,0.9102,1.0,0.9999,0.9673,0.9999,0.9993,0.9997,1.0
zero-shot,qawiki,?A4=A1|3,gpt-4o,0.0039,0.8949,0.0898,0.0039,0.0352,0.3872,0.3872,1.0,0.7256,1.0,0.7095,1.0,0.9963,0.8491,0.9996,0.9941,0.9904,1.0
zero-shot,qawiki,?A4=A1|3,gpt-4.1-2025-04-14,0.002,0.8867,0.0547,0.0039,0.0078,0.2539,0.2266,0.5,1.0,1.0,0.6128,1.0,0.9968,0.8062,0.9967,0.9936,0.9904,1.0
zero-shot,qawiki,?A4=A1|3,grok-3-mini,0.0,0.0004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0001,0.5,0.0096,0.0001,0.0946,0.0207,0.0262,0.5806
zero-shot,qawiki,?A4=A1|3,deepseek-chat,0.002,0.8666,0.073,0.0039,0.0195,0.2539,0.2539,0.5,0.6128,1.0,1.0,1.0,0.9894,0.8281,0.9978,0.9936,0.9936,1.0
zero-shot,qawiki,?A4=A1|3,gemini-2.5-flash,0.0,0.0007,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.6964,0.0003,1.0,0.0106,0.0002,0.1316,0.0392,0.0262,0.6682
zero-shot,qawiki,?A4=A1|3,gpt-5-nano,0.0,0.1509,0.0007,0.0,0.0,0.0064,0.0017,0.0176,0.0193,0.9978,0.0384,0.9979,1.0,0.073,0.8463,0.7256,0.6855,0.9964
zero-shot,qawiki,?A4=A1|3,deepseek-reasoner,0.0005,0.7095,0.0287,0.001,0.0059,0.1133,0.1133,0.3036,0.3872,1.0,0.377,1.0,0.9807,1.0,0.9887,0.9941,0.9755,1.0
zero-shot,qawiki,?A4=A1|3,gemini-2.5-pro,0.0,0.0539,0.0,0.0,0.0,0.0007,0.0007,0.0022,0.0113,0.9608,0.0085,0.9423,0.2706,0.032,1.0,0.3318,0.2403,0.9824
zero-shot,qawiki,?A4=A1|3,gpt-5-mini,0.0,0.1051,0.0001,0.0,0.0001,0.0038,0.0038,0.0207,0.0245,0.9941,0.0245,0.9867,0.5,0.0327,0.8083,1.0,0.5982,0.9962
zero-shot,qawiki,?A4=A1|3,o3,0.0,0.1189,0.0013,0.0,0.0001,0.0021,0.0021,0.0318,0.0318,0.9915,0.0245,0.9915,0.5,0.0717,0.8811,0.5982,1.0,0.9979
zero-shot,qawiki,?A4=A1|3,gpt-5,0.0,0.0008,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.5806,0.0,0.5,0.0133,0.0002,0.0592,0.0154,0.0106,1.0
zero-shot,spinach,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1=A2,gpt-oss:20b,0.0,1.0,0.9818,1.0,0.5,0.9998,1.0,1.0,1.0,0.9907,0.9818,0.9953,1.0,0.2025,0.9822,1.0,1.0,1.0
zero-shot,spinach,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0337,1.0,0.9981,0.0045,0.9519,0.9834,1.0,0.9989,0.5627,0.5,0.5598,1.0,0.0007,0.446,1.0,0.9597,1.0
zero-shot,spinach,?A1=A2,mistral-small:24b,0.0,0.0,0.0043,1.0,0.0,0.065,0.2135,0.9281,0.4503,0.0031,0.0006,0.0021,0.9959,0.0,0.0007,0.9999,0.1037,0.9975
zero-shot,spinach,?A1=A2,llama3.1:70b,0.0,0.6224,0.9985,1.0,1.0,1.0,1.0,1.0,1.0,0.9985,0.9988,0.9985,1.0,0.2366,0.9943,1.0,1.0,1.0
zero-shot,spinach,?A1=A2,gemini-2.0-flash,0.0,0.0005,0.0871,0.9615,0.0,1.0,0.8198,0.9991,0.9734,0.0821,0.0494,0.0717,1.0,0.0,0.0326,1.0,0.6911,1.0
zero-shot,spinach,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0314,0.8554,0.0,0.2712,1.0,0.9853,0.86,0.0138,0.0057,0.0178,0.9998,0.0,0.012,1.0,0.3294,0.9999
zero-shot,spinach,?A1=A2,gpt-4o,0.0,0.0,0.0,0.1215,0.0,0.0023,0.0266,1.0,0.1037,0.0,0.0,0.0001,0.9648,0.0,0.0,0.9974,0.0045,0.9799
zero-shot,spinach,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0027,0.646,0.0,0.0541,0.2204,0.935,1.0,0.0023,0.0003,0.001,0.9964,0.0,0.0003,0.9999,0.0939,0.9986
zero-shot,spinach,?A1=A2,grok-3-mini,0.0,0.02,0.5627,0.9987,0.0045,0.9558,0.9942,1.0,0.9991,1.0,0.5,0.566,1.0,0.0004,0.4427,1.0,0.9734,1.0
zero-shot,spinach,?A1=A2,deepseek-chat,0.0,0.0352,0.6196,0.9998,0.0041,0.9765,0.998,1.0,0.9999,0.6254,1.0,0.6358,1.0,0.0008,0.5,1.0,0.9917,1.0
zero-shot,spinach,?A1=A2,gemini-2.5-flash,0.0,0.0119,0.5598,0.9991,0.0045,0.9635,0.992,1.0,0.9997,0.566,0.5,1.0,1.0,0.0005,0.4321,1.0,0.9947,1.0
zero-shot,spinach,?A1=A2,gpt-5-nano,0.0,0.0,0.0,0.0084,0.0,0.0001,0.0005,0.0632,0.007,0.0,0.0,0.0,1.0,0.0,0.0,0.9281,0.0001,0.6742
zero-shot,spinach,?A1=A2,deepseek-reasoner,0.0,0.8785,0.9998,1.0,0.8595,1.0,1.0,1.0,1.0,0.9999,0.9998,0.9999,1.0,1.0,0.9997,1.0,1.0,1.0
zero-shot,spinach,?A1=A2,gemini-2.5-pro,0.0,0.0362,0.6583,0.9997,0.0144,0.9856,0.9945,1.0,0.9999,0.6673,0.6254,0.6962,1.0,0.0009,1.0,1.0,0.998,1.0
zero-shot,spinach,?A1=A2,gpt-5-mini,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0056,0.0003,0.0,0.0,0.0,0.1215,0.0,0.0,1.0,0.0,0.1856
zero-shot,spinach,?A1=A2,o3,0.0,0.0002,0.0769,0.935,0.0,0.434,0.7693,0.9981,0.9506,0.0541,0.0192,0.0147,1.0,0.0,0.0057,1.0,1.0,1.0
zero-shot,spinach,?A1=A2,gpt-5,0.0,0.0,0.0,0.0052,0.0,0.0,0.0002,0.038,0.0029,0.0,0.0,0.0,0.4402,0.0,0.0,0.8837,0.0001,1.0
zero-shot,spinach,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.8675,0.9999,0.2517,0.4018,0.9967,1.0,0.9965,1.0,0.8852,1.0,1.0,0.6855,0.9999,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.2291,1.0,0.9992,0.0318,0.1325,0.9252,0.9992,0.9506,0.9993,0.5722,0.9975,1.0,0.2858,0.9765,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,mistral-small:24b,0.0,0.0002,0.0025,1.0,0.0,0.0001,0.0871,0.7336,0.1279,0.6146,0.0038,0.5,0.9998,0.0006,0.14,1.0,0.9332,0.9999
zero-shot,spinach,?A1=A3+A4,llama3.1:70b,0.0001,0.8684,0.9904,1.0,1.0,0.7383,1.0,1.0,1.0,1.0,0.9867,1.0,1.0,0.9054,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,gemini-2.0-flash,0.0,0.7728,0.932,1.0,0.4159,1.0,0.9992,1.0,0.9989,1.0,0.9608,1.0,1.0,0.8083,0.9998,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.0113,0.1405,0.9519,0.0002,0.0033,1.0,0.9862,0.7142,0.9919,0.1325,0.9646,1.0,0.0307,0.8115,1.0,0.9998,1.0
zero-shot,spinach,?A1=A3+A4,gpt-4o,0.0,0.0001,0.0023,0.3776,0.0,0.0,0.0298,1.0,0.0586,0.5,0.0014,0.3417,0.9996,0.0001,0.1215,1.0,0.8472,0.9998
zero-shot,spinach,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.01,0.0939,0.9283,0.0003,0.0035,0.4253,0.9702,1.0,0.9749,0.068,0.9123,1.0,0.0121,0.7214,1.0,0.9997,1.0
zero-shot,spinach,?A1=A3+A4,grok-3-mini,0.0,0.0,0.0022,0.5,0.0,0.0,0.0214,0.6123,0.0551,1.0,0.0009,0.345,0.9997,0.0,0.1077,1.0,0.9739,1.0
zero-shot,spinach,?A1=A3+A4,deepseek-chat,0.0,0.2122,0.5722,0.9987,0.0392,0.0946,0.932,0.9995,0.9693,0.9997,1.0,0.9985,1.0,0.2786,0.9825,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,gemini-2.5-flash,0.0,0.0001,0.0069,0.6043,0.0,0.0,0.0748,0.7517,0.1553,0.7878,0.0045,1.0,0.9999,0.0,0.2122,1.0,0.9904,1.0
zero-shot,spinach,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0007,0.0,0.0,0.0,0.001,0.0,0.0009,0.0,0.0003,1.0,0.0,0.0,0.967,0.0587,0.8144
zero-shot,spinach,?A1=A3+A4,deepseek-reasoner,0.0,0.5,0.8275,0.9998,0.1917,0.3318,0.9879,1.0,0.9959,1.0,0.8365,1.0,1.0,1.0,0.9998,1.0,1.0,1.0
zero-shot,spinach,?A1=A3+A4,gemini-2.5-pro,0.0,0.0008,0.0494,0.9179,0.0001,0.0007,0.2983,0.9281,0.4225,0.9449,0.0401,0.8852,1.0,0.0013,1.0,1.0,0.9997,1.0
zero-shot,spinach,?A1=A3+A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0631,0.0,0.0,1.0,0.007,0.2148
zero-shot,spinach,?A1=A3+A4,o3,0.0,0.0,0.0,0.1102,0.0,0.0,0.0008,0.2213,0.0011,0.061,0.0,0.0261,0.9663,0.0,0.0011,0.9964,1.0,0.9924
zero-shot,spinach,?A1=A3+A4,gpt-5,0.0,0.0,0.0,0.0002,0.0,0.0,0.0,0.0004,0.0,0.0001,0.0,0.0,0.2757,0.0,0.0,0.8659,0.015,1.0
zero-shot,spinach,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1>A3,gpt-oss:20b,0.0,1.0,1.0,0.9944,0.2712,0.9476,0.9999,1.0,0.9997,1.0,0.9934,0.9952,1.0,0.9238,0.9405,1.0,1.0,1.0
zero-shot,spinach,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.0001,1.0,0.2051,0.0,0.0135,0.6482,0.7825,0.2983,0.6506,0.074,0.074,0.9,0.0092,0.0178,0.9748,0.8169,0.993
zero-shot,spinach,?A1>A3,mistral-small:24b,0.0,0.0113,0.8642,1.0,0.0004,0.1185,0.9296,0.9721,0.7084,0.9332,0.3417,0.3939,0.9908,0.0668,0.1375,0.9975,0.9663,0.9996
zero-shot,spinach,?A1>A3,llama3.1:70b,0.0,0.8198,1.0,0.9999,1.0,0.9934,1.0,1.0,1.0,1.0,0.9989,0.9996,1.0,0.9836,0.9865,1.0,1.0,1.0
zero-shot,spinach,?A1>A3,gemini-2.0-flash,0.0,0.0885,0.9937,0.926,0.0147,1.0,0.9973,0.9982,0.9808,0.9977,0.7995,0.8144,0.999,0.4388,0.5,1.0,0.9997,1.0
zero-shot,spinach,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.0001,0.4495,0.1144,0.0,0.0063,1.0,0.6927,0.1908,0.5573,0.0407,0.0297,0.8222,0.0027,0.006,0.9597,0.6985,0.9916
zero-shot,spinach,?A1>A3,gpt-4o,0.0,0.0,0.3015,0.049,0.0,0.0041,0.4007,1.0,0.1225,0.3957,0.015,0.0232,0.7405,0.0011,0.0022,0.935,0.5,0.9818
zero-shot,spinach,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0009,0.7865,0.3919,0.0,0.0403,0.8785,0.9225,1.0,0.9002,0.1958,0.1553,0.97,0.0218,0.033,0.9962,0.9597,0.9993
zero-shot,spinach,?A1>A3,grok-3-mini,0.0,0.0,0.4487,0.1102,0.0,0.0055,0.5573,0.7017,0.1684,1.0,0.027,0.0192,0.8512,0.0007,0.0052,0.9659,0.72,0.9933
zero-shot,spinach,?A1>A3,deepseek-chat,0.0,0.0147,0.9565,0.7517,0.0027,0.2879,0.978,0.9928,0.8736,0.9871,1.0,0.5598,0.9966,0.1802,0.2399,0.9997,0.9898,1.0
zero-shot,spinach,?A1>A3,gemini-2.5-flash,0.0,0.0113,0.9565,0.705,0.0012,0.2757,0.9853,0.9876,0.9123,0.9917,0.5598,1.0,0.9984,0.1684,0.2204,0.9998,0.9934,1.0
zero-shot,spinach,?A1>A3,gpt-5-nano,0.0,0.0,0.1528,0.0182,0.0,0.0022,0.2443,0.3494,0.0517,0.2175,0.007,0.0038,1.0,0.0005,0.0009,0.9123,0.3582,0.9638
zero-shot,spinach,?A1>A3,deepseek-reasoner,0.0,0.1264,0.9957,0.962,0.0325,0.678,0.9989,0.9996,0.9902,0.9998,0.889,0.9002,0.9998,1.0,0.6146,1.0,0.9999,1.0
zero-shot,spinach,?A1>A3,gemini-2.5-pro,0.0,0.1013,0.9907,0.9091,0.0267,0.6081,0.9973,0.9991,0.9842,0.9977,0.8389,0.86,0.9996,0.5,1.0,1.0,0.9998,1.0
zero-shot,spinach,?A1>A3,gpt-5-mini,0.0,0.0,0.0423,0.0052,0.0,0.0001,0.0651,0.1037,0.0077,0.0571,0.0008,0.0005,0.1553,0.0,0.0,1.0,0.1746,0.7852
zero-shot,spinach,?A1>A3,o3,0.0,0.0,0.2595,0.0587,0.0,0.0009,0.3974,0.5993,0.0769,0.3854,0.0198,0.0147,0.7277,0.0004,0.0005,0.8792,1.0,0.9568
zero-shot,spinach,?A1>A3,gpt-5,0.0,0.0,0.0133,0.0009,0.0,0.0,0.0164,0.0337,0.0015,0.0137,0.0001,0.0,0.0676,0.0,0.0,0.3179,0.071,1.0
zero-shot,spinach,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1>A4,gpt-oss:20b,0.0,1.0,0.9999,1.0,0.5,0.9283,0.9987,1.0,0.9997,1.0,0.9999,1.0,1.0,0.7852,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.0003,1.0,0.7825,0.0004,0.0178,0.3854,0.9876,0.617,0.9033,0.6985,0.9281,1.0,0.0055,0.6556,1.0,0.9953,1.0
zero-shot,spinach,?A1>A4,mistral-small:24b,0.0,0.0001,0.3015,1.0,0.0001,0.0124,0.2094,0.954,0.4495,0.7558,0.5,0.8019,0.9999,0.0013,0.3991,1.0,0.9456,1.0
zero-shot,spinach,?A1>A4,llama3.1:70b,0.0,0.6224,0.9999,1.0,1.0,0.9369,0.9998,1.0,1.0,1.0,1.0,1.0,1.0,0.8256,1.0,1.0,1.0,1.0
zero-shot,spinach,?A1>A4,gemini-2.0-flash,0.0,0.1279,0.992,0.9937,0.111,1.0,0.9703,1.0,0.9952,1.0,0.9982,1.0,1.0,0.3258,0.9945,1.0,1.0,1.0
zero-shot,spinach,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0033,0.72,0.8597,0.0007,0.0557,1.0,0.9984,0.7796,0.9702,0.8341,0.9703,1.0,0.0147,0.8144,1.0,0.9977,1.0
zero-shot,spinach,?A1>A4,gpt-4o,0.0,0.0,0.0232,0.0804,0.0,0.0001,0.0039,1.0,0.0201,0.1568,0.0462,0.1871,0.9865,0.0,0.0361,0.9997,0.6009,0.9962
zero-shot,spinach,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0007,0.5,0.6482,0.0002,0.0113,0.322,0.9901,1.0,0.8837,0.6706,0.9179,0.9999,0.0039,0.6146,1.0,0.9917,1.0
zero-shot,spinach,?A1>A4,grok-3-mini,0.0,0.0,0.1562,0.3389,0.0,0.0001,0.0586,0.8963,0.1856,1.0,0.295,0.6224,0.9995,0.0001,0.2204,1.0,0.9393,1.0
zero-shot,spinach,?A1>A4,deepseek-chat,0.0,0.0002,0.3974,0.5964,0.0,0.0048,0.2442,0.9741,0.4415,0.7906,1.0,0.8438,0.9999,0.0031,0.5,1.0,0.9639,1.0
zero-shot,spinach,?A1>A4,gemini-2.5-flash,0.0,0.0,0.1215,0.2723,0.0,0.0001,0.0557,0.8736,0.14,0.5,0.2354,1.0,0.9985,0.0,0.162,1.0,0.8853,0.9998
zero-shot,spinach,?A1>A4,gpt-5-nano,0.0,0.0,0.0001,0.0003,0.0,0.0,0.0,0.0267,0.0002,0.0013,0.0003,0.0034,1.0,0.0,0.0003,0.9573,0.0598,0.8209
zero-shot,spinach,?A1>A4,deepseek-reasoner,0.0,0.3179,0.9977,0.9995,0.2664,0.7743,0.9934,1.0,0.9984,1.0,0.9987,1.0,1.0,1.0,0.9996,1.0,1.0,1.0
zero-shot,spinach,?A1>A4,gemini-2.5-pro,0.0,0.0002,0.4469,0.6955,0.0002,0.012,0.2757,0.9802,0.5,0.86,0.6043,0.9061,0.9999,0.0012,1.0,1.0,0.9977,1.0
zero-shot,spinach,?A1>A4,gpt-5-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0008,0.0,0.0,0.0,0.0001,0.0762,0.0,0.0,1.0,0.0028,0.2841
zero-shot,spinach,?A1>A4,o3,0.0,0.0,0.0106,0.0876,0.0,0.0,0.0055,0.5,0.0192,0.1147,0.0619,0.1958,0.9639,0.0,0.0068,0.9987,1.0,0.9942
zero-shot,spinach,?A1>A4,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0077,0.0,0.0001,0.0001,0.0004,0.2559,0.0,0.0,0.8042,0.0112,1.0
zero-shot,spinach,?A3∅A4,llama3.1:8b,1.0,0.8945,0.0,0.0002,0.111,0.0045,0.0,0.0072,0.0011,0.9917,0.0,0.7852,0.0198,0.9459,0.7912,0.0004,0.9994,0.0668
zero-shot,spinach,?A3∅A4,gpt-oss:20b,0.1744,1.0,0.0,0.0,0.0057,0.0,0.0,0.0001,0.0,0.9075,0.0,0.3714,0.0004,0.7383,0.3642,0.0,0.9822,0.004
zero-shot,spinach,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,0.6009,0.9995,0.9593,0.0775,0.9568,0.8252,1.0,0.4495,1.0,0.9751,1.0,1.0,0.7681,1.0,0.9964
zero-shot,spinach,?A3∅A4,mistral-small:24b,0.9999,1.0,0.5,1.0,0.9977,0.9663,0.0556,0.9778,0.8432,1.0,0.5,1.0,0.9865,1.0,1.0,0.7307,1.0,0.993
zero-shot,spinach,?A3∅A4,llama3.1:70b,0.9369,0.998,0.0013,0.0052,1.0,0.0586,0.0,0.1144,0.0135,0.9999,0.0006,0.9822,0.1659,0.9994,0.9842,0.015,1.0,0.3957
zero-shot,spinach,?A3∅A4,gemini-2.0-flash,0.9981,1.0,0.0704,0.0587,0.9702,1.0,0.0006,0.6043,0.2204,1.0,0.0297,0.9998,0.7121,1.0,0.9998,0.2352,1.0,0.8775
zero-shot,spinach,?A3∅A4,gpt-4.1-mini-2025-04-14,1.0,1.0,0.9538,0.9686,1.0,0.9998,1.0,0.9996,0.9948,1.0,0.9091,1.0,0.9999,1.0,1.0,0.9828,1.0,1.0
zero-shot,spinach,?A3∅A4,gpt-4o,0.9968,1.0,0.071,0.0427,0.9296,0.5,0.0009,1.0,0.2005,1.0,0.048,0.9996,0.678,1.0,0.9995,0.1456,1.0,0.8837
zero-shot,spinach,?A3∅A4,gpt-4.1-2025-04-14,0.9995,1.0,0.2522,0.225,0.9937,0.86,0.0111,0.8688,1.0,1.0,0.1909,1.0,0.9238,1.0,1.0,0.5,1.0,0.9818
zero-shot,spinach,?A3∅A4,grok-3-mini,0.0205,0.1725,0.0,0.0,0.0005,0.0,0.0,0.0,0.0,1.0,0.0,0.061,0.0,0.2706,0.0539,0.0,0.8569,0.0001
zero-shot,spinach,?A3∅A4,deepseek-chat,1.0,1.0,0.6482,0.6009,0.9998,0.9853,0.1375,0.9716,0.8698,1.0,1.0,1.0,0.9828,1.0,1.0,0.81,1.0,0.9952
zero-shot,spinach,?A3∅A4,gemini-2.5-flash,0.3179,0.7443,0.0,0.0,0.0362,0.0006,0.0,0.0011,0.0,0.9739,0.0,1.0,0.0012,0.8595,0.5841,0.0,0.9967,0.0057
zero-shot,spinach,?A3∅A4,gpt-5-nano,0.9898,0.9999,0.0432,0.0267,0.8942,0.3899,0.0004,0.4388,0.1264,1.0,0.0302,0.9996,1.0,1.0,0.9995,0.0551,1.0,0.8447
zero-shot,spinach,?A3∅A4,deepseek-reasoner,0.0998,0.4159,0.0,0.0,0.002,0.0,0.0,0.0001,0.0,0.8463,0.0,0.2366,0.0001,1.0,0.2024,0.0,0.9622,0.001
zero-shot,spinach,?A3∅A4,gemini-2.5-pro,0.3136,0.7566,0.0,0.0,0.033,0.0005,0.0,0.0013,0.0,0.9784,0.0,0.5841,0.0015,0.895,1.0,0.0,0.9987,0.0035
zero-shot,spinach,?A3∅A4,gpt-5-mini,0.9998,1.0,0.3127,0.3561,0.9924,0.8322,0.0302,0.9129,0.6009,1.0,0.2552,1.0,0.9749,1.0,1.0,1.0,1.0,0.9988
zero-shot,spinach,?A3∅A4,o3,0.002,0.0436,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2617,0.0,0.0113,0.0,0.0843,0.0053,0.0,1.0,0.0
zero-shot,spinach,?A3∅A4,gpt-5,0.962,0.9985,0.0076,0.0136,0.7017,0.1831,0.0001,0.1856,0.0352,1.0,0.0092,0.998,0.2498,0.9997,0.9989,0.0041,1.0,1.0
zero-shot,spinach,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,spinach,?A4=A1|3,gpt-oss:20b,0.0002,1.0,0.1051,0.073,0.1662,0.0461,0.5,0.7483,0.9461,1.0,0.5841,1.0,1.0,0.927,1.0,0.9991,1.0,1.0
zero-shot,spinach,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.0156,0.9616,1.0,0.623,0.7461,0.5,0.9673,0.9894,0.9993,1.0,0.9616,1.0,1.0,0.9979,1.0,0.9999,1.0,1.0
zero-shot,spinach,?A4=A1|3,mistral-small:24b,0.0312,0.9807,0.623,1.0,0.8281,0.623,0.9713,0.9904,0.9998,1.0,0.9888,1.0,1.0,0.9962,1.0,1.0,1.0,1.0
zero-shot,spinach,?A4=A1|3,llama3.1:70b,0.0078,0.9283,0.5,0.377,1.0,0.377,0.9844,0.9941,0.9997,1.0,0.9539,1.0,1.0,0.9904,1.0,0.9992,1.0,1.0
zero-shot,spinach,?A4=A1|3,gemini-2.0-flash,0.0312,0.9888,0.7734,0.623,0.8281,1.0,0.9807,0.9963,0.9999,1.0,0.9824,1.0,1.0,0.9995,1.0,1.0,1.0,1.0
zero-shot,spinach,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0005,0.6762,0.1133,0.0898,0.1094,0.073,1.0,0.8491,0.9846,1.0,0.7095,1.0,0.9995,0.895,0.9998,0.9879,1.0,1.0
zero-shot,spinach,?A4=A1|3,gpt-4o,0.0001,0.4119,0.0384,0.0318,0.0327,0.0176,0.3036,1.0,0.9408,1.0,0.4119,0.9999,0.998,0.7597,0.9995,0.968,1.0,0.9999
zero-shot,spinach,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.1148,0.0036,0.0013,0.0021,0.0007,0.0481,0.1509,1.0,1.0,0.105,0.9988,0.968,0.3388,0.9855,0.7709,1.0,0.9988
zero-shot,spinach,?A4=A1|3,grok-3-mini,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,1.0,0.0,0.1537,0.0053,0.0,0.0307,0.0001,0.9242,0.1635
zero-shot,spinach,?A4=A1|3,deepseek-chat,0.0002,0.5841,0.1051,0.0461,0.1334,0.0592,0.5,0.7483,0.9534,1.0,1.0,1.0,0.9983,0.8569,0.9993,0.9786,1.0,0.9999
zero-shot,spinach,?A4=A1|3,gemini-2.5-flash,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0003,0.0041,0.9242,0.0002,1.0,0.0717,0.0001,0.1917,0.0145,0.9947,0.5841
zero-shot,spinach,?A4=A1|3,gpt-5-nano,0.0,0.0001,0.0,0.0,0.0001,0.0,0.0019,0.0073,0.0758,0.9987,0.0053,0.9755,1.0,0.0096,0.7483,0.1796,1.0,0.9608
zero-shot,spinach,?A4=A1|3,deepseek-reasoner,0.0,0.1938,0.0106,0.0154,0.0318,0.0037,0.2024,0.4073,0.7976,1.0,0.2617,1.0,0.9978,1.0,0.9993,0.9616,1.0,1.0
zero-shot,spinach,?A4=A1|3,gemini-2.5-pro,0.0,0.0001,0.0,0.0,0.0,0.0,0.0007,0.002,0.0378,0.9879,0.0023,0.9054,0.4119,0.0036,1.0,0.105,0.9997,0.9165
zero-shot,spinach,?A4=A1|3,gpt-5-mini,0.0,0.0065,0.0004,0.0002,0.003,0.0001,0.0307,0.0758,0.3555,1.0,0.0494,0.9953,0.9165,0.1051,0.9534,1.0,1.0,0.9993
zero-shot,spinach,?A4=A1|3,o3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1537,0.0,0.0173,0.0003,0.0,0.0011,0.0,1.0,0.0178
zero-shot,spinach,?A4=A1|3,gpt-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.0004,0.0041,0.9157,0.0003,0.5841,0.0946,0.0001,0.1796,0.0038,0.9937,1.0
zero-shot,synthetic,?A1=A2,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A1=A2,gpt-oss:20b,0.0,1.0,0.9853,0.9981,0.838,0.9998,1.0,0.999,1.0,1.0,1.0,1.0,1.0,0.6821,0.9999,1.0,0.996,1.0
zero-shot,synthetic,?A1=A2,gpt-4.1-nano-2025-04-14,0.0,0.0297,1.0,0.8296,0.1163,0.9324,0.992,0.8252,1.0,0.9862,0.9931,0.9931,0.9998,0.0481,0.962,0.9998,0.6641,1.0
zero-shot,synthetic,?A1=A2,mistral-small:24b,0.0,0.0045,0.2483,1.0,0.0222,0.72,0.9115,0.5561,1.0,0.8597,0.9281,0.9238,0.998,0.0077,0.8544,0.998,0.347,0.9968
zero-shot,synthetic,?A1=A2,llama3.1:70b,0.0,0.2557,0.9324,0.9894,1.0,0.9986,1.0,0.9931,1.0,0.9998,1.0,1.0,1.0,0.3679,0.9998,1.0,0.954,1.0
zero-shot,synthetic,?A1=A2,gemini-2.0-flash,0.0,0.0007,0.1163,0.3854,0.004,1.0,0.8544,0.3877,1.0,0.7743,0.8785,0.8659,0.9964,0.001,0.7288,0.9936,0.1684,0.9962
zero-shot,synthetic,?A1=A2,gpt-4.1-mini-2025-04-14,0.0,0.0,0.0178,0.1403,0.0001,0.2257,1.0,0.1568,0.9995,0.4388,0.5612,0.5627,0.968,0.0,0.3877,0.9556,0.033,0.9686
zero-shot,synthetic,?A1=A2,gpt-4o,0.0,0.0028,0.2522,0.5561,0.0158,0.7159,0.8963,1.0,1.0,0.8642,0.9196,0.9155,0.9995,0.0099,0.8341,0.9991,0.3417,0.9984
zero-shot,synthetic,?A1=A2,gpt-4.1-2025-04-14,0.0,0.0,0.0,0.0001,0.0,0.0001,0.0014,0.0001,1.0,0.0008,0.0006,0.0023,0.2693,0.0,0.0004,0.2858,0.0,0.2094
zero-shot,synthetic,?A1=A2,grok-3-mini,0.0,0.0,0.0298,0.2094,0.0006,0.3258,0.678,0.2051,0.9997,1.0,0.6673,0.678,0.985,0.0002,0.5,0.9863,0.0676,0.9865
zero-shot,synthetic,?A1=A2,deepseek-chat,0.0,0.0,0.0158,0.1215,0.0001,0.2025,0.5612,0.1312,0.9998,0.4427,1.0,0.5627,0.9721,0.0,0.3776,0.9639,0.0492,0.9663
zero-shot,synthetic,?A1=A2,gemini-2.5-flash,0.0,0.0,0.0158,0.1264,0.0001,0.2148,0.5627,0.1358,0.9992,0.4388,0.5627,1.0,0.976,0.0,0.3714,0.9659,0.033,0.9733
zero-shot,synthetic,?A1=A2,gpt-5-nano,0.0,0.0,0.0004,0.0043,0.0,0.0076,0.0544,0.0013,0.8055,0.0279,0.049,0.0435,1.0,0.0,0.0198,0.5573,0.0002,0.5
zero-shot,synthetic,?A1=A2,deepseek-reasoner,0.0,0.4373,0.9756,0.9967,0.7502,0.9997,1.0,0.9955,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9887,1.0
zero-shot,synthetic,?A1=A2,gemini-2.5-pro,0.0,0.0003,0.0668,0.2257,0.0007,0.3804,0.7159,0.2442,0.9999,0.6101,0.7336,0.7443,0.9898,0.0001,1.0,0.9853,0.0967,0.985
zero-shot,synthetic,?A1=A2,gpt-5-mini,0.0,0.0,0.0004,0.0043,0.0,0.0122,0.0703,0.0022,0.7859,0.0259,0.0598,0.0571,0.5573,0.0,0.0266,1.0,0.0001,0.5
zero-shot,synthetic,?A1=A2,o3,0.0,0.0098,0.4439,0.7441,0.0804,0.9002,0.9842,0.7517,1.0,0.9638,0.9733,0.9842,0.9999,0.0244,0.9443,1.0,1.0,1.0
zero-shot,synthetic,?A1=A2,gpt-5,0.0,0.0,0.0001,0.0064,0.0,0.0082,0.0556,0.0036,0.8597,0.0267,0.0587,0.0492,0.6123,0.0,0.0279,0.6081,0.0002,1.0
zero-shot,synthetic,?A1=A3+A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A1=A3+A4,gpt-oss:20b,0.0,1.0,0.0035,0.162,0.0002,0.1885,0.1885,0.0748,0.9338,0.9997,0.5,0.9998,1.0,0.7383,0.9998,0.999,0.9061,1.0
zero-shot,synthetic,?A1=A3+A4,gpt-4.1-nano-2025-04-14,0.0,0.9989,1.0,0.9599,0.2706,0.9786,0.9855,0.9252,1.0,1.0,0.9983,1.0,1.0,0.9996,1.0,1.0,0.9999,1.0
zero-shot,synthetic,?A1=A3+A4,mistral-small:24b,0.0,0.9061,0.0814,1.0,0.0073,0.6358,0.6358,0.4225,0.992,1.0,0.8659,1.0,1.0,0.9369,0.9999,0.9999,0.9871,1.0
zero-shot,synthetic,?A1=A3+A4,llama3.1:70b,0.0001,1.0,0.8463,0.998,1.0,0.9981,0.9996,0.9947,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A1=A3+A4,gemini-2.0-flash,0.0,0.8923,0.0494,0.5,0.0063,1.0,0.5775,0.3555,0.9988,1.0,0.8595,1.0,1.0,0.9449,1.0,0.9999,0.99,1.0
zero-shot,synthetic,?A1=A3+A4,gpt-4.1-mini-2025-04-14,0.0,0.8923,0.0378,0.5,0.0022,0.5775,1.0,0.3601,0.9988,1.0,0.8675,1.0,1.0,0.9564,1.0,0.9999,0.9917,1.0
zero-shot,synthetic,?A1=A3+A4,gpt-4o,0.0,0.9646,0.1405,0.7214,0.0173,0.7709,0.7634,1.0,0.9991,1.0,0.9393,1.0,1.0,0.9795,1.0,1.0,0.996,1.0
zero-shot,synthetic,?A1=A3+A4,gpt-4.1-2025-04-14,0.0,0.1215,0.0001,0.0178,0.0,0.0047,0.0047,0.003,1.0,0.9825,0.0814,0.9795,0.9996,0.1808,0.9832,0.9413,0.5,0.9986
zero-shot,synthetic,?A1=A3+A4,grok-3-mini,0.0,0.0009,0.0,0.0002,0.0,0.0,0.0,0.0,0.0401,1.0,0.0008,0.5775,0.9481,0.0023,0.6321,0.5561,0.0442,0.9061
zero-shot,synthetic,?A1=A3+A4,deepseek-chat,0.0,0.6445,0.0053,0.2148,0.0002,0.2366,0.2291,0.1147,0.9599,0.9998,1.0,0.9998,1.0,0.7878,0.9997,0.9994,0.9283,1.0
zero-shot,synthetic,?A1=A3+A4,gemini-2.5-flash,0.0,0.0007,0.0,0.0002,0.0,0.0001,0.0,0.0,0.0448,0.5775,0.0008,1.0,0.9481,0.0008,0.6494,0.5551,0.0442,0.8945
zero-shot,synthetic,?A1=A3+A4,gpt-5-nano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0012,0.092,0.0,0.092,1.0,0.0001,0.1264,0.0717,0.001,0.3714
zero-shot,synthetic,?A1=A3+A4,deepseek-reasoner,0.0,0.4159,0.0015,0.111,0.0,0.1077,0.0925,0.0448,0.8998,0.9993,0.345,0.9998,1.0,1.0,0.9996,0.996,0.8519,1.0
zero-shot,synthetic,?A1=A3+A4,gemini-2.5-pro,0.0,0.0008,0.0,0.0002,0.0,0.0001,0.0001,0.0,0.0365,0.5,0.001,0.5,0.9238,0.0015,1.0,0.5,0.0205,0.8659
zero-shot,synthetic,?A1=A3+A4,gpt-5-mini,0.0,0.0027,0.0,0.0003,0.0,0.0004,0.0003,0.0,0.0963,0.5561,0.0018,0.5551,0.9635,0.0093,0.6081,1.0,0.0632,0.889
zero-shot,synthetic,?A1=A3+A4,o3,0.0,0.162,0.0004,0.027,0.0,0.0235,0.0205,0.0098,0.6254,0.9782,0.1279,0.9782,0.9996,0.2434,0.9917,0.9648,1.0,0.9995
zero-shot,synthetic,?A1=A3+A4,gpt-5,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.004,0.162,0.0,0.1744,0.7443,0.0001,0.2148,0.1802,0.0017,1.0
zero-shot,synthetic,?A1>A3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A1>A3,gpt-oss:20b,0.0,1.0,0.6482,0.9381,0.0325,0.7693,0.9129,0.908,1.0,0.9981,0.6556,0.9934,0.9999,0.7912,0.9912,0.7681,0.7646,0.9889
zero-shot,synthetic,?A1>A3,gpt-4.1-nano-2025-04-14,0.0,0.4495,1.0,0.8963,0.03,0.653,0.8341,0.8209,0.9999,0.9908,0.5522,0.978,0.9984,0.6482,0.968,0.6823,0.6482,0.9721
zero-shot,synthetic,?A1>A3,mistral-small:24b,0.0,0.1,0.1568,1.0,0.0009,0.2175,0.4007,0.3877,0.9957,0.8775,0.1528,0.7995,0.9876,0.2213,0.7787,0.2629,0.2213,0.775
zero-shot,synthetic,?A1>A3,llama3.1:70b,0.0,0.9836,0.9836,0.9996,1.0,0.9972,0.9995,0.9987,1.0,1.0,0.9923,1.0,1.0,0.9967,1.0,0.9957,0.9948,1.0
zero-shot,synthetic,?A1>A3,gemini-2.0-flash,0.0,0.3294,0.4478,0.8512,0.0066,1.0,0.7693,0.7405,1.0,0.9931,0.4427,0.9648,0.9989,0.5561,0.951,0.5993,0.5561,0.9593
zero-shot,synthetic,?A1>A3,gpt-4.1-mini-2025-04-14,0.0,0.1456,0.2442,0.6927,0.0013,0.3294,1.0,0.5522,0.9993,0.9508,0.2693,0.8856,0.993,0.3389,0.8987,0.4063,0.3417,0.8856
zero-shot,synthetic,?A1>A3,gpt-4o,0.0,0.151,0.2559,0.7159,0.0031,0.3494,0.5522,1.0,0.9995,0.929,0.2442,0.8898,0.9977,0.3444,0.8898,0.3919,0.347,0.8987
zero-shot,synthetic,?A1>A3,gpt-4.1-2025-04-14,0.0,0.0,0.0002,0.0092,0.0,0.0002,0.0019,0.0013,1.0,0.0631,0.0002,0.0218,0.3919,0.0003,0.0244,0.0009,0.0,0.0244
zero-shot,synthetic,?A1>A3,grok-3-mini,0.0,0.0047,0.0182,0.1831,0.0,0.0158,0.0845,0.1108,0.967,1.0,0.0198,0.3746,0.8856,0.02,0.3854,0.0544,0.0267,0.3854
zero-shot,synthetic,?A1>A3,deepseek-chat,0.0,0.4469,0.5522,0.9,0.0164,0.6673,0.8055,0.8341,0.9999,0.9898,1.0,0.9818,0.9998,0.653,0.9741,0.6955,0.6482,0.978
zero-shot,synthetic,?A1>A3,gemini-2.5-flash,0.0,0.0147,0.0407,0.2879,0.0,0.0632,0.1748,0.1704,0.9902,0.7388,0.0352,1.0,0.9508,0.0595,0.5627,0.1142,0.0442,0.5598
zero-shot,synthetic,?A1>A3,gpt-5-nano,0.0,0.0002,0.0033,0.024,0.0,0.0026,0.0136,0.0055,0.7084,0.1748,0.0006,0.0845,1.0,0.0011,0.0885,0.0019,0.0019,0.0885
zero-shot,synthetic,?A1>A3,deepseek-reasoner,0.0,0.3136,0.4495,0.8472,0.0077,0.5561,0.7558,0.7478,0.9999,0.9907,0.4478,0.9675,0.9996,1.0,0.9703,0.5978,0.5561,0.9593
zero-shot,synthetic,?A1>A3,gemini-2.5-pro,0.0,0.0182,0.0544,0.3045,0.0001,0.0809,0.1611,0.1704,0.9887,0.72,0.0462,0.5627,0.9476,0.0557,1.0,0.1037,0.0403,0.5598
zero-shot,synthetic,?A1>A3,gpt-5-mini,0.0,0.3127,0.4063,0.8129,0.0092,0.5,0.6823,0.7084,0.9996,0.968,0.3991,0.926,0.9993,0.5,0.935,1.0,0.5,0.935
zero-shot,synthetic,?A1>A3,o3,0.0,0.3327,0.4495,0.8472,0.0111,0.5561,0.7517,0.7441,1.0,0.9865,0.4495,0.9782,0.9992,0.5561,0.9808,0.5993,1.0,0.9703
zero-shot,synthetic,?A1>A3,gpt-5,0.0,0.022,0.049,0.3073,0.0001,0.0704,0.1748,0.1611,0.9887,0.72,0.0407,0.5598,0.9476,0.0704,0.5598,0.1037,0.0557,1.0
zero-shot,synthetic,?A1>A4,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A1>A4,gpt-oss:20b,0.0,1.0,0.8554,0.7825,0.0325,0.8341,0.999,0.6641,0.9991,0.9998,0.98,0.9998,1.0,0.8837,0.9871,1.0,0.9002,1.0
zero-shot,synthetic,?A1>A4,gpt-4.1-nano-2025-04-14,0.0,0.2135,1.0,0.4495,0.0027,0.5,0.9615,0.3127,0.976,0.9834,0.8129,0.9876,0.9957,0.5522,0.8472,0.9977,0.5531,1.0
zero-shot,synthetic,?A1>A4,mistral-small:24b,0.0,0.3015,0.6482,1.0,0.0072,0.5993,0.9785,0.3919,0.985,0.9898,0.8856,0.9897,0.998,0.6439,0.8963,0.9988,0.6506,1.0
zero-shot,synthetic,?A1>A4,llama3.1:70b,0.0,0.9836,0.9989,0.9968,1.0,0.9984,1.0,0.9876,1.0,1.0,0.9999,1.0,1.0,0.9984,0.9999,1.0,0.9981,1.0
zero-shot,synthetic,?A1>A4,gemini-2.0-flash,0.0,0.2442,0.595,0.5,0.0039,1.0,0.9871,0.347,0.992,0.9912,0.8688,0.9945,0.9965,0.6081,0.8942,0.9983,0.6101,1.0
zero-shot,synthetic,?A1>A4,gpt-4.1-mini-2025-04-14,0.0,0.0028,0.065,0.0385,0.0,0.027,1.0,0.015,0.6224,0.6706,0.2051,0.7288,0.8698,0.046,0.2354,0.9091,0.046,0.9908
zero-shot,synthetic,?A1>A4,gpt-4o,0.0,0.4439,0.7681,0.7084,0.024,0.7441,0.9924,1.0,0.9962,0.9954,0.9413,0.9991,0.9997,0.7787,0.9565,1.0,0.8042,1.0
zero-shot,synthetic,?A1>A4,gpt-4.1-2025-04-14,0.0,0.0023,0.0435,0.0279,0.0,0.0178,0.5,0.0082,1.0,0.617,0.1611,0.6821,0.8472,0.0297,0.1744,0.8792,0.0325,0.9889
zero-shot,synthetic,?A1>A4,grok-3-mini,0.0,0.0006,0.0314,0.0198,0.0,0.0182,0.4415,0.0093,0.5,1.0,0.1446,0.6254,0.8252,0.0222,0.14,0.8815,0.02,0.9865
zero-shot,synthetic,?A1>A4,deepseek-chat,0.0,0.0395,0.2629,0.1748,0.0002,0.2005,0.8642,0.0963,0.8987,0.9076,1.0,0.9405,0.9818,0.2204,0.6081,0.9887,0.2522,0.9997
zero-shot,synthetic,?A1>A4,gemini-2.5-flash,0.0,0.0005,0.024,0.0192,0.0,0.012,0.3804,0.0023,0.4373,0.5,0.1013,1.0,0.7825,0.0098,0.0748,0.8432,0.022,0.976
zero-shot,synthetic,?A1>A4,gpt-5-nano,0.0,0.0001,0.0085,0.0043,0.0,0.007,0.1909,0.0008,0.2213,0.2522,0.0337,0.3015,1.0,0.0056,0.0517,0.678,0.0019,0.9238
zero-shot,synthetic,?A1>A4,deepseek-reasoner,0.0,0.1856,0.5522,0.4511,0.0038,0.5,0.9756,0.3045,0.9853,0.9894,0.86,0.996,0.9974,1.0,0.889,0.9994,0.5551,1.0
zero-shot,synthetic,?A1>A4,gemini-2.5-pro,0.0,0.027,0.2213,0.1568,0.0002,0.1659,0.8438,0.074,0.8945,0.9179,0.5,0.9646,0.97,0.1802,1.0,0.985,0.1856,0.9995
zero-shot,synthetic,?A1>A4,gpt-5-mini,0.0,0.0001,0.0048,0.0026,0.0,0.0035,0.1375,0.0001,0.1746,0.1791,0.0215,0.225,0.4388,0.0016,0.0279,1.0,0.0016,0.8785
zero-shot,synthetic,?A1>A4,o3,0.0,0.1684,0.5531,0.4487,0.0043,0.5,0.9756,0.2841,0.9836,0.9907,0.8252,0.9889,0.9993,0.5551,0.8837,0.9994,1.0,1.0
zero-shot,synthetic,?A1>A4,gpt-5,0.0,0.0,0.0001,0.0001,0.0,0.0001,0.0182,0.0,0.022,0.0267,0.0009,0.0435,0.1264,0.0001,0.0013,0.1908,0.0,1.0
zero-shot,synthetic,?A3∅A4,llama3.1:8b,1.0,0.5,0.0,0.0,0.0011,0.0,0.0,0.0222,0.0001,0.9832,0.0,0.9795,0.9459,0.1908,0.6146,0.3294,0.9369,0.9635
zero-shot,synthetic,?A3∅A4,gpt-oss:20b,0.617,1.0,0.0,0.0,0.0019,0.0,0.0,0.038,0.0001,0.9947,0.0,0.9832,0.9674,0.2307,0.7017,0.3919,0.9635,0.9765
zero-shot,synthetic,?A3∅A4,gpt-4.1-nano-2025-04-14,1.0,1.0,1.0,0.1909,0.9281,0.295,0.0871,0.9937,0.7478,1.0,0.354,1.0,1.0,0.9997,1.0,0.9999,1.0,1.0
zero-shot,synthetic,?A3∅A4,mistral-small:24b,1.0,1.0,0.8698,1.0,0.9887,0.6985,0.446,0.9999,0.9593,1.0,0.7193,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A3∅A4,llama3.1:70b,0.9996,0.9993,0.1215,0.0215,1.0,0.0325,0.0055,0.8898,0.2916,1.0,0.0556,1.0,1.0,0.9863,0.9998,0.9977,1.0,1.0
zero-shot,synthetic,?A3∅A4,gemini-2.0-flash,1.0,1.0,0.7906,0.3974,0.9836,1.0,0.2557,0.9991,0.9599,1.0,0.5978,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A3∅A4,gpt-4.1-mini-2025-04-14,1.0,1.0,0.9519,0.6583,0.9977,0.838,1.0,0.9999,0.9887,1.0,0.8252,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A3∅A4,gpt-4o,0.9894,0.9799,0.0135,0.0003,0.1704,0.0023,0.0002,1.0,0.0266,1.0,0.007,1.0,0.9999,0.8987,0.9943,0.9638,0.9998,1.0
zero-shot,synthetic,?A3∅A4,gpt-4.1-2025-04-14,1.0,1.0,0.3444,0.0704,0.7949,0.0814,0.0244,0.9881,1.0,1.0,0.1831,1.0,1.0,0.9991,1.0,0.9999,1.0,1.0
zero-shot,synthetic,?A3∅A4,grok-3-mini,0.0365,0.0147,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,1.0,0.0,0.5,0.345,0.0022,0.0307,0.0098,0.345,0.4253
zero-shot,synthetic,?A3∅A4,deepseek-chat,1.0,1.0,0.7338,0.3638,0.9686,0.5,0.2522,0.9966,0.8775,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A3∅A4,gemini-2.5-flash,0.0448,0.0365,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,0.6682,0.0,1.0,0.4278,0.0007,0.0262,0.0138,0.4225,0.5
zero-shot,synthetic,?A3∅A4,gpt-5-nano,0.0998,0.0662,0.0,0.0,0.0,0.0,0.0,0.0003,0.0,0.7878,0.0,0.7077,1.0,0.0083,0.1002,0.0121,0.5747,0.655
zero-shot,synthetic,?A3∅A4,deepseek-reasoner,0.8785,0.849,0.0008,0.0001,0.0259,0.0,0.0,0.1611,0.0023,0.9993,0.0001,0.9998,0.9968,1.0,0.9231,0.7388,0.9968,0.9991
zero-shot,synthetic,?A3∅A4,gemini-2.5-pro,0.5,0.43,0.0,0.0,0.0005,0.0,0.0,0.0129,0.0,0.9879,0.0,0.9915,0.9506,0.1341,1.0,0.2498,0.9564,0.9739
zero-shot,synthetic,?A3∅A4,gpt-5-mini,0.7693,0.7084,0.0003,0.0,0.0055,0.0,0.0,0.0676,0.0002,0.996,0.0,0.9942,0.9959,0.3746,0.8447,1.0,0.99,0.9965
zero-shot,synthetic,?A3∅A4,o3,0.111,0.0717,0.0,0.0,0.0,0.0,0.0,0.0006,0.0,0.7878,0.0,0.7214,0.5747,0.0083,0.0925,0.0235,1.0,0.655
zero-shot,synthetic,?A3∅A4,gpt-5,0.0717,0.0494,0.0,0.0,0.0,0.0,0.0,0.0001,0.0,0.7142,0.0,0.6612,0.5,0.003,0.061,0.01,0.5,1.0
zero-shot,synthetic,?A4=A1|3,llama3.1:8b,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A4=A1|3,gpt-oss:20b,0.0,1.0,0.0008,0.0,0.0,0.0059,0.0047,0.0036,0.5,0.9999,0.1148,0.9999,0.9999,0.6762,0.9998,0.9242,0.9853,1.0
zero-shot,synthetic,?A4=A1|3,gpt-4.1-nano-2025-04-14,0.002,0.9998,1.0,0.0898,0.3872,0.9539,0.8666,0.8949,0.9997,1.0,0.9915,1.0,1.0,0.9999,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A4=A1|3,mistral-small:24b,0.0625,1.0,0.9805,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A4=A1|3,llama3.1:70b,0.0078,1.0,0.8062,0.125,1.0,0.9941,0.9673,0.9961,1.0,1.0,0.9997,1.0,1.0,1.0,1.0,1.0,1.0,1.0
zero-shot,synthetic,?A4=A1|3,gemini-2.0-flash,0.0001,0.9987,0.1334,0.001,0.0327,1.0,0.3872,0.5,0.9978,1.0,0.9283,1.0,1.0,0.9959,1.0,0.9997,1.0,1.0
zero-shot,synthetic,?A4=A1|3,gpt-4.1-mini-2025-04-14,0.0002,0.9988,0.2905,0.0039,0.1133,0.8062,1.0,0.7095,0.9999,1.0,0.9824,1.0,1.0,0.9995,1.0,0.9999,1.0,1.0
zero-shot,synthetic,?A4=A1|3,gpt-4o,0.0001,0.9993,0.2272,0.002,0.0352,0.7095,0.5,1.0,0.9997,1.0,0.9423,1.0,1.0,0.9988,1.0,0.9999,1.0,1.0
zero-shot,synthetic,?A4=A1|3,gpt-4.1-2025-04-14,0.0,0.6494,0.0012,0.0,0.0,0.0096,0.0012,0.0021,1.0,0.9999,0.1431,1.0,1.0,0.7214,0.9999,0.9252,0.9856,1.0
zero-shot,synthetic,?A4=A1|3,grok-3-mini,0.0,0.0003,0.0,0.0,0.0,0.0,0.0,0.0,0.0003,1.0,0.0,0.6358,0.57,0.0009,0.5643,0.01,0.0939,0.8761
zero-shot,synthetic,?A4=A1|3,deepseek-chat,0.0,0.9461,0.0262,0.0,0.0021,0.1662,0.0592,0.1316,0.9331,1.0,1.0,1.0,1.0,0.9564,1.0,0.9947,0.9994,1.0
zero-shot,synthetic,?A4=A1|3,gemini-2.5-flash,0.0,0.0003,0.0,0.0,0.0,0.0,0.0,0.0,0.0002,0.5,0.0,1.0,0.5,0.0,0.5,0.0068,0.0769,0.7912
zero-shot,synthetic,?A4=A1|3,gpt-5-nano,0.0,0.0003,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.57,0.0,0.6358,1.0,0.0013,0.5679,0.0063,0.0939,0.8595
zero-shot,synthetic,?A4=A1|3,deepseek-reasoner,0.0,0.5,0.0005,0.0,0.0,0.0121,0.002,0.0047,0.4225,0.9997,0.0925,1.0,0.9996,1.0,0.9998,0.8852,0.9786,1.0
zero-shot,synthetic,?A4=A1|3,gemini-2.5-pro,0.0,0.0008,0.0,0.0,0.0,0.0,0.0,0.0,0.0003,0.5643,0.0,0.6494,0.5679,0.0007,1.0,0.0144,0.0877,0.8519
zero-shot,synthetic,?A4=A1|3,gpt-5-mini,0.0,0.1537,0.0,0.0,0.0,0.0011,0.0003,0.0003,0.1405,0.9965,0.0147,0.9977,0.9981,0.2122,0.9943,1.0,0.8595,0.9999
zero-shot,synthetic,?A4=A1|3,o3,0.0,0.0354,0.0,0.0,0.0,0.0001,0.0,0.0,0.0326,0.9506,0.002,0.9597,0.9506,0.0494,0.9552,0.2366,1.0,0.9955
zero-shot,synthetic,?A4=A1|3,gpt-5,0.0,0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.221,0.0,0.3136,0.2366,0.0001,0.2434,0.0004,0.0122,1.0
