llm,dataset,relation,True,Accuracy,Size,Equivalence,Contains,ContainedBy,Overlap,Disjoint,Unknown,Else
deepseek-chat,LC-QuAD,R(1-2),Equivalence,0.9267(139),150,0.9267(139),0.0000(0),0.0000(0),0.0600(9),0.0000(0),0.0133(2),0.0000(0)
deepseek-chat,LC-QuAD,R(1-3),Contains,0.3267(49),150,0.0000(0),0.3267(49),0.5933(89),0.0533(8),0.0267(4),0.0000(0),0.0000(0)
deepseek-chat,LC-QuAD,R(1-4),Contains,0.2067(31),150,0.0067(1),0.2067(31),0.0867(13),0.5333(80),0.1667(25),0.0000(0),0.0000(0)
deepseek-chat,LC-QuAD,R(3-4),Disjoint,0.9867(148),150,0.0000(0),0.0000(0),0.0067(1),0.0067(1),0.9867(148),0.0000(0),0.0000(0)
deepseek-chat,LC-QuAD,R(1-34),Equivalence,0.8800(132),150,0.8800(132),0.0133(2),0.0067(1),0.0200(3),0.0800(12),0.0000(0),0.0000(0)
deepseek-chat,qawiki,R(1-2),Equivalence,0.9800(147),150,0.9800(147),0.0000(0),0.0000(0),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
deepseek-chat,qawiki,R(1-3),Contains,0.2333(35),150,0.0067(1),0.2333(35),0.7400(111),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
deepseek-chat,qawiki,R(1-4),Contains,0.2067(31),150,0.0000(0),0.2067(31),0.0333(5),0.5867(88),0.1733(26),0.0000(0),0.0000(0)
deepseek-chat,qawiki,R(3-4),Disjoint,1.0000(150),150,0.0000(0),0.0000(0),0.0000(0),0.0000(0),1.0000(150),0.0000(0),0.0000(0)
deepseek-chat,qawiki,R(1-34),Equivalence,0.9267(139),150,0.9267(139),0.0133(2),0.0000(0),0.0133(2),0.0467(7),0.0000(0),0.0000(0)
deepseek-chat,spinach,R(1-2),Equivalence,0.9733(146),150,0.9733(146),0.0000(0),0.0000(0),0.0200(3),0.0000(0),0.0067(1),0.0000(0)
deepseek-chat,spinach,R(1-3),Contains,0.4267(64),150,0.0067(1),0.4267(64),0.5067(76),0.0600(9),0.0000(0),0.0000(0),0.0000(0)
deepseek-chat,spinach,R(1-4),Contains,0.1933(29),150,0.0000(0),0.1933(29),0.0467(7),0.5333(80),0.2267(34),0.0000(0),0.0000(0)
deepseek-chat,spinach,R(3-4),Disjoint,0.9933(149),150,0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.9933(149),0.0000(0),0.0000(0)
deepseek-chat,spinach,R(1-34),Equivalence,0.9000(135),150,0.9000(135),0.0000(0),0.0067(1),0.0200(3),0.0733(11),0.0000(0),0.0000(0)
deepseek-chat,synthetic,R(1-2),Equivalence,0.9600(144),150,0.9600(144),0.0000(0),0.0000(0),0.0267(4),0.0133(2),0.0000(0),0.0000(0)
deepseek-chat,synthetic,R(1-3),Contains,0.4333(65),150,0.0133(2),0.4333(65),0.5067(76),0.0333(5),0.0133(2),0.0000(0),0.0000(0)
deepseek-chat,synthetic,R(1-4),Contains,0.2267(34),150,0.0000(0),0.2267(34),0.0400(6),0.4933(74),0.2400(36),0.0000(0),0.0000(0)
deepseek-chat,synthetic,R(3-4),Disjoint,0.9933(149),150,0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
deepseek-chat,synthetic,R(1-34),Equivalence,0.9133(137),150,0.9133(137),0.0000(0),0.0000(0),0.0133(2),0.0733(11),0.0000(0),0.0000(0)
deepseek-reasoner,LC-QuAD,R(1-2),Equivalence,0.9267(139),150,0.9267(139),0.0133(2),0.0200(3),0.0067(1),0.0200(3),0.0133(2),0.0000(0)
deepseek-reasoner,LC-QuAD,R(1-3),Contains,0.9800(147),150,0.0000(0),0.9800(147),0.0133(2),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
deepseek-reasoner,LC-QuAD,R(1-4),Contains,0.9800(147),150,0.0067(1),0.9800(147),0.0000(0),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
deepseek-reasoner,LC-QuAD,R(3-4),Disjoint,0.9667(145),150,0.0000(0),0.0200(3),0.0000(0),0.0000(0),0.9667(145),0.0133(2),0.0000(0)
deepseek-reasoner,LC-QuAD,R(1-34),Equivalence,0.9200(138),150,0.9200(138),0.0333(5),0.0333(5),0.0000(0),0.0067(1),0.0067(1),0.0000(0)
deepseek-reasoner,qawiki,R(1-2),Equivalence,0.8533(128),150,0.8533(128),0.0333(5),0.1067(16),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
deepseek-reasoner,qawiki,R(1-3),Contains,0.9667(145),150,0.0200(3),0.9667(145),0.0000(0),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
deepseek-reasoner,qawiki,R(1-4),Contains,0.9667(145),150,0.0067(1),0.9667(145),0.0000(0),0.0000(0),0.0200(3),0.0067(1),0.0000(0)
deepseek-reasoner,qawiki,R(3-4),Disjoint,0.9600(144),150,0.0000(0),0.0067(1),0.0000(0),0.0200(3),0.9600(144),0.0133(2),0.0000(0)
deepseek-reasoner,qawiki,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0200(3),0.0067(1),0.0000(0),0.0133(2),0.0067(1),0.0000(0)
deepseek-reasoner,spinach,R(1-2),Equivalence,0.8933(134),150,0.8933(134),0.0400(6),0.0533(8),0.0067(1),0.0000(0),0.0067(1),0.0000(0)
deepseek-reasoner,spinach,R(1-3),Contains,0.9667(145),150,0.0000(0),0.9667(145),0.0133(2),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
deepseek-reasoner,spinach,R(1-4),Contains,0.9600(144),150,0.0200(3),0.9600(144),0.0000(0),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
deepseek-reasoner,spinach,R(3-4),Disjoint,0.9533(143),150,0.0067(1),0.0000(0),0.0000(0),0.0333(5),0.9533(143),0.0067(1),0.0000(0)
deepseek-reasoner,spinach,R(1-34),Equivalence,0.9267(139),150,0.9267(139),0.0400(6),0.0200(3),0.0133(2),0.0000(0),0.0000(0),0.0000(0)
deepseek-reasoner,synthetic,R(1-2),Equivalence,0.8800(132),150,0.8800(132),0.0600(9),0.0400(6),0.0000(0),0.0200(3),0.0000(0),0.0000(0)
deepseek-reasoner,synthetic,R(1-3),Contains,0.9667(145),150,0.0200(3),0.9667(145),0.0067(1),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
deepseek-reasoner,synthetic,R(1-4),Contains,0.9600(144),150,0.0000(0),0.9600(144),0.0000(0),0.0133(2),0.0133(2),0.0133(2),0.0000(0)
deepseek-reasoner,synthetic,R(3-4),Disjoint,0.9733(146),150,0.0067(1),0.0000(0),0.0000(0),0.0200(3),0.9733(146),0.0000(0),0.0000(0)
deepseek-reasoner,synthetic,R(1-34),Equivalence,0.9733(146),150,0.9733(146),0.0000(0),0.0133(2),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gemini-2.0-flash,LC-QuAD,R(1-2),Equivalence,0.9533(143),150,0.9533(143),0.0133(2),0.0133(2),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
gemini-2.0-flash,LC-QuAD,R(1-3),Contains,0.7200(108),150,0.0200(3),0.7200(108),0.2267(34),0.0200(3),0.0067(1),0.0067(1),0.0000(0)
gemini-2.0-flash,LC-QuAD,R(1-4),Contains,0.2467(37),150,0.0200(3),0.2467(37),0.0267(4),0.6267(94),0.0800(12),0.0000(0),0.0000(0)
gemini-2.0-flash,LC-QuAD,R(3-4),Disjoint,0.9467(142),150,0.0000(0),0.0133(2),0.0000(0),0.0400(6),0.9467(142),0.0000(0),0.0000(0)
gemini-2.0-flash,LC-QuAD,R(1-34),Equivalence,0.8133(122),150,0.8133(122),0.0067(1),0.0000(0),0.1533(23),0.0267(4),0.0000(0),0.0000(0)
gemini-2.0-flash,qawiki,R(1-2),Equivalence,0.9667(145),150,0.9667(145),0.0133(2),0.0000(0),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
gemini-2.0-flash,qawiki,R(1-3),Contains,0.8600(129),150,0.0067(1),0.8600(129),0.1133(17),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
gemini-2.0-flash,qawiki,R(1-4),Contains,0.1533(23),150,0.0000(0),0.1533(23),0.0333(5),0.7267(109),0.0867(13),0.0000(0),0.0000(0)
gemini-2.0-flash,qawiki,R(3-4),Disjoint,0.9400(141),150,0.0000(0),0.0067(1),0.0000(0),0.0533(8),0.9400(141),0.0000(0),0.0000(0)
gemini-2.0-flash,qawiki,R(1-34),Equivalence,0.8333(125),150,0.8333(125),0.0000(0),0.0000(0),0.0933(14),0.0733(11),0.0000(0),0.0000(0)
gemini-2.0-flash,spinach,R(1-2),Equivalence,0.9667(145),150,0.9667(145),0.0067(1),0.0133(2),0.0133(2),0.0000(0),0.0000(0),0.0000(0)
gemini-2.0-flash,spinach,R(1-3),Contains,0.8867(133),150,0.0000(0),0.8867(133),0.0800(12),0.0200(3),0.0133(2),0.0000(0),0.0000(0)
gemini-2.0-flash,spinach,R(1-4),Contains,0.2600(39),150,0.0000(0),0.2600(39),0.0400(6),0.6133(92),0.0867(13),0.0000(0),0.0000(0)
gemini-2.0-flash,spinach,R(3-4),Disjoint,0.9733(146),150,0.0000(0),0.0000(0),0.0000(0),0.0267(4),0.9733(146),0.0000(0),0.0000(0)
gemini-2.0-flash,spinach,R(1-34),Equivalence,0.8600(129),150,0.8600(129),0.0000(0),0.0000(0),0.0800(12),0.0600(9),0.0000(0),0.0000(0)
gemini-2.0-flash,synthetic,R(1-2),Equivalence,0.7467(112),150,0.7467(112),0.0133(2),0.0067(1),0.0133(2),0.2200(33),0.0000(0),0.0000(0)
gemini-2.0-flash,synthetic,R(1-3),Contains,0.7000(105),150,0.0267(4),0.7000(105),0.0667(10),0.0133(2),0.1933(29),0.0000(0),0.0000(0)
gemini-2.0-flash,synthetic,R(1-4),Contains,0.1933(29),150,0.0200(3),0.1933(29),0.0267(4),0.5133(77),0.2467(37),0.0000(0),0.0000(0)
gemini-2.0-flash,synthetic,R(3-4),Disjoint,0.9467(142),150,0.0133(2),0.0000(0),0.0000(0),0.0400(6),0.9467(142),0.0000(0),0.0000(0)
gemini-2.0-flash,synthetic,R(1-34),Equivalence,0.7067(106),150,0.7067(106),0.0000(0),0.0000(0),0.0533(8),0.1000(15),0.1400(21),0.0000(0)
gemini-2.5-flash,LC-QuAD,R(1-2),Equivalence,0.8933(134),150,0.8933(134),0.0467(7),0.0400(6),0.0067(1),0.0133(2),0.0000(0),0.0000(0)
gemini-2.5-flash,LC-QuAD,R(1-3),Contains,0.9600(144),150,0.0067(1),0.9600(144),0.0067(1),0.0000(0),0.0000(0),0.0267(4),0.0000(0)
gemini-2.5-flash,LC-QuAD,R(1-4),Contains,0.9667(145),150,0.0000(0),0.9667(145),0.0000(0),0.0000(0),0.0000(0),0.0333(5),0.0000(0)
gemini-2.5-flash,LC-QuAD,R(3-4),Disjoint,0.9600(144),150,0.0000(0),0.0267(4),0.0000(0),0.0133(2),0.9600(144),0.0000(0),0.0000(0)
gemini-2.5-flash,LC-QuAD,R(1-34),Equivalence,0.9733(146),150,0.9733(146),0.0133(2),0.0133(2),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-flash,qawiki,R(1-2),Equivalence,0.7933(119),150,0.7933(119),0.0667(10),0.1400(21),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-flash,qawiki,R(1-3),Contains,0.9533(143),150,0.0267(4),0.9533(143),0.0067(1),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
gemini-2.5-flash,qawiki,R(1-4),Contains,0.9667(145),150,0.0067(1),0.9667(145),0.0000(0),0.0000(0),0.0200(3),0.0067(1),0.0000(0)
gemini-2.5-flash,qawiki,R(3-4),Disjoint,0.9733(146),150,0.0000(0),0.0067(1),0.0000(0),0.0200(3),0.9733(146),0.0000(0),0.0000(0)
gemini-2.5-flash,qawiki,R(1-34),Equivalence,0.9667(145),150,0.9667(145),0.0133(2),0.0067(1),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gemini-2.5-flash,spinach,R(1-2),Equivalence,0.8733(131),150,0.8733(131),0.0467(7),0.0600(9),0.0200(3),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-flash,spinach,R(1-3),Contains,0.9533(143),150,0.0067(1),0.9533(143),0.0000(0),0.0133(2),0.0067(1),0.0133(2),0.0067(1)
gemini-2.5-flash,spinach,R(1-4),Contains,0.9533(143),150,0.0200(3),0.9533(143),0.0000(0),0.0133(2),0.0000(0),0.0133(2),0.0000(0)
gemini-2.5-flash,spinach,R(3-4),Disjoint,0.9667(145),150,0.0067(1),0.0000(0),0.0067(1),0.0200(3),0.9667(145),0.0000(0),0.0000(0)
gemini-2.5-flash,spinach,R(1-34),Equivalence,0.9267(139),150,0.9267(139),0.0400(6),0.0133(2),0.0067(1),0.0067(1),0.0067(1),0.0000(0)
gemini-2.5-flash,synthetic,R(1-2),Equivalence,0.8933(134),150,0.8933(134),0.0467(7),0.0333(5),0.0133(2),0.0067(1),0.0067(1),0.0000(0)
gemini-2.5-flash,synthetic,R(1-3),Contains,0.9600(144),150,0.0200(3),0.9600(144),0.0067(1),0.0067(1),0.0000(0),0.0067(1),0.0000(0)
gemini-2.5-flash,synthetic,R(1-4),Contains,0.9667(145),150,0.0067(1),0.9667(145),0.0000(0),0.0067(1),0.0067(1),0.0067(1),0.0067(1)
gemini-2.5-flash,synthetic,R(3-4),Disjoint,0.9733(146),150,0.0067(1),0.0067(1),0.0000(0),0.0133(2),0.9733(146),0.0000(0),0.0000(0)
gemini-2.5-flash,synthetic,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0133(2),0.0133(2),0.0000(0),0.0133(2),0.0000(0),0.0067(1)
gemini-2.5-pro,LC-QuAD,R(1-2),Equivalence,0.9533(143),150,0.9533(143),0.0133(2),0.0267(4),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,LC-QuAD,R(1-3),Contains,0.9933(149),150,0.0000(0),0.9933(149),0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,LC-QuAD,R(1-4),Contains,0.9667(145),150,0.0067(1),0.9667(145),0.0000(0),0.0133(2),0.0067(1),0.0000(0),0.0067(1)
gemini-2.5-pro,LC-QuAD,R(3-4),Disjoint,0.9933(149),150,0.0000(0),0.0067(1),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
gemini-2.5-pro,LC-QuAD,R(1-34),Equivalence,0.9667(145),150,0.9667(145),0.0267(4),0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,qawiki,R(1-2),Equivalence,0.8733(131),150,0.8733(131),0.0333(5),0.0867(13),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,qawiki,R(1-3),Contains,0.9667(145),150,0.0267(4),0.9667(145),0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,qawiki,R(1-4),Contains,0.9333(140),150,0.0067(1),0.9333(140),0.0000(0),0.0267(4),0.0333(5),0.0000(0),0.0000(0)
gemini-2.5-pro,qawiki,R(3-4),Disjoint,0.9867(148),150,0.0000(0),0.0000(0),0.0000(0),0.0133(2),0.9867(148),0.0000(0),0.0000(0)
gemini-2.5-pro,qawiki,R(1-34),Equivalence,0.9667(145),150,0.9667(145),0.0000(0),0.0200(3),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gemini-2.5-pro,spinach,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0267(4),0.0600(9),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,spinach,R(1-3),Contains,0.9733(146),150,0.0133(2),0.9733(146),0.0000(0),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
gemini-2.5-pro,spinach,R(1-4),Contains,0.9200(138),150,0.0333(5),0.9200(138),0.0000(0),0.0133(2),0.0200(3),0.0000(0),0.0133(2)
gemini-2.5-pro,spinach,R(3-4),Disjoint,0.9800(147),150,0.0067(1),0.0000(0),0.0000(0),0.0067(1),0.9800(147),0.0000(0),0.0067(1)
gemini-2.5-pro,spinach,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0333(5),0.0133(2),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gemini-2.5-pro,synthetic,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0400(6),0.0267(4),0.0000(0),0.0133(2),0.0000(0),0.0067(1)
gemini-2.5-pro,synthetic,R(1-3),Contains,0.9733(146),150,0.0067(1),0.9733(146),0.0067(1),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
gemini-2.5-pro,synthetic,R(1-4),Contains,0.9600(144),150,0.0000(0),0.9600(144),0.0000(0),0.0267(4),0.0133(2),0.0000(0),0.0000(0)
gemini-2.5-pro,synthetic,R(3-4),Disjoint,0.9800(147),150,0.0067(1),0.0000(0),0.0000(0),0.0133(2),0.9800(147),0.0000(0),0.0000(0)
gemini-2.5-pro,synthetic,R(1-34),Equivalence,0.9600(144),150,0.9600(144),0.0067(1),0.0200(3),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,LC-QuAD,R(1-2),Equivalence,0.9667(145),150,0.9667(145),0.0200(3),0.0133(2),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,LC-QuAD,R(1-3),Contains,0.9800(147),150,0.0000(0),0.9800(147),0.0067(1),0.0133(2),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,LC-QuAD,R(1-4),Contains,0.7600(114),150,0.0000(0),0.7600(114),0.0000(0),0.2267(34),0.0133(2),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,LC-QuAD,R(3-4),Disjoint,0.9933(149),150,0.0000(0),0.0067(1),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,LC-QuAD,R(1-34),Equivalence,1.0000(150),150,1.0000(150),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,qawiki,R(1-2),Equivalence,0.9533(143),150,0.9533(143),0.0467(7),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,qawiki,R(1-3),Contains,0.9867(148),150,0.0067(1),0.9867(148),0.0000(0),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,qawiki,R(1-4),Contains,0.6267(94),150,0.0000(0),0.6267(94),0.0000(0),0.3133(47),0.0600(9),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,qawiki,R(3-4),Disjoint,1.0000(150),150,0.0000(0),0.0000(0),0.0000(0),0.0000(0),1.0000(150),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,qawiki,R(1-34),Equivalence,1.0000(150),150,1.0000(150),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,spinach,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0333(5),0.0200(3),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-4.1-2025-04-14,spinach,R(1-3),Contains,0.9933(149),150,0.0000(0),0.9933(149),0.0000(0),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,spinach,R(1-4),Contains,0.6733(101),150,0.0000(0),0.6733(101),0.0067(1),0.2800(42),0.0400(6),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,spinach,R(3-4),Disjoint,0.9933(149),150,0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.9933(149),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,spinach,R(1-34),Equivalence,0.9800(147),150,0.9800(147),0.0000(0),0.0067(1),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,synthetic,R(1-2),Equivalence,0.9200(138),150,0.9200(138),0.0600(9),0.0067(1),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,synthetic,R(1-3),Contains,0.9867(148),150,0.0000(0),0.9867(148),0.0133(2),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,synthetic,R(1-4),Contains,0.7267(109),150,0.0000(0),0.7267(109),0.0000(0),0.2200(33),0.0533(8),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,synthetic,R(3-4),Disjoint,0.9933(149),150,0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,synthetic,R(1-34),Equivalence,0.9933(149),150,0.9933(149),0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,LC-QuAD,R(1-2),Equivalence,0.9067(136),150,0.9067(136),0.0267(4),0.0467(7),0.0133(2),0.0000(0),0.0067(1),0.0000(0)
gpt-4.1-mini-2025-04-14,LC-QuAD,R(1-3),Contains,0.3467(52),150,0.0000(0),0.3467(52),0.6400(96),0.0067(1),0.0000(0),0.0067(1),0.0000(0)
gpt-4.1-mini-2025-04-14,LC-QuAD,R(1-4),Contains,0.2533(38),150,0.0067(1),0.2533(38),0.0933(14),0.6133(92),0.0333(5),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,LC-QuAD,R(3-4),Disjoint,0.9800(147),150,0.0000(0),0.0133(2),0.0000(0),0.0067(1),0.9800(147),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,LC-QuAD,R(1-34),Equivalence,0.9467(142),150,0.9467(142),0.0067(1),0.0000(0),0.0133(2),0.0333(5),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,qawiki,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0133(2),0.0667(10),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,qawiki,R(1-3),Contains,0.3467(52),150,0.0067(1),0.3467(52),0.6400(96),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,qawiki,R(1-4),Contains,0.2133(32),150,0.0000(0),0.2133(32),0.0600(9),0.6333(95),0.0933(14),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,qawiki,R(3-4),Disjoint,1.0000(150),150,0.0000(0),0.0000(0),0.0000(0),0.0000(0),1.0000(150),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,qawiki,R(1-34),Equivalence,0.9667(145),150,0.9667(145),0.0000(0),0.0000(0),0.0133(2),0.0200(3),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,spinach,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0067(1),0.0467(7),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-4.1-mini-2025-04-14,spinach,R(1-3),Contains,0.3600(54),150,0.0000(0),0.3600(54),0.6133(92),0.0133(2),0.0067(1),0.0067(1),0.0000(0)
gpt-4.1-mini-2025-04-14,spinach,R(1-4),Contains,0.1533(23),150,0.0000(0),0.1533(23),0.1467(22),0.6267(94),0.0600(9),0.0133(2),0.0000(0)
gpt-4.1-mini-2025-04-14,spinach,R(3-4),Disjoint,0.9933(149),150,0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.9933(149),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,spinach,R(1-34),Equivalence,0.9467(142),150,0.9467(142),0.0067(1),0.0000(0),0.0133(2),0.0333(5),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,synthetic,R(1-2),Equivalence,0.8800(132),150,0.8800(132),0.0333(5),0.0667(10),0.0067(1),0.0133(2),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,synthetic,R(1-3),Contains,0.3267(49),150,0.0000(0),0.3267(49),0.6400(96),0.0333(5),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,synthetic,R(1-4),Contains,0.2133(32),150,0.0133(2),0.2133(32),0.0733(11),0.5200(78),0.1800(27),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,synthetic,R(3-4),Disjoint,0.9867(148),150,0.0067(1),0.0000(0),0.0000(0),0.0067(1),0.9867(148),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,synthetic,R(1-34),Equivalence,0.9733(146),150,0.9733(146),0.0067(1),0.0000(0),0.0000(0),0.0200(3),0.0000(0),0.0000(0)
gpt-4.1-nano-2025-04-14,LC-QuAD,R(1-2),Equivalence,0.7067(106),150,0.7067(106),0.0000(0),0.0000(0),0.2667(40),0.0000(0),0.0133(2),0.0133(2)
gpt-4.1-nano-2025-04-14,LC-QuAD,R(1-3),Contains,0.3333(50),150,0.0200(3),0.3333(50),0.0733(11),0.4467(67),0.0333(5),0.0467(7),0.0467(7)
gpt-4.1-nano-2025-04-14,LC-QuAD,R(1-4),Contains,0.3800(57),150,0.0067(1),0.3800(57),0.1067(16),0.2267(34),0.1133(17),0.1133(17),0.0533(8)
gpt-4.1-nano-2025-04-14,LC-QuAD,R(3-4),Disjoint,0.5267(79),150,0.0000(0),0.1333(20),0.1467(22),0.0533(8),0.5267(79),0.0667(10),0.0733(11)
gpt-4.1-nano-2025-04-14,LC-QuAD,R(1-34),Equivalence,0.0000(0),150,0.0000(0),0.0333(5),0.1467(22),0.0000(0),0.8133(122),0.0000(0),0.0067(1)
gpt-4.1-nano-2025-04-14,qawiki,R(1-2),Equivalence,0.6067(91),150,0.6067(91),0.0067(1),0.0000(0),0.3800(57),0.0000(0),0.0067(1),0.0000(0)
gpt-4.1-nano-2025-04-14,qawiki,R(1-3),Contains,0.4800(72),150,0.0267(4),0.4800(72),0.1067(16),0.3333(50),0.0000(0),0.0533(8),0.0000(0)
gpt-4.1-nano-2025-04-14,qawiki,R(1-4),Contains,0.5467(82),150,0.0000(0),0.5467(82),0.1333(20),0.1333(20),0.1200(18),0.0667(10),0.0000(0)
gpt-4.1-nano-2025-04-14,qawiki,R(3-4),Disjoint,0.6467(97),150,0.0000(0),0.1800(27),0.0800(12),0.0533(8),0.6467(97),0.0400(6),0.0000(0)
gpt-4.1-nano-2025-04-14,qawiki,R(1-34),Equivalence,0.0000(0),150,0.0000(0),0.0000(0),0.1200(18),0.0000(0),0.8800(132),0.0000(0),0.0000(0)
gpt-4.1-nano-2025-04-14,spinach,R(1-2),Equivalence,0.8000(120),150,0.8000(120),0.0000(0),0.0000(0),0.2000(30),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-nano-2025-04-14,spinach,R(1-3),Contains,0.3533(53),150,0.0467(7),0.3533(53),0.0533(8),0.4867(73),0.0133(2),0.0467(7),0.0000(0)
gpt-4.1-nano-2025-04-14,spinach,R(1-4),Contains,0.2933(44),150,0.0000(0),0.2933(44),0.1600(24),0.2200(33),0.2533(38),0.0733(11),0.0000(0)
gpt-4.1-nano-2025-04-14,spinach,R(3-4),Disjoint,0.6867(103),150,0.0000(0),0.1467(22),0.0600(9),0.0533(8),0.6867(103),0.0467(7),0.0067(1)
gpt-4.1-nano-2025-04-14,spinach,R(1-34),Equivalence,0.0000(0),150,0.0000(0),0.0067(1),0.0800(12),0.0000(0),0.9133(137),0.0000(0),0.0000(0)
gpt-4.1-nano-2025-04-14,synthetic,R(1-2),Equivalence,0.6533(98),150,0.6533(98),0.0267(4),0.0000(0),0.3133(47),0.0000(0),0.0067(1),0.0000(0)
gpt-4.1-nano-2025-04-14,synthetic,R(1-3),Contains,0.4067(61),150,0.0133(2),0.4067(61),0.0867(13),0.4200(63),0.0400(6),0.0333(5),0.0000(0)
gpt-4.1-nano-2025-04-14,synthetic,R(1-4),Contains,0.4333(65),150,0.0000(0),0.4333(65),0.2333(35),0.1467(22),0.1200(18),0.0667(10),0.0000(0)
gpt-4.1-nano-2025-04-14,synthetic,R(3-4),Disjoint,0.6333(95),150,0.0000(0),0.1267(19),0.1467(22),0.0333(5),0.6333(95),0.0600(9),0.0000(0)
gpt-4.1-nano-2025-04-14,synthetic,R(1-34),Equivalence,0.0000(0),150,0.0000(0),0.0333(5),0.1400(21),0.0000(0),0.8267(124),0.0000(0),0.0000(0)
gpt-4o,LC-QuAD,R(1-2),Equivalence,0.9667(145),150,0.9667(145),0.0333(5),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4o,LC-QuAD,R(1-3),Contains,0.2267(34),150,0.0067(1),0.2267(34),0.7267(109),0.0200(3),0.0067(1),0.0133(2),0.0000(0)
gpt-4o,LC-QuAD,R(1-4),Contains,0.1533(23),150,0.0067(1),0.1533(23),0.0467(7),0.6200(93),0.1733(26),0.0000(0),0.0000(0)
gpt-4o,LC-QuAD,R(3-4),Disjoint,0.9867(148),150,0.0000(0),0.0000(0),0.0000(0),0.0133(2),0.9867(148),0.0000(0),0.0000(0)
gpt-4o,LC-QuAD,R(1-34),Equivalence,0.6333(95),150,0.6333(95),0.0200(3),0.0000(0),0.0067(1),0.2267(34),0.1133(17),0.0000(0)
gpt-4o,qawiki,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0400(6),0.0133(2),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gpt-4o,qawiki,R(1-3),Contains,0.3000(45),150,0.0067(1),0.3000(45),0.6800(102),0.0067(1),0.0000(0),0.0067(1),0.0000(0)
gpt-4o,qawiki,R(1-4),Contains,0.1200(18),150,0.0000(0),0.1200(18),0.0267(4),0.6667(100),0.1867(28),0.0000(0),0.0000(0)
gpt-4o,qawiki,R(3-4),Disjoint,1.0000(150),150,0.0000(0),0.0000(0),0.0000(0),0.0000(0),1.0000(150),0.0000(0),0.0000(0)
gpt-4o,qawiki,R(1-34),Equivalence,0.6333(95),150,0.6333(95),0.0133(2),0.0000(0),0.0267(4),0.2333(35),0.0933(14),0.0000(0)
gpt-4o,spinach,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0600(9),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-4o,spinach,R(1-3),Contains,0.3733(56),150,0.0000(0),0.3733(56),0.5867(88),0.0267(4),0.0000(0),0.0133(2),0.0000(0)
gpt-4o,spinach,R(1-4),Contains,0.1067(16),150,0.0000(0),0.1067(16),0.0200(3),0.6867(103),0.1867(28),0.0000(0),0.0000(0)
gpt-4o,spinach,R(3-4),Disjoint,0.9933(149),150,0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.9933(149),0.0000(0),0.0000(0)
gpt-4o,spinach,R(1-34),Equivalence,0.6667(100),150,0.6667(100),0.0067(1),0.0000(0),0.0200(3),0.2200(33),0.0867(13),0.0000(0)
gpt-4o,synthetic,R(1-2),Equivalence,0.9333(140),150,0.9333(140),0.0333(5),0.0200(3),0.0000(0),0.0067(1),0.0067(1),0.0000(0)
gpt-4o,synthetic,R(1-3),Contains,0.3667(55),150,0.0067(1),0.3667(55),0.6000(90),0.0200(3),0.0000(0),0.0067(1),0.0000(0)
gpt-4o,synthetic,R(1-4),Contains,0.1067(16),150,0.0067(1),0.1067(16),0.0400(6),0.7200(108),0.1200(18),0.0067(1),0.0000(0)
gpt-4o,synthetic,R(3-4),Disjoint,0.9933(149),150,0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
gpt-4o,synthetic,R(1-34),Equivalence,0.6800(102),150,0.6800(102),0.0000(0),0.0067(1),0.0333(5),0.1933(29),0.0867(13),0.0000(0)
gpt-5,LC-QuAD,R(1-2),Equivalence,0.9467(142),150,0.9467(142),0.0200(3),0.0200(3),0.0067(1),0.0000(0),0.0067(1),0.0000(0)
gpt-5,LC-QuAD,R(1-3),Contains,0.9800(147),150,0.0067(1),0.9800(147),0.0067(1),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-5,LC-QuAD,R(1-4),Contains,0.9933(149),150,0.0000(0),0.9933(149),0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-5,LC-QuAD,R(3-4),Disjoint,0.9800(147),150,0.0000(0),0.0133(2),0.0000(0),0.0000(0),0.9800(147),0.0067(1),0.0000(0)
gpt-5,LC-QuAD,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0267(4),0.0200(3),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-5,qawiki,R(1-2),Equivalence,0.8733(131),150,0.8733(131),0.0333(5),0.0667(10),0.0067(1),0.0000(0),0.0200(3),0.0000(0)
gpt-5,qawiki,R(1-3),Contains,0.9800(147),150,0.0133(2),0.9800(147),0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-5,qawiki,R(1-4),Contains,0.9800(147),150,0.0000(0),0.9800(147),0.0000(0),0.0000(0),0.0200(3),0.0000(0),0.0000(0)
gpt-5,qawiki,R(3-4),Disjoint,0.9733(146),150,0.0000(0),0.0000(0),0.0000(0),0.0200(3),0.9733(146),0.0067(1),0.0000(0)
gpt-5,qawiki,R(1-34),Equivalence,0.9467(142),150,0.9467(142),0.0067(1),0.0333(5),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gpt-5,spinach,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0267(4),0.0400(6),0.0067(1),0.0000(0),0.0133(2),0.0000(0)
gpt-5,spinach,R(1-3),Contains,0.9733(146),150,0.0000(0),0.9733(146),0.0000(0),0.0133(2),0.0067(1),0.0067(1),0.0000(0)
gpt-5,spinach,R(1-4),Contains,0.9733(146),150,0.0000(0),0.9733(146),0.0000(0),0.0200(3),0.0000(0),0.0067(1),0.0000(0)
gpt-5,spinach,R(3-4),Disjoint,0.9467(142),150,0.0000(0),0.0000(0),0.0000(0),0.0333(5),0.9467(142),0.0200(3),0.0000(0)
gpt-5,spinach,R(1-34),Equivalence,0.9067(136),150,0.9067(136),0.0400(6),0.0333(5),0.0133(2),0.0000(0),0.0067(1),0.0000(0)
gpt-5,synthetic,R(1-2),Equivalence,0.8867(133),150,0.8867(133),0.0533(8),0.0467(7),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gpt-5,synthetic,R(1-3),Contains,0.9733(146),150,0.0133(2),0.9733(146),0.0067(1),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-5,synthetic,R(1-4),Contains,0.9867(148),150,0.0067(1),0.9867(148),0.0000(0),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-5,synthetic,R(3-4),Disjoint,0.9733(146),150,0.0067(1),0.0067(1),0.0000(0),0.0067(1),0.9733(146),0.0067(1),0.0000(0)
gpt-5,synthetic,R(1-34),Equivalence,0.9733(146),150,0.9733(146),0.0067(1),0.0067(1),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gpt-5-mini,LC-QuAD,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0200(3),0.0067(1),0.0067(1),0.0067(1),0.0200(3),0.0000(0)
gpt-5-mini,LC-QuAD,R(1-3),Contains,0.9600(144),150,0.0133(2),0.9600(144),0.0133(2),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
gpt-5-mini,LC-QuAD,R(1-4),Contains,0.9800(147),150,0.0000(0),0.9800(147),0.0000(0),0.0000(0),0.0000(0),0.0200(3),0.0000(0)
gpt-5-mini,LC-QuAD,R(3-4),Disjoint,0.9600(144),150,0.0000(0),0.0133(2),0.0000(0),0.0067(1),0.9600(144),0.0200(3),0.0000(0)
gpt-5-mini,LC-QuAD,R(1-34),Equivalence,0.9400(141),150,0.9400(141),0.0267(4),0.0200(3),0.0000(0),0.0067(1),0.0067(1),0.0000(0)
gpt-5-mini,qawiki,R(1-2),Equivalence,0.8533(128),150,0.8533(128),0.0467(7),0.0667(10),0.0067(1),0.0000(0),0.0267(4),0.0000(0)
gpt-5-mini,qawiki,R(1-3),Contains,0.9867(148),150,0.0067(1),0.9867(148),0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-5-mini,qawiki,R(1-4),Contains,0.9733(146),150,0.0067(1),0.9733(146),0.0000(0),0.0000(0),0.0200(3),0.0000(0),0.0000(0)
gpt-5-mini,qawiki,R(3-4),Disjoint,0.9467(142),150,0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.9467(142),0.0467(7),0.0000(0)
gpt-5-mini,qawiki,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0000(0),0.0200(3),0.0067(1),0.0200(3),0.0000(0),0.0000(0)
gpt-5-mini,spinach,R(1-2),Equivalence,0.9067(136),150,0.9067(136),0.0267(4),0.0333(5),0.0000(0),0.0000(0),0.0333(5),0.0000(0)
gpt-5-mini,spinach,R(1-3),Contains,0.9800(147),150,0.0067(1),0.9800(147),0.0000(0),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
gpt-5-mini,spinach,R(1-4),Contains,0.9867(148),150,0.0067(1),0.9867(148),0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-5-mini,spinach,R(3-4),Disjoint,0.9200(138),150,0.0067(1),0.0000(0),0.0000(0),0.0400(6),0.9200(138),0.0333(5),0.0000(0)
gpt-5-mini,spinach,R(1-34),Equivalence,0.9333(140),150,0.9333(140),0.0400(6),0.0200(3),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-5-mini,synthetic,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0467(7),0.0133(2),0.0000(0),0.0067(1),0.0200(3),0.0000(0)
gpt-5-mini,synthetic,R(1-3),Contains,0.9600(144),150,0.0133(2),0.9600(144),0.0067(1),0.0067(1),0.0000(0),0.0133(2),0.0000(0)
gpt-5-mini,synthetic,R(1-4),Contains,0.9733(146),150,0.0000(0),0.9733(146),0.0000(0),0.0000(0),0.0067(1),0.0200(3),0.0000(0)
gpt-5-mini,synthetic,R(3-4),Disjoint,0.9733(146),150,0.0067(1),0.0000(0),0.0000(0),0.0133(2),0.9733(146),0.0067(1),0.0000(0)
gpt-5-mini,synthetic,R(1-34),Equivalence,0.9800(147),150,0.9800(147),0.0000(0),0.0133(2),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-5-nano,LC-QuAD,R(1-2),Equivalence,0.9467(142),150,0.9467(142),0.0000(0),0.0000(0),0.0000(0),0.0067(1),0.0467(7),0.0000(0)
gpt-5-nano,LC-QuAD,R(1-3),Contains,0.8400(126),150,0.0733(11),0.8400(126),0.0133(2),0.0000(0),0.0200(3),0.0467(7),0.0067(1)
gpt-5-nano,LC-QuAD,R(1-4),Contains,0.9200(138),150,0.0400(6),0.9200(138),0.0133(2),0.0000(0),0.0067(1),0.0133(2),0.0067(1)
gpt-5-nano,LC-QuAD,R(3-4),Disjoint,0.9067(136),150,0.0067(1),0.0400(6),0.0067(1),0.0067(1),0.9067(136),0.0333(5),0.0000(0)
gpt-5-nano,LC-QuAD,R(1-34),Equivalence,0.9200(138),150,0.9200(138),0.0267(4),0.0333(5),0.0000(0),0.0133(2),0.0067(1),0.0000(0)
gpt-5-nano,qawiki,R(1-2),Equivalence,0.8800(132),150,0.8800(132),0.0400(6),0.0467(7),0.0133(2),0.0067(1),0.0133(2),0.0000(0)
gpt-5-nano,qawiki,R(1-3),Contains,0.9133(137),150,0.0400(6),0.9133(137),0.0067(1),0.0067(1),0.0067(1),0.0267(4),0.0000(0)
gpt-5-nano,qawiki,R(1-4),Contains,0.9267(139),150,0.0200(3),0.9267(139),0.0000(0),0.0067(1),0.0133(2),0.0200(3),0.0133(2)
gpt-5-nano,qawiki,R(3-4),Disjoint,0.8733(131),150,0.0000(0),0.0600(9),0.0200(3),0.0000(0),0.8733(131),0.0467(7),0.0000(0)
gpt-5-nano,qawiki,R(1-34),Equivalence,0.9200(138),150,0.9200(138),0.0200(3),0.0200(3),0.0067(1),0.0267(4),0.0067(1),0.0000(0)
gpt-5-nano,spinach,R(1-2),Equivalence,0.9067(136),150,0.9067(136),0.0267(4),0.0267(4),0.0067(1),0.0000(0),0.0333(5),0.0000(0)
gpt-5-nano,spinach,R(1-3),Contains,0.8133(122),150,0.1067(16),0.8133(122),0.0133(2),0.0000(0),0.0200(3),0.0333(5),0.0133(2)
gpt-5-nano,spinach,R(1-4),Contains,0.9067(136),150,0.0200(3),0.9067(136),0.0067(1),0.0000(0),0.0267(4),0.0333(5),0.0067(1)
gpt-5-nano,spinach,R(3-4),Disjoint,0.8533(128),150,0.0067(1),0.0333(5),0.0200(3),0.0333(5),0.8533(128),0.0533(8),0.0000(0)
gpt-5-nano,spinach,R(1-34),Equivalence,0.9133(137),150,0.9133(137),0.0333(5),0.0200(3),0.0000(0),0.0267(4),0.0067(1),0.0000(0)
gpt-5-nano,synthetic,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0333(5),0.0133(2),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
gpt-5-nano,synthetic,R(1-3),Contains,0.8333(125),150,0.0667(10),0.8333(125),0.0067(1),0.0067(1),0.0267(4),0.0467(7),0.0133(2)
gpt-5-nano,synthetic,R(1-4),Contains,0.9067(136),150,0.0333(5),0.9067(136),0.0000(0),0.0067(1),0.0067(1),0.0333(5),0.0133(2)
gpt-5-nano,synthetic,R(3-4),Disjoint,0.9067(136),150,0.0133(2),0.0200(3),0.0133(2),0.0000(0),0.9067(136),0.0467(7),0.0000(0)
gpt-5-nano,synthetic,R(1-34),Equivalence,0.8933(134),150,0.8933(134),0.0467(7),0.0400(6),0.0067(1),0.0000(0),0.0067(1),0.0067(1)
gpt-oss:20b,LC-QuAD,R(1-2),Equivalence,0.9733(146),150,0.9733(146),0.0067(1),0.0133(2),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
gpt-oss:20b,LC-QuAD,R(1-3),Contains,0.9200(138),150,0.0400(6),0.9200(138),0.0267(4),0.0000(0),0.0067(1),0.0067(1),0.0000(0)
gpt-oss:20b,LC-QuAD,R(1-4),Contains,0.9400(141),150,0.0267(4),0.9400(141),0.0200(3),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,LC-QuAD,R(3-4),Disjoint,0.9133(137),150,0.0067(1),0.0267(4),0.0400(6),0.0000(0),0.9133(137),0.0133(2),0.0000(0)
gpt-oss:20b,LC-QuAD,R(1-34),Equivalence,0.9333(140),150,0.9333(140),0.0200(3),0.0333(5),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
gpt-oss:20b,qawiki,R(1-2),Equivalence,0.9267(139),150,0.9267(139),0.0133(2),0.0333(5),0.0200(3),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,qawiki,R(1-3),Contains,0.9333(140),150,0.0400(6),0.9333(140),0.0200(3),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,qawiki,R(1-4),Contains,0.9600(144),150,0.0200(3),0.9600(144),0.0000(0),0.0000(0),0.0200(3),0.0000(0),0.0000(0)
gpt-oss:20b,qawiki,R(3-4),Disjoint,0.9200(138),150,0.0000(0),0.0200(3),0.0200(3),0.0400(6),0.9200(138),0.0000(0),0.0000(0)
gpt-oss:20b,qawiki,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0200(3),0.0200(3),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,spinach,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0600(9),0.0200(3),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
gpt-oss:20b,spinach,R(1-3),Contains,0.9067(136),150,0.0533(8),0.9067(136),0.0200(3),0.0133(2),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,spinach,R(1-4),Contains,0.9733(146),150,0.0267(4),0.9733(146),0.0000(0),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
gpt-oss:20b,spinach,R(3-4),Disjoint,0.8867(133),150,0.0133(2),0.0467(7),0.0200(3),0.0333(5),0.8867(133),0.0000(0),0.0000(0)
gpt-oss:20b,spinach,R(1-34),Equivalence,0.9200(138),150,0.9200(138),0.0200(3),0.0467(7),0.0067(1),0.0000(0),0.0067(1),0.0000(0)
gpt-oss:20b,synthetic,R(1-2),Equivalence,0.9400(141),150,0.9400(141),0.0333(5),0.0200(3),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,synthetic,R(1-3),Contains,0.9333(140),150,0.0267(4),0.9333(140),0.0200(3),0.0000(0),0.0133(2),0.0067(1),0.0000(0)
gpt-oss:20b,synthetic,R(1-4),Contains,0.9800(147),150,0.0067(1),0.9800(147),0.0000(0),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
gpt-oss:20b,synthetic,R(3-4),Disjoint,0.9333(140),150,0.0133(2),0.0267(4),0.0133(2),0.0133(2),0.9333(140),0.0000(0),0.0000(0)
gpt-oss:20b,synthetic,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0267(4),0.0067(1),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
grok-3-mini,LC-QuAD,R(1-2),Equivalence,0.9600(144),150,0.9600(144),0.0200(3),0.0133(2),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
grok-3-mini,LC-QuAD,R(1-3),Contains,0.9467(142),150,0.0133(2),0.9467(142),0.0067(1),0.0000(0),0.0000(0),0.0333(5),0.0000(0)
grok-3-mini,LC-QuAD,R(1-4),Contains,0.9667(145),150,0.0067(1),0.9667(145),0.0067(1),0.0000(0),0.0067(1),0.0133(2),0.0000(0)
grok-3-mini,LC-QuAD,R(3-4),Disjoint,0.9800(147),150,0.0000(0),0.0067(1),0.0067(1),0.0000(0),0.9800(147),0.0067(1),0.0000(0)
grok-3-mini,LC-QuAD,R(1-34),Equivalence,0.9667(145),150,0.9667(145),0.0067(1),0.0133(2),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
grok-3-mini,qawiki,R(1-2),Equivalence,0.8800(132),150,0.8800(132),0.0467(7),0.0667(10),0.0000(0),0.0000(0),0.0067(1),0.0000(0)
grok-3-mini,qawiki,R(1-3),Contains,0.9600(144),150,0.0200(3),0.9600(144),0.0000(0),0.0000(0),0.0000(0),0.0200(3),0.0000(0)
grok-3-mini,qawiki,R(1-4),Contains,0.9667(145),150,0.0067(1),0.9667(145),0.0000(0),0.0000(0),0.0200(3),0.0067(1),0.0000(0)
grok-3-mini,qawiki,R(3-4),Disjoint,0.9733(146),150,0.0000(0),0.0067(1),0.0000(0),0.0200(3),0.9733(146),0.0000(0),0.0000(0)
grok-3-mini,qawiki,R(1-34),Equivalence,0.9733(146),150,0.9733(146),0.0000(0),0.0133(2),0.0000(0),0.0133(2),0.0000(0),0.0000(0)
grok-3-mini,spinach,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0333(5),0.0533(8),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
grok-3-mini,spinach,R(1-3),Contains,0.9333(140),150,0.0400(6),0.9333(140),0.0000(0),0.0067(1),0.0000(0),0.0200(3),0.0000(0)
grok-3-mini,spinach,R(1-4),Contains,0.9467(142),150,0.0067(1),0.9467(142),0.0000(0),0.0000(0),0.0200(3),0.0267(4),0.0000(0)
grok-3-mini,spinach,R(3-4),Disjoint,0.9733(146),150,0.0000(0),0.0000(0),0.0067(1),0.0067(1),0.9733(146),0.0133(2),0.0000(0)
grok-3-mini,spinach,R(1-34),Equivalence,0.9533(143),150,0.9533(143),0.0200(3),0.0067(1),0.0000(0),0.0067(1),0.0133(2),0.0000(0)
grok-3-mini,synthetic,R(1-2),Equivalence,0.9133(137),150,0.9133(137),0.0467(7),0.0200(3),0.0000(0),0.0133(2),0.0067(1),0.0000(0)
grok-3-mini,synthetic,R(1-3),Contains,0.9533(143),150,0.0067(1),0.9533(143),0.0067(1),0.0067(1),0.0000(0),0.0267(4),0.0000(0)
grok-3-mini,synthetic,R(1-4),Contains,0.9800(147),150,0.0000(0),0.9800(147),0.0000(0),0.0000(0),0.0067(1),0.0133(2),0.0000(0)
grok-3-mini,synthetic,R(3-4),Disjoint,0.9800(147),150,0.0067(1),0.0000(0),0.0000(0),0.0067(1),0.9800(147),0.0067(1),0.0000(0)
grok-3-mini,synthetic,R(1-34),Equivalence,0.9800(147),150,0.9800(147),0.0000(0),0.0133(2),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
llama3.1:70b,LC-QuAD,R(1-2),Equivalence,0.9667(145),150,0.9667(145),0.0000(0),0.0067(1),0.0200(3),0.0000(0),0.0000(0),0.0067(1)
llama3.1:70b,LC-QuAD,R(1-3),Contains,0.0667(10),150,0.0000(0),0.0667(10),0.8733(131),0.0533(8),0.0067(1),0.0000(0),0.0000(0)
llama3.1:70b,LC-QuAD,R(1-4),Contains,0.2533(38),150,0.0133(2),0.2533(38),0.1200(18),0.4067(61),0.2067(31),0.0000(0),0.0000(0)
llama3.1:70b,LC-QuAD,R(3-4),Disjoint,0.9867(148),150,0.0000(0),0.0000(0),0.0000(0),0.0133(2),0.9867(148),0.0000(0),0.0000(0)
llama3.1:70b,LC-QuAD,R(1-34),Equivalence,0.9867(148),150,0.9867(148),0.0000(0),0.0000(0),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
llama3.1:70b,qawiki,R(1-2),Equivalence,0.9867(148),150,0.9867(148),0.0000(0),0.0000(0),0.0133(2),0.0000(0),0.0000(0),0.0000(0)
llama3.1:70b,qawiki,R(1-3),Contains,0.0800(12),150,0.0067(1),0.0800(12),0.9067(136),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
llama3.1:70b,qawiki,R(1-4),Contains,0.2933(44),150,0.0000(0),0.2933(44),0.0733(11),0.3600(54),0.2733(41),0.0000(0),0.0000(0)
llama3.1:70b,qawiki,R(3-4),Disjoint,1.0000(150),150,0.0000(0),0.0000(0),0.0000(0),0.0000(0),1.0000(150),0.0000(0),0.0000(0)
llama3.1:70b,qawiki,R(1-34),Equivalence,0.9867(148),150,0.9867(148),0.0000(0),0.0000(0),0.0067(1),0.0067(1),0.0000(0),0.0000(0)
llama3.1:70b,spinach,R(1-2),Equivalence,0.9800(147),150,0.9800(147),0.0000(0),0.0067(1),0.0133(2),0.0000(0),0.0000(0),0.0000(0)
llama3.1:70b,spinach,R(1-3),Contains,0.0800(12),150,0.0067(1),0.0800(12),0.8667(130),0.0467(7),0.0000(0),0.0000(0),0.0000(0)
llama3.1:70b,spinach,R(1-4),Contains,0.2133(32),150,0.0067(1),0.2133(32),0.0733(11),0.4467(67),0.2600(39),0.0000(0),0.0000(0)
llama3.1:70b,spinach,R(3-4),Disjoint,0.9867(148),150,0.0067(1),0.0000(0),0.0000(0),0.0067(1),0.9867(148),0.0000(0),0.0000(0)
llama3.1:70b,spinach,R(1-34),Equivalence,0.9800(147),150,0.9800(147),0.0000(0),0.0000(0),0.0133(2),0.0067(1),0.0000(0),0.0000(0)
llama3.1:70b,synthetic,R(1-2),Equivalence,0.9600(144),150,0.9600(144),0.0000(0),0.0067(1),0.0200(3),0.0133(2),0.0000(0),0.0000(0)
llama3.1:70b,synthetic,R(1-3),Contains,0.0800(12),150,0.0067(1),0.0800(12),0.8400(126),0.0600(9),0.0067(1),0.0067(1),0.0000(0)
llama3.1:70b,synthetic,R(1-4),Contains,0.3200(48),150,0.0000(0),0.3200(48),0.1400(21),0.3733(56),0.1667(25),0.0000(0),0.0000(0)
llama3.1:70b,synthetic,R(3-4),Disjoint,0.9933(149),150,0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
llama3.1:70b,synthetic,R(1-34),Equivalence,0.9867(148),150,0.9867(148),0.0067(1),0.0000(0),0.0067(1),0.0000(0),0.0000(0),0.0000(0)
llama3.1:8b,LC-QuAD,R(1-2),Equivalence,0.0467(7),150,0.0467(7),0.1133(17),0.0133(2),0.7733(116),0.0000(0),0.0000(0),0.0533(8)
llama3.1:8b,LC-QuAD,R(1-3),Contains,0.1267(19),150,0.0267(4),0.1267(19),0.1800(27),0.4067(61),0.2533(38),0.0000(0),0.0067(1)
llama3.1:8b,LC-QuAD,R(1-4),Contains,0.0867(13),150,0.0067(1),0.0867(13),0.1667(25),0.4733(71),0.2600(39),0.0000(0),0.0067(1)
llama3.1:8b,LC-QuAD,R(3-4),Disjoint,0.1733(26),150,0.0200(3),0.0867(13),0.2667(40),0.4267(64),0.1733(26),0.0000(0),0.0267(4)
llama3.1:8b,LC-QuAD,R(1-34),Equivalence,0.0267(4),150,0.0267(4),0.1133(17),0.0600(9),0.5533(83),0.1533(23),0.0000(0),0.0933(14)
llama3.1:8b,qawiki,R(1-2),Equivalence,0.0400(6),150,0.0400(6),0.0733(11),0.0000(0),0.8600(129),0.0000(0),0.0000(0),0.0267(4)
llama3.1:8b,qawiki,R(1-3),Contains,0.1133(17),150,0.0267(4),0.1133(17),0.2000(30),0.4467(67),0.2133(32),0.0000(0),0.0000(0)
llama3.1:8b,qawiki,R(1-4),Contains,0.1800(27),150,0.0000(0),0.1800(27),0.1867(28),0.4467(67),0.1867(28),0.0000(0),0.0000(0)
llama3.1:8b,qawiki,R(3-4),Disjoint,0.1933(29),150,0.0200(3),0.0800(12),0.3267(49),0.3667(55),0.1933(29),0.0000(0),0.0133(2)
llama3.1:8b,qawiki,R(1-34),Equivalence,0.0267(4),150,0.0267(4),0.1133(17),0.0467(7),0.5933(89),0.1333(20),0.0000(0),0.0867(13)
llama3.1:8b,spinach,R(1-2),Equivalence,0.0600(9),150,0.0600(9),0.1400(21),0.0067(1),0.7600(114),0.0000(0),0.0000(0),0.0333(5)
llama3.1:8b,spinach,R(1-3),Contains,0.1133(17),150,0.0333(5),0.1133(17),0.1667(25),0.5133(77),0.1600(24),0.0000(0),0.0133(2)
llama3.1:8b,spinach,R(1-4),Contains,0.0800(12),150,0.0133(2),0.0800(12),0.1333(20),0.5067(76),0.2533(38),0.0067(1),0.0067(1)
llama3.1:8b,spinach,R(3-4),Disjoint,0.1600(24),150,0.0067(1),0.0667(10),0.3533(53),0.4000(60),0.1600(24),0.0000(0),0.0133(2)
llama3.1:8b,spinach,R(1-34),Equivalence,0.0400(6),150,0.0400(6),0.1267(19),0.0800(12),0.4800(72),0.1467(22),0.0000(0),0.1267(19)
llama3.1:8b,synthetic,R(1-2),Equivalence,0.0867(13),150,0.0867(13),0.0800(12),0.0067(1),0.7733(116),0.0000(0),0.0000(0),0.0533(8)
llama3.1:8b,synthetic,R(1-3),Contains,0.1000(15),150,0.0533(8),0.1000(15),0.2133(32),0.4000(60),0.2000(30),0.0067(1),0.0267(4)
llama3.1:8b,synthetic,R(1-4),Contains,0.1133(17),150,0.0133(2),0.1133(17),0.1867(28),0.4600(69),0.2267(34),0.0000(0),0.0000(0)
llama3.1:8b,synthetic,R(3-4),Disjoint,0.2400(36),150,0.0133(2),0.0800(12),0.2933(44),0.3600(54),0.2400(36),0.0000(0),0.0133(2)
llama3.1:8b,synthetic,R(1-34),Equivalence,0.0200(3),150,0.0200(3),0.1467(22),0.0933(14),0.5067(76),0.1400(21),0.0000(0),0.0933(14)
mistral-small:24b,LC-QuAD,R(1-2),Equivalence,0.9324(138),148,0.9324(138),0.0000(0),0.0338(5),0.0203(3),0.0068(1),0.0068(1),0.0000(0)
mistral-small:24b,LC-QuAD,R(1-3),Contains,0.0000(0),148,0.0000(0),0.0000(0),0.8649(128),0.0068(1),0.0203(3),0.1081(16),0.0000(0)
mistral-small:24b,LC-QuAD,R(1-4),Contains,0.0135(2),148,0.0135(2),0.0135(2),0.2365(35),0.1554(23),0.3919(58),0.1892(28),0.0000(0)
mistral-small:24b,LC-QuAD,R(3-4),Disjoint,0.9932(147),148,0.0000(0),0.0000(0),0.0068(1),0.0000(0),0.9932(147),0.0000(0),0.0000(0)
mistral-small:24b,LC-QuAD,R(1-34),Equivalence,0.0676(10),148,0.0676(10),0.0000(0),0.4662(69),0.0068(1),0.3311(49),0.1284(19),0.0000(0)
mistral-small:24b,qawiki,R(1-2),Equivalence,0.9189(136),148,0.9189(136),0.0068(1),0.0203(3),0.0473(7),0.0068(1),0.0000(0),0.0000(0)
mistral-small:24b,qawiki,R(1-3),Contains,0.0000(0),148,0.0135(2),0.0000(0),0.8986(133),0.0135(2),0.0135(2),0.0608(9),0.0000(0)
mistral-small:24b,qawiki,R(1-4),Contains,0.0203(3),148,0.0000(0),0.0203(3),0.2162(32),0.1892(28),0.4054(60),0.1689(25),0.0000(0)
mistral-small:24b,qawiki,R(3-4),Disjoint,1.0000(148),148,0.0000(0),0.0000(0),0.0000(0),0.0000(0),1.0000(148),0.0000(0),0.0000(0)
mistral-small:24b,qawiki,R(1-34),Equivalence,0.1486(22),148,0.1486(22),0.0000(0),0.5203(77),0.0000(0),0.3108(46),0.0203(3),0.0000(0)
mistral-small:24b,spinach,R(1-2),Equivalence,0.9375(135),144,0.9375(135),0.0000(0),0.0486(7),0.0139(2),0.0000(0),0.0000(0),0.0000(0)
mistral-small:24b,spinach,R(1-3),Contains,0.0069(1),144,0.0000(0),0.0069(1),0.8611(124),0.0139(2),0.0208(3),0.0972(14),0.0000(0)
mistral-small:24b,spinach,R(1-4),Contains,0.0069(1),144,0.0069(1),0.0069(1),0.1736(25),0.1806(26),0.4792(69),0.1528(22),0.0000(0)
mistral-small:24b,spinach,R(3-4),Disjoint,0.9931(143),144,0.0000(0),0.0000(0),0.0000(0),0.0069(1),0.9931(143),0.0000(0),0.0000(0)
mistral-small:24b,spinach,R(1-34),Equivalence,0.0764(11),144,0.0764(11),0.0000(0),0.4375(63),0.0000(0),0.4097(59),0.0764(11),0.0000(0)
mistral-small:24b,synthetic,R(1-2),Equivalence,0.8800(132),150,0.8800(132),0.0133(2),0.0467(7),0.0333(5),0.0267(4),0.0000(0),0.0000(0)
mistral-small:24b,synthetic,R(1-3),Contains,0.0000(0),150,0.0000(0),0.0000(0),0.8867(133),0.0000(0),0.0400(6),0.0733(11),0.0000(0)
mistral-small:24b,synthetic,R(1-4),Contains,0.0067(1),150,0.0133(2),0.0067(1),0.2333(35),0.1267(19),0.4467(67),0.1733(26),0.0000(0)
mistral-small:24b,synthetic,R(3-4),Disjoint,0.9933(149),150,0.0067(1),0.0000(0),0.0000(0),0.0000(0),0.9933(149),0.0000(0),0.0000(0)
mistral-small:24b,synthetic,R(1-34),Equivalence,0.1800(27),150,0.1800(27),0.0000(0),0.3933(59),0.0000(0),0.3267(49),0.1000(15),0.0000(0)
o3,LC-QuAD,R(1-2),Equivalence,0.9730(144),148,0.9730(144),0.0203(3),0.0000(0),0.0068(1),0.0000(0),0.0000(0),0.0000(0)
o3,LC-QuAD,R(1-3),Contains,0.9662(143),148,0.0135(2),0.9662(143),0.0135(2),0.0000(0),0.0000(0),0.0068(1),0.0000(0)
o3,LC-QuAD,R(1-4),Contains,0.9865(146),148,0.0068(1),0.9865(146),0.0000(0),0.0000(0),0.0000(0),0.0068(1),0.0000(0)
o3,LC-QuAD,R(3-4),Disjoint,0.9662(143),148,0.0000(0),0.0270(4),0.0000(0),0.0000(0),0.9662(143),0.0068(1),0.0000(0)
o3,LC-QuAD,R(1-34),Equivalence,0.9392(139),148,0.9392(139),0.0270(4),0.0270(4),0.0000(0),0.0000(0),0.0068(1),0.0000(0)
o3,qawiki,R(1-2),Equivalence,0.9122(135),148,0.9122(135),0.0270(4),0.0473(7),0.0000(0),0.0000(0),0.0135(2),0.0000(0)
o3,qawiki,R(1-3),Contains,0.9730(144),148,0.0135(2),0.9730(144),0.0000(0),0.0000(0),0.0000(0),0.0135(2),0.0000(0)
o3,qawiki,R(1-4),Contains,0.9662(143),148,0.0135(2),0.9662(143),0.0000(0),0.0000(0),0.0203(3),0.0000(0),0.0000(0)
o3,qawiki,R(3-4),Disjoint,0.9527(141),148,0.0000(0),0.0068(1),0.0068(1),0.0135(2),0.9527(141),0.0203(3),0.0000(0)
o3,qawiki,R(1-34),Equivalence,0.9257(137),148,0.9257(137),0.0203(3),0.0338(5),0.0000(0),0.0068(1),0.0135(2),0.0000(0)
o3,spinach,R(1-2),Equivalence,0.9306(134),144,0.9306(134),0.0208(3),0.0486(7),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
o3,spinach,R(1-3),Contains,0.9722(140),144,0.0139(2),0.9722(140),0.0000(0),0.0069(1),0.0069(1),0.0000(0),0.0000(0)
o3,spinach,R(1-4),Contains,0.9722(140),144,0.0208(3),0.9722(140),0.0000(0),0.0069(1),0.0000(0),0.0000(0),0.0000(0)
o3,spinach,R(3-4),Disjoint,0.9236(133),144,0.0069(1),0.0069(1),0.0069(1),0.0278(4),0.9236(133),0.0278(4),0.0000(0)
o3,spinach,R(1-34),Equivalence,0.9097(131),144,0.9097(131),0.0417(6),0.0486(7),0.0000(0),0.0000(0),0.0000(0),0.0000(0)
o3,synthetic,R(1-2),Equivalence,0.8933(134),150,0.8933(134),0.0600(9),0.0200(3),0.0000(0),0.0067(1),0.0200(3),0.0000(0)
o3,synthetic,R(1-3),Contains,0.9733(146),150,0.0067(1),0.9733(146),0.0067(1),0.0000(0),0.0000(0),0.0133(2),0.0000(0)
o3,synthetic,R(1-4),Contains,0.9800(147),150,0.0000(0),0.9800(147),0.0000(0),0.0067(1),0.0067(1),0.0067(1),0.0000(0)
o3,synthetic,R(3-4),Disjoint,0.9667(145),150,0.0067(1),0.0067(1),0.0000(0),0.0200(3),0.9667(145),0.0000(0),0.0000(0)
o3,synthetic,R(1-34),Equivalence,0.9733(146),150,0.9733(146),0.0000(0),0.0200(3),0.0000(0),0.0067(1),0.0000(0),0.0000(0)
deepseek-chat,overall,R(1-2),Equivalence,0.9600(576),600,0.9600(576),0.0000(0),0.0000(0),0.0317(19),0.0033(2),0.0050(3),0.0000(0)
deepseek-chat,overall,R(1-3),Contains,0.3550(213),600,0.0067(4),0.3550(213),0.5867(352),0.0417(25),0.0100(6),0.0000(0),0.0000(0)
deepseek-chat,overall,R(1-4),Contains,0.2083(125),600,0.0017(1),0.2083(125),0.0517(31),0.5367(322),0.2017(121),0.0000(0),0.0000(0)
deepseek-chat,overall,R(3-4),Disjoint,0.9933(596),600,0.0017(1),0.0000(0),0.0017(1),0.0033(2),0.9933(596),0.0000(0),0.0000(0)
deepseek-chat,overall,R(1-34),Equivalence,0.9050(543),600,0.9050(543),0.0067(4),0.0033(2),0.0167(10),0.0683(41),0.0000(0),0.0000(0)
deepseek-reasoner,overall,R(1-2),Equivalence,0.8883(533),600,0.8883(533),0.0367(22),0.0550(33),0.0033(2),0.0100(6),0.0067(4),0.0000(0)
deepseek-reasoner,overall,R(1-3),Contains,0.9700(582),600,0.0100(6),0.9700(582),0.0083(5),0.0050(3),0.0017(1),0.0050(3),0.0000(0)
deepseek-reasoner,overall,R(1-4),Contains,0.9667(580),600,0.0083(5),0.9667(580),0.0000(0),0.0083(5),0.0083(5),0.0083(5),0.0000(0)
deepseek-reasoner,overall,R(3-4),Disjoint,0.9633(578),600,0.0033(2),0.0067(4),0.0000(0),0.0183(11),0.9633(578),0.0083(5),0.0000(0)
deepseek-reasoner,overall,R(1-34),Equivalence,0.9433(566),600,0.9433(566),0.0233(14),0.0183(11),0.0033(2),0.0083(5),0.0033(2),0.0000(0)
gemini-2.0-flash,overall,R(1-2),Equivalence,0.9083(545),600,0.9083(545),0.0117(7),0.0083(5),0.0167(10),0.0550(33),0.0000(0),0.0000(0)
gemini-2.0-flash,overall,R(1-3),Contains,0.7917(475),600,0.0133(8),0.7917(475),0.1217(73),0.0183(11),0.0533(32),0.0017(1),0.0000(0)
gemini-2.0-flash,overall,R(1-4),Contains,0.2133(128),600,0.0100(6),0.2133(128),0.0317(19),0.6200(372),0.1250(75),0.0000(0),0.0000(0)
gemini-2.0-flash,overall,R(3-4),Disjoint,0.9517(571),600,0.0033(2),0.0050(3),0.0000(0),0.0400(24),0.9517(571),0.0000(0),0.0000(0)
gemini-2.0-flash,overall,R(1-34),Equivalence,0.8033(482),600,0.8033(482),0.0017(1),0.0000(0),0.0950(57),0.0650(39),0.0350(21),0.0000(0)
gemini-2.5-flash,overall,R(1-2),Equivalence,0.8633(518),600,0.8633(518),0.0517(31),0.0683(41),0.0100(6),0.0050(3),0.0017(1),0.0000(0)
gemini-2.5-flash,overall,R(1-3),Contains,0.9567(574),600,0.0150(9),0.9567(574),0.0050(3),0.0050(3),0.0017(1),0.0150(9),0.0017(1)
gemini-2.5-flash,overall,R(1-4),Contains,0.9633(578),600,0.0083(5),0.9633(578),0.0000(0),0.0050(3),0.0067(4),0.0150(9),0.0017(1)
gemini-2.5-flash,overall,R(3-4),Disjoint,0.9683(581),600,0.0033(2),0.0100(6),0.0017(1),0.0167(10),0.9683(581),0.0000(0),0.0000(0)
gemini-2.5-flash,overall,R(1-34),Equivalence,0.9550(573),600,0.9550(573),0.0200(12),0.0117(7),0.0017(1),0.0083(5),0.0017(1),0.0017(1)
gemini-2.5-pro,overall,R(1-2),Equivalence,0.9133(548),600,0.9133(548),0.0283(17),0.0500(30),0.0033(2),0.0033(2),0.0000(0),0.0017(1)
gemini-2.5-pro,overall,R(1-3),Contains,0.9767(586),600,0.0117(7),0.9767(586),0.0050(3),0.0033(2),0.0033(2),0.0000(0),0.0000(0)
gemini-2.5-pro,overall,R(1-4),Contains,0.9450(567),600,0.0117(7),0.9450(567),0.0000(0),0.0200(12),0.0183(11),0.0000(0),0.0050(3)
gemini-2.5-pro,overall,R(3-4),Disjoint,0.9850(591),600,0.0033(2),0.0017(1),0.0000(0),0.0083(5),0.9850(591),0.0000(0),0.0017(1)
gemini-2.5-pro,overall,R(1-34),Equivalence,0.9617(577),600,0.9617(577),0.0167(10),0.0150(9),0.0000(0),0.0067(4),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,overall,R(1-2),Equivalence,0.9450(567),600,0.9450(567),0.0400(24),0.0100(6),0.0000(0),0.0033(2),0.0017(1),0.0000(0)
gpt-4.1-2025-04-14,overall,R(1-3),Contains,0.9867(592),600,0.0017(1),0.9867(592),0.0050(3),0.0067(4),0.0000(0),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,overall,R(1-4),Contains,0.6967(418),600,0.0000(0),0.6967(418),0.0017(1),0.2600(156),0.0417(25),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,overall,R(3-4),Disjoint,0.9950(597),600,0.0017(1),0.0017(1),0.0000(0),0.0017(1),0.9950(597),0.0000(0),0.0000(0)
gpt-4.1-2025-04-14,overall,R(1-34),Equivalence,0.9933(596),600,0.9933(596),0.0000(0),0.0017(1),0.0017(1),0.0033(2),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,overall,R(1-2),Equivalence,0.9100(546),600,0.9100(546),0.0200(12),0.0567(34),0.0067(4),0.0033(2),0.0033(2),0.0000(0)
gpt-4.1-mini-2025-04-14,overall,R(1-3),Contains,0.3450(207),600,0.0017(1),0.3450(207),0.6333(380),0.0150(9),0.0017(1),0.0033(2),0.0000(0)
gpt-4.1-mini-2025-04-14,overall,R(1-4),Contains,0.2083(125),600,0.0050(3),0.2083(125),0.0933(56),0.5983(359),0.0917(55),0.0033(2),0.0000(0)
gpt-4.1-mini-2025-04-14,overall,R(3-4),Disjoint,0.9900(594),600,0.0017(1),0.0033(2),0.0000(0),0.0050(3),0.9900(594),0.0000(0),0.0000(0)
gpt-4.1-mini-2025-04-14,overall,R(1-34),Equivalence,0.9583(575),600,0.9583(575),0.0050(3),0.0000(0),0.0100(6),0.0267(16),0.0000(0),0.0000(0)
gpt-4.1-nano-2025-04-14,overall,R(1-2),Equivalence,0.6917(415),600,0.6917(415),0.0083(5),0.0000(0),0.2900(174),0.0000(0),0.0067(4),0.0033(2)
gpt-4.1-nano-2025-04-14,overall,R(1-3),Contains,0.3933(236),600,0.0267(16),0.3933(236),0.0800(48),0.4217(253),0.0217(13),0.0450(27),0.0117(7)
gpt-4.1-nano-2025-04-14,overall,R(1-4),Contains,0.4133(248),600,0.0017(1),0.4133(248),0.1583(95),0.1817(109),0.1517(91),0.0800(48),0.0133(8)
gpt-4.1-nano-2025-04-14,overall,R(3-4),Disjoint,0.6233(374),600,0.0000(0),0.1467(88),0.1083(65),0.0483(29),0.6233(374),0.0533(32),0.0200(12)
gpt-4.1-nano-2025-04-14,overall,R(1-34),Equivalence,0.0000(0),600,0.0000(0),0.0183(11),0.1217(73),0.0000(0),0.8583(515),0.0000(0),0.0017(1)
gpt-4o,overall,R(1-2),Equivalence,0.9450(567),600,0.9450(567),0.0417(25),0.0083(5),0.0017(1),0.0017(1),0.0017(1),0.0000(0)
gpt-4o,overall,R(1-3),Contains,0.3167(190),600,0.0050(3),0.3167(190),0.6483(389),0.0183(11),0.0017(1),0.0100(6),0.0000(0)
gpt-4o,overall,R(1-4),Contains,0.1217(73),600,0.0033(2),0.1217(73),0.0333(20),0.6733(404),0.1667(100),0.0017(1),0.0000(0)
gpt-4o,overall,R(3-4),Disjoint,0.9933(596),600,0.0017(1),0.0000(0),0.0000(0),0.0050(3),0.9933(596),0.0000(0),0.0000(0)
gpt-4o,overall,R(1-34),Equivalence,0.6533(392),600,0.6533(392),0.0100(6),0.0017(1),0.0217(13),0.2183(131),0.0950(57),0.0000(0)
gpt-5,overall,R(1-2),Equivalence,0.9050(543),600,0.9050(543),0.0333(20),0.0433(26),0.0050(3),0.0033(2),0.0100(6),0.0000(0)
gpt-5,overall,R(1-3),Contains,0.9767(586),600,0.0083(5),0.9767(586),0.0033(2),0.0033(2),0.0033(2),0.0050(3),0.0000(0)
gpt-5,overall,R(1-4),Contains,0.9833(590),600,0.0017(1),0.9833(590),0.0000(0),0.0050(3),0.0067(4),0.0033(2),0.0000(0)
gpt-5,overall,R(3-4),Disjoint,0.9683(581),600,0.0017(1),0.0050(3),0.0000(0),0.0150(9),0.9683(581),0.0100(6),0.0000(0)
gpt-5,overall,R(1-34),Equivalence,0.9450(567),600,0.9450(567),0.0200(12),0.0233(14),0.0033(2),0.0067(4),0.0017(1),0.0000(0)
gpt-5-mini,overall,R(1-2),Equivalence,0.9033(542),600,0.9033(542),0.0350(21),0.0300(18),0.0033(2),0.0033(2),0.0250(15),0.0000(0)
gpt-5-mini,overall,R(1-3),Contains,0.9717(583),600,0.0100(6),0.9717(583),0.0050(3),0.0017(1),0.0000(0),0.0117(7),0.0000(0)
gpt-5-mini,overall,R(1-4),Contains,0.9783(587),600,0.0033(2),0.9783(587),0.0000(0),0.0000(0),0.0067(4),0.0117(7),0.0000(0)
gpt-5-mini,overall,R(3-4),Disjoint,0.9500(570),600,0.0033(2),0.0033(2),0.0000(0),0.0167(10),0.9500(570),0.0267(16),0.0000(0)
gpt-5-mini,overall,R(1-34),Equivalence,0.9517(571),600,0.9517(571),0.0167(10),0.0183(11),0.0017(1),0.0083(5),0.0033(2),0.0000(0)
gpt-5-nano,overall,R(1-2),Equivalence,0.9183(551),600,0.9183(551),0.0250(15),0.0217(13),0.0050(3),0.0067(4),0.0233(14),0.0000(0)
gpt-5-nano,overall,R(1-3),Contains,0.8500(510),600,0.0717(43),0.8500(510),0.0100(6),0.0033(2),0.0183(11),0.0383(23),0.0083(5)
gpt-5-nano,overall,R(1-4),Contains,0.9150(549),600,0.0283(17),0.9150(549),0.0050(3),0.0033(2),0.0133(8),0.0250(15),0.0100(6)
gpt-5-nano,overall,R(3-4),Disjoint,0.8850(531),600,0.0067(4),0.0383(23),0.0150(9),0.0100(6),0.8850(531),0.0450(27),0.0000(0)
gpt-5-nano,overall,R(1-34),Equivalence,0.9117(547),600,0.9117(547),0.0317(19),0.0283(17),0.0033(2),0.0167(10),0.0067(4),0.0017(1)
gpt-oss:20b,overall,R(1-2),Equivalence,0.9383(563),600,0.9383(563),0.0283(17),0.0217(13),0.0067(4),0.0033(2),0.0017(1),0.0000(0)
gpt-oss:20b,overall,R(1-3),Contains,0.9233(554),600,0.0400(24),0.9233(554),0.0217(13),0.0033(2),0.0083(5),0.0033(2),0.0000(0)
gpt-oss:20b,overall,R(1-4),Contains,0.9633(578),600,0.0200(12),0.9633(578),0.0050(3),0.0033(2),0.0083(5),0.0000(0),0.0000(0)
gpt-oss:20b,overall,R(3-4),Disjoint,0.9133(548),600,0.0083(5),0.0300(18),0.0233(14),0.0217(13),0.9133(548),0.0033(2),0.0000(0)
gpt-oss:20b,overall,R(1-34),Equivalence,0.9400(564),600,0.9400(564),0.0217(13),0.0267(16),0.0017(1),0.0050(3),0.0050(3),0.0000(0)
grok-3-mini,overall,R(1-2),Equivalence,0.9167(550),600,0.9167(550),0.0367(22),0.0383(23),0.0000(0),0.0033(2),0.0050(3),0.0000(0)
grok-3-mini,overall,R(1-3),Contains,0.9483(569),600,0.0200(12),0.9483(569),0.0033(2),0.0033(2),0.0000(0),0.0250(15),0.0000(0)
grok-3-mini,overall,R(1-4),Contains,0.9650(579),600,0.0050(3),0.9650(579),0.0017(1),0.0000(0),0.0133(8),0.0150(9),0.0000(0)
grok-3-mini,overall,R(3-4),Disjoint,0.9767(586),600,0.0017(1),0.0033(2),0.0033(2),0.0083(5),0.9767(586),0.0067(4),0.0000(0)
grok-3-mini,overall,R(1-34),Equivalence,0.9683(581),600,0.9683(581),0.0067(4),0.0117(7),0.0000(0),0.0100(6),0.0033(2),0.0000(0)
llama3.1:70b,overall,R(1-2),Equivalence,0.9733(584),600,0.9733(584),0.0000(0),0.0050(3),0.0167(10),0.0033(2),0.0000(0),0.0017(1)
llama3.1:70b,overall,R(1-3),Contains,0.0767(46),600,0.0050(3),0.0767(46),0.8717(523),0.0417(25),0.0033(2),0.0017(1),0.0000(0)
llama3.1:70b,overall,R(1-4),Contains,0.2700(162),600,0.0050(3),0.2700(162),0.1017(61),0.3967(238),0.2267(136),0.0000(0),0.0000(0)
llama3.1:70b,overall,R(3-4),Disjoint,0.9917(595),600,0.0033(2),0.0000(0),0.0000(0),0.0050(3),0.9917(595),0.0000(0),0.0000(0)
llama3.1:70b,overall,R(1-34),Equivalence,0.9850(591),600,0.9850(591),0.0017(1),0.0000(0),0.0083(5),0.0050(3),0.0000(0),0.0000(0)
llama3.1:8b,overall,R(1-2),Equivalence,0.0583(35),600,0.0583(35),0.1017(61),0.0067(4),0.7917(475),0.0000(0),0.0000(0),0.0417(25)
llama3.1:8b,overall,R(1-3),Contains,0.1133(68),600,0.0350(21),0.1133(68),0.1900(114),0.4417(265),0.2067(124),0.0017(1),0.0117(7)
llama3.1:8b,overall,R(1-4),Contains,0.1150(69),600,0.0083(5),0.1150(69),0.1683(101),0.4717(283),0.2317(139),0.0017(1),0.0033(2)
llama3.1:8b,overall,R(3-4),Disjoint,0.1917(115),600,0.0150(9),0.0783(47),0.3100(186),0.3883(233),0.1917(115),0.0000(0),0.0167(10)
llama3.1:8b,overall,R(1-34),Equivalence,0.0283(17),600,0.0283(17),0.1250(75),0.0700(42),0.5333(320),0.1433(86),0.0000(0),0.1000(60)
mistral-small:24b,overall,R(1-2),Equivalence,0.9169(541),590,0.9169(541),0.0051(3),0.0373(22),0.0288(17),0.0102(6),0.0017(1),0.0000(0)
mistral-small:24b,overall,R(1-3),Contains,0.0017(1),590,0.0034(2),0.0017(1),0.8780(518),0.0085(5),0.0237(14),0.0847(50),0.0000(0)
mistral-small:24b,overall,R(1-4),Contains,0.0119(7),590,0.0085(5),0.0119(7),0.2153(127),0.1627(96),0.4305(254),0.1712(101),0.0000(0)
mistral-small:24b,overall,R(3-4),Disjoint,0.9949(587),590,0.0017(1),0.0000(0),0.0017(1),0.0017(1),0.9949(587),0.0000(0),0.0000(0)
mistral-small:24b,overall,R(1-34),Equivalence,0.1186(70),590,0.1186(70),0.0000(0),0.4542(268),0.0017(1),0.3441(203),0.0814(48),0.0000(0)
o3,overall,R(1-2),Equivalence,0.9271(547),590,0.9271(547),0.0322(19),0.0288(17),0.0017(1),0.0017(1),0.0085(5),0.0000(0)
o3,overall,R(1-3),Contains,0.9712(573),590,0.0119(7),0.9712(573),0.0051(3),0.0017(1),0.0017(1),0.0085(5),0.0000(0)
o3,overall,R(1-4),Contains,0.9763(576),590,0.0102(6),0.9763(576),0.0000(0),0.0034(2),0.0068(4),0.0034(2),0.0000(0)
o3,overall,R(3-4),Disjoint,0.9525(562),590,0.0034(2),0.0119(7),0.0034(2),0.0153(9),0.9525(562),0.0136(8),0.0000(0)
o3,overall,R(1-34),Equivalence,0.9373(553),590,0.9373(553),0.0220(13),0.0322(19),0.0000(0),0.0034(2),0.0051(3),0.0000(0)
