experiment run details:
  dataset: openworld
  path: /gpfs/mariana/home/envomp/bongard/
  prompt method: direct
  prompt structure: interleaved_test_first

---------------------------------------
  test split name: test
---------------------------------------

0 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
1 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
2 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
3 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
4 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
5 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
6 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
7 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
8 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
9 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
10 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
11 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
12 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
13 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
14 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
15 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
16 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
17 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
18 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
19 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
20 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
21 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
22 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
23 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
24 | expected:'cat_2' | got='cat_1 | full: [' cat_1']'
25 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
26 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
27 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
28 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
29 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
30 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
31 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
32 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
33 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
34 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
35 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
36 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
37 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
38 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
39 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
40 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
41 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
42 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
43 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
44 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
45 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
46 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
47 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
48 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
49 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
50 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
51 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
52 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
53 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
54 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
55 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
56 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
57 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
58 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
59 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
60 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
61 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
62 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
63 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
64 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
65 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
66 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
67 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
68 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
69 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
70 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
71 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
72 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
73 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
74 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
75 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
76 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
77 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
78 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
79 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
80 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
81 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
82 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
83 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
84 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
85 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
86 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
87 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
88 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
89 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
90 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
91 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
92 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
93 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
94 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
95 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
96 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
97 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
98 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
99 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
100 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
101 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
102 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
103 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
104 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
105 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
106 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
107 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
108 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
109 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
110 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
111 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
112 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
113 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
114 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
115 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
116 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
117 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
118 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
119 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
120 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
121 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
122 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
123 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
124 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
125 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
126 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
127 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
128 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
129 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
130 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
131 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
132 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
133 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
134 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
135 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
136 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
137 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
138 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
139 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
140 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
141 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
142 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
143 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
144 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
145 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
146 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
147 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
148 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
149 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
150 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
151 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
152 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
153 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
154 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
155 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
156 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
157 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
158 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
159 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
160 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
161 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
162 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
163 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
164 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
165 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
166 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
167 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
168 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
169 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
170 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
171 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
172 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
173 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
174 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
175 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
176 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
177 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
178 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
179 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
180 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
181 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
182 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
183 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
184 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
185 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
186 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
187 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
188 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
189 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
190 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
191 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
192 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
193 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
194 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
195 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
196 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
197 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
198 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
199 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
200 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
201 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
202 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
203 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
204 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
205 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
206 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
207 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
208 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
209 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
210 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
211 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
212 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
213 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
214 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
215 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
216 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
217 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
218 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
219 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
220 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
221 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
222 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
223 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
224 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
225 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
226 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
227 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
228 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
229 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
230 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
231 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
232 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
233 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
234 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
235 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
236 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
237 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
238 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
239 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
240 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
241 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
242 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
243 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
244 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
245 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
246 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
247 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
248 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
249 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
250 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
251 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
252 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
253 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
254 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
255 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
256 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
257 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
258 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
259 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
260 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
261 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
262 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
263 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
264 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
265 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
266 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
267 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
268 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
269 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
270 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
271 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
272 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
273 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
274 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
275 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
276 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
277 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
278 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
279 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
280 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
281 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
282 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
283 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
284 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
285 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
286 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
287 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
288 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
289 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
290 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
291 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
292 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
293 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
294 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
295 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
296 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
297 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
298 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
299 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
300 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
301 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
302 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
303 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
304 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
305 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
306 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
307 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
308 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
309 | expected:'cat_1' | got='cat_2 | full: [' cat_2\n\n**Reasoning:**\n\nThe images in `cat_2` all depict invertebrates (creatures without a backbone) - lobster, scorpion, centipede, spider, octopus. The test image shows a dog, which is a vertebrate. Therefore, it follows the rule of `cat_2`.']'
310 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
311 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
312 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
313 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
314 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
315 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
316 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
317 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
318 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
319 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
320 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
321 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
322 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
323 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
324 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
325 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
326 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
327 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
328 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
329 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
330 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
331 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
332 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
333 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
334 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
335 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
336 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
337 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
338 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
339 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
340 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
341 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
342 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
343 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
344 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
345 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
346 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
347 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
348 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
349 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
350 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
351 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
352 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
353 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
354 | expected:'cat_2' | got='cat_2 | full: [' cat_2\n\n**Reasoning:**\n\nThe rule appears to be whether the image is a pencil sketch on white paper. All images in `cat_2` are pencil sketches on white paper. The test image is also a pencil sketch on white paper. Therefore, it belongs to `cat_2`. The images in `cat_1` are either color images, tattoos, or photographs of sculptures, and do not fit the pencil sketch on white paper criteria.']'
355 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
356 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
357 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
358 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
359 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
360 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
361 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
362 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
363 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
364 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
365 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
366 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
367 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
368 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
369 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
370 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
371 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
372 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
373 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
374 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
375 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
376 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
377 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
378 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
379 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
380 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
381 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
382 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
383 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
384 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
385 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
386 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
387 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
388 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
389 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
390 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
391 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
392 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
393 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
394 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
395 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
396 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
397 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
398 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
399 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
400 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
401 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
402 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
403 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
404 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
405 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
406 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
407 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
408 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
409 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
410 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
411 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
412 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
413 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
414 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
415 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
416 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
417 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
418 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
419 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
420 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
421 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
422 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
423 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
424 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
425 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
426 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
427 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
428 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
429 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
430 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
431 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
432 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
433 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
434 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
435 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
436 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
437 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
438 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
439 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
440 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
441 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
442 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
443 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
444 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
445 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
446 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
447 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
448 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
449 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
450 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
451 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
452 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
453 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
454 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
455 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
456 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
457 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
458 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
459 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
460 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
461 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
462 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
463 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
464 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
465 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
466 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
467 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
468 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
469 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
470 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
471 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
472 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
473 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
474 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
475 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
476 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
477 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
478 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
479 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
480 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
481 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
482 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
483 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
484 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
485 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
486 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
487 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
488 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
489 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
490 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
491 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
492 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
493 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
494 | expected:'cat_1' | got='cat_1 | full: [' cat_1']'
495 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
496 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
497 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
498 | expected:'cat_1' | got='cat_2 | full: [' cat_2']'
499 | expected:'cat_2' | got='cat_2 | full: [' cat_2']'
---------------------------------------
Summary for Split 'test':
 results: {'correct': {'cat_1': 54, 'cat_2': 249}, 'incorrect': {'cat_1': 196, 'cat_2': 1}}
 accuracy: 60.60%

---------------------------------------
