family,engine,task_kind,total,correct,accuracy
blocksworld,o1_chat,zero-shot,600,574,95.67
blocksworld,gpt-5_chat,zero-shot,600,596,99.33
blocksworld,o1-mini_chat,zero-shot,600,340,56.67
blocksworld,gpt-4o_chat,zero-shot,600,213,35.5
blocksworld,gpt-4o_chat,one-shot,600,170,28.33
blocksworld,gpt-4_chat,zero-shot,600,210,35.0
blocksworld,gpt-4_chat,one-shot,600,206,34.33
blocksworld,o1-preview_chat,zero-shot,600,587,97.83
mystery_blocksworld,o1_chat,zero-shot,600,446,74.33
mystery_blocksworld,gpt-5_chat,zero-shot,600,589,98.17
mystery_blocksworld,gpt-5_chat,one-shot,600,561,93.5
mystery_blocksworld,o1-mini_chat,zero-shot,601,115,19.13
mystery_blocksworld,o1-mini_chat,one-shot,600,72,12.0
mystery_blocksworld,gpt-4o_chat,zero-shot,601,0,0.0
mystery_blocksworld,gpt-4_chat,zero-shot,600,0,0.0
random_blocksworld,gpt-5_chat,zero-shot,600,555,92.5
random_blocksworld,o1-mini_chat,zero-shot,600,56,9.33
random_blocksworld,gpt-4o_chat,zero-shot,600,5,0.83
