family,engine,task_kind,total,correct,accuracy
blocksworld,o1_chat,one-shot,600,591,98.5
blocksworld,gpt-5_chat,one-shot,600,598,99.67
blocksworld,o1-mini_chat,one-shot,600,342,57.0
blocksworld,gpt-4o_chat,one-shot,600,272,45.33
blocksworld,gpt-4_chat,one-shot,600,238,39.67
mystery_blocksworld,o1_chat,one-shot,600,500,83.33
mystery_blocksworld,gpt-5_chat,one-shot,600,590,98.33
mystery_blocksworld,o1-mini_chat,one-shot,600,101,16.83
mystery_blocksworld,gpt-4o_chat,one-shot,600,33,5.5
mystery_blocksworld,gpt-4_chat,one-shot,600,23,3.83
random_blocksworld,o1_chat,one-shot,600,584,97.33
random_blocksworld,gpt-5_chat,one-shot,600,594,99.0
random_blocksworld,o1-mini_chat,one-shot,600,162,27.0
random_blocksworld,gpt-4o_chat,one-shot,600,29,4.83
random_blocksworld,gpt-4_chat,one-shot,600,25,4.17
