{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n6 * 2 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n1 + 7 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 + 2 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n7 - 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 - 6 = -5 (left: 1 4 -5)\n1 - -5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 + 6 = 7 (left: 4 7)\n4 * 7 = 28 (left: 28)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n1 + 7 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 + 2 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 / 5 = 0.2 (left: 6 0.2)\n6 / 0.2 = 30.0 (left: 30.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n6 / 2 = 3.0 (left: 4 3.0)\n4 * 3.0 = 12.0 (left: 12.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 / 4 = 0.25 (left: 1 6 0.25)\n1 / 0.25 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 2 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 * 6 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "4 * 6 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 + 4 = 5 (left: 6 5)\n6 * 5 = 30 (left: 30)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 * 6 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n6 * 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n6 * 2 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 1 1 24)\n1 + 1 = 2 (left: 24 2)\n24 * 2 = 48 (left: 48)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "4 * 6 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n6 * 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 * 4 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 + 6 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 4 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "6 - 1 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n7 - 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 * 6 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n6 * 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n6 + 5 = 11 (left: 1 11)\n1 + 11 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n1 + 7 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n7 - 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "4 * 6 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n6 * 2 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 6 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 * 4 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 4 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 4 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "6 - 1 = 5 (left: 1 4 5)\n1 * 5 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 / 4 = 0.25 (left: 1 6 0.25)\n1 / 0.25 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n1 * 4 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "4 * 6 = 24 (left: 1 1 24)\n1 * 24 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "6 - 1 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 - 4 = -3 (left: 1 6 -3)\n1 - -3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 4 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n1 + 7 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 / 5 = 0.2 (left: 6 0.2)\n6 / 0.2 = 30.0 (left: 30.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 1 4 6)\n4 * 6 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 1 1 10)\n1 + 1 = 2 (left: 10 2)\n10 + 2 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n6 - 1 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 / 4 = 0.25 (left: 1 6 0.25)\n1 / 0.25 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "6 / 4 = 1.5 (left: 1 1 1.5)\n1 + 1.5 = 2.5 (left: 1 2.5)\n1 * 2.5 = 2.5 (left: 2.5)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 - 4 = -3 (left: 1 6 -3)\n1 - -3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 - 6 = -5 (left: 1 4 -5)\n1 - -5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "4 * 6 = 24 (left: 1 1 24)\n1 * 24 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 4 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 1 = 1 (left: 4 6 1)\n4 * 6 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 - 1 = 0 (left: 4 6 0)\n6 + 0 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 / 4 = 0.25 (left: 1 6 0.25)\n1 / 0.25 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n6 + 5 = 11 (left: 1 11)\n1 + 11 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 * 4 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n6 * 2 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 / 4 = 0.25 (left: 1 6 0.25)\n1 / 0.25 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 4 6 2)\n2 / 4 = 0.5 (left: 6 0.5)\n6 / 0.5 = 12.0 (left: 12.0)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 - 1 = 0 (left: 4 6 0)\n4 * 6 = 24 (left: 0 24)\n0 + 24 = 24 (left: 24)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 0, "input": "1 1 4 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 1 4 7)\n1 + 4 = 5 (left: 7 5)\n7 + 5 = 12 (left: 12)\nThe answer is (1 + 6) + (1 + 4) = 11 (left: 11)"}
{"idx": 0, "input": "1 1 4 6", "reward": 100, "generate_data": "1 + 4 = 5 (left: 1 6 5)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 - 12 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 1 = 2 (left: 11 11 2)\n11 + 2 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 1 = 2 (left: 11 11 2)\n11 + 2 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n-10 - 1 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 * 11 = 121 (left: 1 121)\n1 * 121 = 121 (left: 121)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n22 - 1 = 21 (left: 1 21)\n1 + 21 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 1 = 0 (left: 11 11 0)\n11 + 11 = 22 (left: 0 22)\n0 + 22 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n1 - -10 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 * 12 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n12 - 1 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n-10 - 1 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 1 = 2 (left: 11 11 2)\n11 + 11 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 11 11 1)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 1 = 2 (left: 11 11 2)\n11 + 11 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n1 + 11 = 12 (left: -10 12)\n12 - -10 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n-10 - 11 = -21 (left: 1 -21)\n1 - -21 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 1 11 10)\n1 + 10 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 1 = 2 (left: 11 11 2)\n11 + 11 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 1 = 0 (left: 11 11 0)\n11 + 11 = 22 (left: 0 22)\n0 + 22 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 * 11 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n12 - 1 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n12 - 1 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n-10 - 1 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 * 11 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 - 11 = -10 (left: 12 -10)\n12 - -10 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 * 12 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 * 11 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 + 12 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 - 11 = -10 (left: 12 -10)\n12 - -10 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 * 12 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n12 - 1 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 - 1 = 10 (left: 11 10)\n11 + 10 = 21 (left: 21)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n22 - 1 = 21 (left: 1 21)\n1 + 21 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 * 11 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n1 + 11 = 12 (left: -10 12)\n12 - -10 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 11 11 1)\n11 * 1 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 11 11 2)\n11 * 11 = 121 (left: 2 121)\n121 / 2 = 60.5 (left: 60.5)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 - 22 = -21 (left: 1 -21)\n1 - -21 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 * 22 = 22 (left: 1 22)\n1 + 22 = 23 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 * 12 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 11 11 1)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n11 + -10 = 1 (left: 1 1)\n1 * 1 = 1 (left: 1)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 11 11 1)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n11 + 11 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n12 - 1 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n1 - 11 = -10 (left: 12 -10)\n12 - -10 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 - 1 = 0 (left: 22 0)\n22 + 0 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 100, "generate_data": "11 + 11 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n1 - 11 = -10 (left: -10 -10)\n-10 - -10 = 20 (left: 20)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 1 11 12)\n11 - 1 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 1 11 -10)\n1 - 11 = -10 (left: -10 -10)\n-10 - -10 = 20 (left: 20)"}
{"idx": 1, "input": "1 1 11 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 1 11 11)\n1 + 11 = 12 (left: 11 12)\n11 + 12 = 23 (left: 23)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 - 1 = 0 (left: 3 8 0)\n8 + 0 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 * 3 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n8 * 4 = 32 (left: 1 32)\n32 - 1 = 31 (left: 31)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 * 8 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n3 + 8 = 11 (left: 1 11)\n1 + 11 = 12 (left: 12)\n12 (left: 12)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "8 - 3 = 5 (left: 1 1 5)\n1 + 5 = 6 (left: 1 6)\n1 * 6 = 6 (left: 6)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 9 = 10 (left: 3 10)\n3 + 10 = 13 (left: 13)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 + 3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 1 3 7)\n1 + 3 = 4 (left: 7 4)\n7 + 4 = 11 (left: 11)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 * 8 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 * 8 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n8 * 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 * 8 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 + 8 = 9 (left: 3 9)\n3 * 9 = 27 (left: 27)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 3 = 0.33333333333333 (left: 1 8 0.33333333333333)\n1 / 0.33333333333333 = 3.0 (left: 8 3.0)\n8 * 3.0 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 + 3 = 4 (left: 0.125 4)\n4 / 0.125 = 32.0 (left: 32.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 / 2 = 1.5 (left: 8 1.5)\n8 * 1.5 = 12.0 (left: 12.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 + 8 = 9 (left: 3 9)\n3 * 9 = 27 (left: 27)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n3 * 8 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 1 3 7)\n1 * 3 = 3 (left: 7 3)\n7 * 3 = 21 (left: 21)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 3 = 4 (left: 9 4)\n9 * 4 = 36 (left: 36)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 1 3 -7)\n1 - 3 = -2 (left: -7 -2)\n-7 * -2 = 14 (left: 14)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 1 3 -7)\n1 * 3 = 3 (left: -7 3)\n3 - -7 = 10 (left: 3 10)\n3 + 10 = 13 (left: 13)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 9 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n3 * 8 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 - 2 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 + 2 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n3 - 1 = 2 (left: 0.125 2)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 9 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 9 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 / 0.125 = 8.0 (left: 3 8.0)\n3 * 8.0 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 + 2 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "3 * 8 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 - 2 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n8 * 4 = 32 (left: 1 32)\n32 - 1 = 31 (left: 31)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n8 * 4 = 32 (left: 1 32)\n32 - 1 = 31 (left: 31)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 - 3 = -2 (left: 1 8 -2)\n1 * 8 = 8 (left: -2 8)\n8 - -2 = 10 (left: 10)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 + 3 = 4 (left: 0.125 4)\n4 / 0.125 = 32.0 (left: 32.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 * 0.125 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n3 + 1 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 * 3 = 3 (left: 0.125 3)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 / 0.125 = 8.0 (left: 3 8.0)\n3 * 8.0 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 / 4 = 0.25 (left: 8 0.25)\n8 / 0.25 = 32.0 (left: 32.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 / 2 = 1.5 (left: 8 1.5)\n8 * 1.5 = 12.0 (left: 12.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 / 0.125 = 8.0 (left: 3 8.0)\n3 * 8.0 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 / 8 = 0.125 (left: 4 0.125)\n4 / 0.125 = 32.0 (left: 32.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 + 8 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "3 * 8 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 * 8 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "3 * 8 = 24 (left: 1 1 24)\n1 + 1 = 2 (left: 24 2)\n24 * 2 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n3 + 1 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 / 8 = 0.125 (left: 4 0.125)\n4 / 0.125 = 32.0 (left: 32.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "3 / 8 = 0.375 (left: 1 1 0.375)\n1 * 0.375 = 0.375 (left: 1 0.375)\n1 / 0.375 = 27.0 (left: 27.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 9 = 10 (left: 3 10)\n3 + 10 = 13 (left: 13)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 * 3 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n3 * 8 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n3 - 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 / 0.125 = 8.0 (left: 3 8.0)\n3 * 8.0 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 + 2 = 5 (left: 8 5)\n8 * 5 = 40 (left: 40)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 / 2 = 1.5 (left: 8 1.5)\n8 * 1.5 = 12.0 (left: 12.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 + 3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 * 8 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 + 2 = 5 (left: 8 5)\n8 * 5 = 40 (left: 40)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 * 8 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 - 1 = 0 (left: 3 8 0)\n3 + 0 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "8 - 1 = 7 (left: 1 3 7)\n1 + 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 + 8 = 9 (left: 3 9)\n3 * 9 = 27 (left: 27)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 3 = 3 (left: 1 8 3)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n8 * 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n1 + 3 = 4 (left: 9 4)\n9 * 4 = 36 (left: 36)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 + 8 = 9 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 / 2 = 1.5 (left: 8 1.5)\n8 * 1.5 = 12.0 (left: 12.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 / 4 = 0.25 (left: 8 0.25)\n8 / 0.25 = 32.0 (left: 32.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 - 3 = -2 (left: 1 8 -2)\n1 - -2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 1 = 1 (left: 3 8 1)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 / 8 = 0.125 (left: 3 0.125)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "3 * 8 = 24 (left: 1 1 24)\n1 * 1 = 1 (left: 24 1)\n24 * 1 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 / 2 = 1.5 (left: 8 1.5)\n8 * 1.5 = 12.0 (left: 12.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n8 * 2 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 1 = 1.0 (left: 3 8 1.0)\n8 * 1.0 = 8.0 (left: 3 8.0)\n3 * 8.0 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 / 8 = 0.125 (left: 1 3 0.125)\n1 * 3 = 3 (left: 0.125 3)\n3 / 0.125 = 24.0 (left: 24.0)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 1 8 4)\n1 * 8 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 - 3 = -2 (left: 1 8 -2)\n1 - -2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 2, "input": "1 1 3 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 3 8 2)\n3 * 8 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 2, "input": "1 1 3 8", "reward": 100, "generate_data": "1 * 8 = 8 (left: 1 3 8)\n1 * 8 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 1 1 -7)\n1 + 1 = 2 (left: -7 2)\n2 - -7 = 9 (left: 9)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 * 8 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 1 0.125)\n1 + 1 = 2 (left: 0.125 2)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n9 / 1 = 9.0 (left: 1 9.0)\n1 * 9.0 = 9.0 (left: 9.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 1 0.125)\n1 + 1 = 2 (left: 0.125 2)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 * 1 = 1 (left: 9 1)\n9 * 1 = 9 (left: 9)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 9 = 10 (left: 1 10)\n1 + 10 = 11 (left: 11)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 * 8 = 8 (left: 1 8)\n1 * 8 = 8 (left: 8)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 8 = 9 (left: 1 9)\n1 * 9 = 9 (left: 9)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 * 1 = 1 (left: 9 1)\n9 + 1 = 10 (left: 10)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 1 1 -7)\n1 * -7 = -7 (left: 1 -7)\n1 - -7 = 8 (left: 8)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 1 0.125)\n1 * 0.125 = 0.125 (left: 1 0.125)\n1 / 0.125 = 8.0 (left: 8.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 8 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 * 8 = 8 (left: 1 8)\n1 * 8 = 8 (left: 8)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 * 2 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 * 1 = 1 (left: 9 1)\n9 + 1 = 10 (left: 10)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 * 1 = 1 (left: 9 1)\n9 + 1 = 10 (left: 10)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 * 2 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 1 0.125)\n1 + 1 = 2 (left: 0.125 2)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 1 = 1.0 (left: 1 8 1.0)\n1 + 1.0 = 2.0 (left: 8 2.0)\n8 * 2.0 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 8 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 * 8 = 8 (left: 1 8)\n1 * 8 = 8 (left: 8)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 * 9 = 9 (left: 1 9)\n1 * 9 = 9 (left: 9)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 * 8 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 1 0.125)\n1 + 1 = 2 (left: 0.125 2)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 * 1 = 1 (left: 8 1)\n8 * 1 = 8 (left: 8)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 9 = 10 (left: 1 10)\n10 - 1 = 9 (left: 9)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 1 = 1.0 (left: 1 8 1.0)\n1 + 1.0 = 2.0 (left: 8 2.0)\n8 * 2.0 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 1 1 9)\n1 + 1 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n8 * 2 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 / 8 = 0.125 (left: 1 1 0.125)\n1 + 1 = 2 (left: 0.125 2)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 8 = 0.125 (left: 2 0.125)\n2 / 0.125 = 16.0 (left: 16.0)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 * 8 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 * 1 = 1 (left: 8 1)\n8 * 1 = 8 (left: 8)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 1 1 8)\n1 + 8 = 9 (left: 1 9)\n1 * 9 = 9 (left: 9)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 + 8 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 3, "input": "1 1 1 8", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 8 2)\n1 / 2 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 * 6 = 36 (left: 36 36)\n36 - 36 = 0 (left: 0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 6 = 36 (left: 12 36)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 / 6 = 1.0 (left: 36 1.0)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 6 = 36 (left: 12 36)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 6 = 36 (left: 12 36)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 / 6 = 1.0 (left: 36 1.0)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 * 6 = 36 (left: 0 36)\n0 + 36 = 36 (left: 36)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 1.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 + 6 = 12 (left: 36 12)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 1.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 - 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n12 - 6 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 + 6 = 12 (left: 36 12)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 - 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 6 = 36 (left: 12 36)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 + 6 = 12 (left: 36 12)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 / 1.0 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 / 6 = 1.0 (left: 12 1.0)\n12 * 1.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 - 36 = -30 (left: 6 -30)\n6 - -30 = 36 (left: 36)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 / 6 = 1.0 (left: 12 1.0)\n12 * 1.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 / 6 = 6.0 (left: 6 6.0)\n6 + 6 = 12 (left: 6.0 12)\n6.0 + 12 = 18 (left: 18)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 * 0 = 0 (left: 6 0)\n6 + 0 = 6 (left: 6)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 / 6 = 1.0 (left: 12 1.0)\n12 * 1.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 1.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 / 6 = 1.0 (left: 36 1.0)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 / 6 = 6.0 (left: 6 6.0)\n6 + 6 = 12 (left: 6.0 12)\n6.0 + 12 = 18 (left: 18)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n12 - 6 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 12 = 72 (left: 6 72)\n72 / 6 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 * 36 = 216 (left: 6 216)\n216 / 6 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n12 - 6 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 1.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 * 36 = 216 (left: 6 216)\n216 / 6 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 / 6 = 6.0 (left: 6 6.0)\n6 + 6 = 12 (left: 6.0 12)\n6.0 + 12 = 18 (left: 18)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 / 1.0 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 / 6 = 6.0 (left: 6 6.0)\n6 + 6 = 12 (left: 6.0 12)\n6.0 + 12 = 18 (left: 18)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 / 1.0 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 6 = 36 (left: 12 36)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 * 6 = 36 (left: 0 36)\n0 + 36 = 36 (left: 36)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 / 1.0 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 - 36 = -30 (left: 6 -30)\n6 - -30 = 36 (left: 36)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 / 6 = 6.0 (left: 6 6.0)\n6 + 6 = 12 (left: 6.0 12)\n6.0 + 12 = 18 (left: 18)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 / 6 = 1.0 (left: 12 1.0)\n12 * 1.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 - 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 * 6 = 36 (left: 0 36)\n0 + 36 = 36 (left: 36)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 / 6 = 1.0 (left: 36 1.0)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 / 6 = 6.0 (left: 6 6.0)\n6 + 6 = 12 (left: 6.0 12)\n6.0 + 12 = 18 (left: 18)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n12 / 6 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n36 - 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 * 0 = 0 (left: 6 0)\n6 + 0 = 6 (left: 6)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 12 = 18 (left: 6 18)\n6 + 18 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 / 1.0 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "12 / 6 = 2.0 (left: 6 6 2.0)\n6 + 6 = 12 (left: 2.0 12)\n2.0 * 12 = 24.0 (left: 24.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 / 6 = 1.0 (left: 36 1.0)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 * 6 = 36 (left: 12 36)\n36 - 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 6 = 36 (left: 1.0 36)\n36 / 1.0 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 6 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 6 = 12 (left: 1.0 12)\n1.0 * 12 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n12 - 6 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n12 - 6 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 * 1.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 100, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 * 36 = 216 (left: 6 216)\n216 / 6 = 36.0 (left: 36.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 + 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 / 6 = 1.0 (left: 6 6 1.0)\n6 + 1.0 = 7.0 (left: 6 7.0)\n6 * 7.0 = 42.0 (left: 42.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 - 6 = 0 (left: 6 6 0)\n6 + 0 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 - 6 = 0 (left: 36 0)\n36 - 0 = 36 (left: 36)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 * 6 = 36 (left: 6 6 36)\n6 / 6 = 1.0 (left: 36 1.0)\n36 - 1.0 = 35.0 (left: 35.0)"}
{"idx": 4, "input": "6 6 6 6", "reward": 0.001, "generate_data": "6 + 6 = 12 (left: 6 6 12)\n6 / 6 = 1.0 (left: 12 1.0)\n12 * 1.0 = 12.0 (left: 12.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n12 * 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 - 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n1 - 13 = -12 (left: 2 -12)\n2 - -12 = 14 (left: 14)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n13 - 2 = 11 (left: 1 11)\n1 + 11 = 12 (left: 12)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 + 2 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 / 2 = 0.5 (left: 1 12 0.5)\n1 * 12 = 12 (left: 0.5 12)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 + 12 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 - 2 = -1 (left: 12 -1)\n12 - -1 = 13 (left: 13)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 + 12 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n1 * 12 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n12 * 3 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "12 / 2 = 6.0 (left: 1 1 6.0)\n1 + 1 = 2 (left: 6.0 2)\n6.0 * 2 = 12.0 (left: 12.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 + 2 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n2 - 1 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 * 12 = 24 (left: 1 1 24)\n1 * 1 = 1 (left: 24 1)\n24 * 1 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n12 * 3 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 + 2 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n1 / 3 = 0.33333 (left: 12 0.33333)\n12 / 0.33333 = 36.0 (left: 36.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n13 - 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 2 11)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n12 * 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 - 1 = 0 (left: 2 12 0)\n2 + 0 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n1 + 2 = 3 (left: 13 3)\n13 + 3 = 16 (left: 16)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n1 * 12 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n2 * 13 = 26 (left: 1 26)\n26 - 1 = 25 (left: 25)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n2 + 13 = 15 (left: 1 15)\n1 * 15 = 15 (left: 15)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 * 12 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 * 12 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n1 + 13 = 14 (left: 2 14)\n2 + 14 = 16 (left: 16)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n12 * 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 + 2 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n12 * 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 * 12 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n1 + 13 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n12 * 3 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n1 + 2 = 3 (left: 13 3)\n13 + 3 = 16 (left: 16)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 + 12 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n2 - 1 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 - 1 = 0 (left: 2 12 0)\n12 + 0 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n12 * 3 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 * 12 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 - 2 = -1 (left: 1 12 -1)\n1 - -1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 / 2 = 0.5 (left: 1 12 0.5)\n12 * 0.5 = 6.0 (left: 1 6.0)\n1 * 6.0 = 6.0 (left: 6.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n13 - 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 * 12 = 24 (left: 1 1 24)\n1 + 24 = 25 (left: 1 25)\n25 - 1 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n13 - 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 * 12 = 24 (left: 1 1 24)\n1 / 24 = 0.0416666666666666 (left: 1 0.0416666666666666)\n1 / 0.0416666666666666 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 + 12 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 2 12 2)\n2 + 2 = 4 (left: 12 4)\n12 * 4 = 48 (left: 48)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n13 - 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n13 - 1 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 2 12 1)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 2 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n12 / 3 = 4.0 (left: 1 4.0)\n1 * 4.0 = 4.0 (left: 4.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 2 13)\n2 + 13 = 15 (left: 1 15)\n1 * 15 = 15 (left: 15)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "2 * 12 = 24 (left: 1 1 24)\n1 / 24 = 0.04166666666666666 (left: 1 0.0416666666666666)\n1 / 0.04166666666666666 = 24.0 (left: 24.0)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 / 12 = 0.083333333333333\n2 / 0.083333333333333 = 24.0"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "12 - 1 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n12 - 1 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 1 12 3)\n1 * 3 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 100, "generate_data": "1 - 1 = 0 (left: 2 12 0)\n2 + 0 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 5, "input": "1 1 2 12", "reward": 0.001, "generate_data": "1 - 2 = -1 (left: 1 12 -1)\n1 + 12 = 13 (left: -1 13)\n13 - -1 = 14 (left: 14)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 / 12 = 0.16 (left: 1 0.16)\n1 / 0.16 = 6.25 (left: 6.25)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n2 * 8 = 16 (left: 1 16)\n1 * 16 = 16 (left: 16)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 2 = 4 (left: 1 6 4)\n1 * 4 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 2 = 4 (left: 1 6 4)\n1 * 6 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 + 12 = 13 (left: 2 13)\n2 * 13 = 26 (left: 26)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 * 1.0 = 1.0 (left: 6 1.0)\n6 * 1.0 = 6.0 (left: 6.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n6 * 1.0 = 6.0 (left: 1 6.0)\n1 * 6.0 = 6.0 (left: 6.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 + 2 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 + 2 = 3 (left: 12 3)\n12 * 3 = 36 (left: 36)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 * 2 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n2 * 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 2 = 4 (left: 1 6 4)\n6 * 4 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 6 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 6 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 6 = 0.33333 (left: 1 2 0.33333)\n1 + 2 = 3 (left: 0.33333 3)\n3 / 0.33333 = 9.0 (left: 9.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 * 2 = 4 (left: 1 6 4)\n4 - 1 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 + 1 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 2 = 4 (left: 1 6 4)\n6 * 4 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 + 6 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 / 2 = 0.5 (left: 8 0.5)\n8 * 0.5 = 4.0 (left: 4.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n6 * 1 = 6 (left: 2 6)\n2 * 6 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 2 = 4 (left: 1 6 4)\n1 / 4 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n2 * 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 2 2 7)\n2 + 2 = 4 (left: 7 4)\n7 + 4 = 11 (left: 11)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 2 2 7)\n2 + 7 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 * 6 = 6 (left: 1.0 6)\n1.0 * 6 = 6.0 (left: 6.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 8 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n6 - 2 = 4 (left: 2 4)\n2 * 4 = 8 (left: 8)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 / 2 = 0.5 (left: 2 6 0.5)\n2 * 6 = 12 (left: 0.5 12)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 2 = 2 (left: 2 6 2)\n2 + 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 1 2 4)\n1 * 2 = 2 (left: 4 2)\n4 * 2 = 8 (left: 8)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "6 / 2 = 3.0 (left: 1 2 3.0)\n1 + 2 = 3 (left: 3.0 3)\n3.0 * 3 = 9.0 (left: 9.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 2 2 7)\n2 + 2 = 4 (left: 7 4)\n7 + 4 = 11 (left: 11)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n2 * 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n6 + 3 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 - 2 = -1 (left: 2 6 -1)\n2 - -1 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 + 2 = 3 (left: 8 3)\n8 * 3 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 2 6 1)\n2 * 1 = 2 (left: 6 2)\n6 * 2 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 * 12 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 2 2 7)\n7 - 2 = 5 (left: 2 5)\n2 * 5 = 10 (left: 10)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 6 = 7 (left: 2 2 7)\n2 * 7 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 1 6 1.0)\n1 + 1.0 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n2 * 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 1 2 12)\n2 / 12 = 0.1666666666666666 (left: 1 0.1666666666666666)\n1 / 0.1666666666666666 = 6.25 (left: 6.25)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 1 2 8)\n1 * 2 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n2 * 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 2 6 3)\n6 * 3 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "6 / 2 = 3.0 (left: 1 2 3.0)\n1 + 2 = 3 (left: 3.0 3)\n3.0 * 3 = 9.0 (left: 9.0)"}
{"idx": 6, "input": "1 2 2 6", "reward": 0.001, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 + 6 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 6, "input": "1 2 2 6", "reward": 100, "generate_data": "1 * 6 = 6 (left: 2 2 6)\n2 * 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n12 - 1 = 11 (left: 10 11)\n10 + 11 = 21 (left: 21)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n12 + 2 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 10 13)\n1 - 10 = -9 (left: 13 -9)\n13 - -9 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 1 12 9)\n1 + 9 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 12 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 1 12 9)\n1 + 9 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 / 1 = 1.0 (left: 10 12 1.0)\n10 + 1.0 = 11.0 (left: 12 11.0)\n12 + 11.0 = 23.0 (left: 23.0)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n12 + 2 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 * 22 = 22 (left: 1 22)\n1 + 22 = 23 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 / 10 = 0.1 (left: 1 12 0.1)\n1 / 0.1 = 10.0 (left: 12 10.0)\n12 + 10.0 = 22.0 (left: 22.0)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 1 12 9)\n1 + 12 = 13 (left: 9 13)\n9 + 13 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n12 * 1 = 12 (left: 10 12)\n10 + 12 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n12 + 11 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 * 12 = 12 (left: 10 12)\n10 + 12 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 1 12 9)\n1 + 9 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n1 - 10 = -9 (left: 12 -9)\n12 - -9 = 21 (left: 21)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n12 + 2 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 * 10 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 12 = 13 (left: 1 10 13)\n10 + 13 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 / 10 = 0.1 (left: 1 12 0.1)\n1 * 12 = 12 (left: 0.1 12)\n12 / 0.1 = 120.0 (left: 120.0)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 22 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 + 12 = 13 (left: 10 13)\n10 + 13 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 - 1 = 0 (left: 22 0)\n22 + 0 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 10 13)\n13 - 1 = 12 (left: 10 12)\n10 + 12 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n12 + 11 = 23 (left: 1 23)\n1 + 23 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n12 + 1 = 13 (left: 10 13)\n10 + 13 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n12 + 2 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 * 1 = 1 (left: 22 1)\n22 + 1 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n12 + 2 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 * 12 = 12 (left: 10 12)\n10 + 12 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 * 10 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 2 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 1 12 9)\n1 + 9 = 10 (left: 12 10)\n12 + 10 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n12 + 2 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 10 12 2)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 12 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 12 = 13 (left: 11 13)\n11 + 13 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 12 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 1 12 10)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 1 12 9)\n12 + 9 = 21 (left: 1 21)\n1 + 21 = 22 (left: 22)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n10 * 12 = 120 (left: 1 120)\n1 * 120 = 120 (left: 120)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 - 12 = -11 (left: 1 10 -11)\n1 - 10 = -9 (left: -11 -9)\n-11 - -9 = 20 (left: 20)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 * 1 = 1 (left: 22 1)\n22 + 1 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "10 + 12 = 22 (left: 1 1 22)\n1 + 1 = 2 (left: 22 2)\n22 + 2 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 10 12)\n1 + 10 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 100, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 + 11 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 10 12 1)\n10 + 1 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 7, "input": "1 1 10 12", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 1 12 11)\n1 * 11 = 11 (left: 12 11)\n12 + 11 = 23 (left: 23)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n20 - 2 = 18 (left: 10 18)\n10 + 18 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 / 10 = 0.2 (left: 2 10 0.2)\n2 + 10 = 12 (left: 0.2 12)\n12 / 0.2 = 60.0 (left: 60.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 / 2 = 5.0 (left: 12 5.0)\n12 * 5.0 = 60.0 (left: 60.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 - 2 = 8 (left: 2 10 8)\n10 - 2 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n-8 - 10 = -18 (left: 2 -18)\n2 - -18 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n12 - 2 = 10 (left: 10 10)\n10 + 10 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n2 * 10 = 20 (left: -8 20)\n20 - -8 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 2 = 5.0 (left: 2 10 5.0)\n2 * 10 = 20 (left: 5.0 20)\n5.0 + 20 = 25.0 (left: 25.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 / 10 = 0.2 (left: 2 10 0.2)\n2 * 10 = 20 (left: 0.2 20)\n0.2 * 20 = 4.0 (left: 4.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 20 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 * 10 = 20 (left: 12 20)\n12 + 20 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 10 10 1.0)\n10 + 10 = 20 (left: 1.0 20)\n1.0 + 20 = 21.0 (left: 21.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 * 10 = 20 (left: 12 20)\n12 + 20 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 / 2 = 1.0 (left: 10 10 1.0)\n10 + 10 = 20 (left: 1.0 20)\n1.0 + 20 = 21.0 (left: 21.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 + 20 = 30 (left: 2 30)\n30 - 2 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 - 2 = 8 (left: 12 8)\n12 + 8 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 20 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 2 = 5.0 (left: 2 10 5.0)\n10 - 5.0 = 5.0 (left: 2 5.0)\n2 * 5.0 = 10.0 (left: 10.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 * 10 = 20 (left: 12 20)\n12 + 20 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 - 2 = 8 (left: 2 10 8)\n10 - 2 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 2 = 5.0 (left: 2 10 5.0)\n2 * 10 = 20 (left: 5.0 20)\n5.0 + 20 = 25.0 (left: 25.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n10 - -8 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n2 - 10 = -8 (left: -8 -8)\n-8 * -8 = 64 (left: 64)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n20 - 10 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 12 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 - 2 = 8 (left: 12 8)\n12 + 8 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 2 = 4 (left: 10 10 4)\n10 * 4 = 40 (left: 10 40)\n40 - 10 = 30 (left: 30)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 - 2 = 8 (left: 20 8)\n20 + 8 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 10 = 12 (left: 20 12)\n20 + 12 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n20 - 2 = 18 (left: 10 18)\n10 + 18 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 10 = 1.0 (left: 2 2 1.0)\n2 * 1.0 = 2.0 (left: 2 2.0)\n2 + 2.0 = 4.0 (left: 4.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 + 20 = 30 (left: 2 30)\n30 - 2 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 + 20 = 30 (left: 2 30)\n30 - 2 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 * 10 = 20 (left: 12 20)\n12 + 20 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 20 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n2 - 10 = -8 (left: -8 -8)\n-8 * -8 = 64 (left: 64)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 2 = 4 (left: 10 10 4)\n10 + 4 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 2 = 5.0 (left: 2 10 5.0)\n10 - 5.0 = 5.0 (left: 2 5.0)\n2 * 5.0 = 10.0 (left: 10.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n10 - -8 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 20 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 12 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 + 20 = 30 (left: 2 30)\n30 - 2 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n20 - 10 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 20 = 22 (left: 10 22)\n10 + 22 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 * 12 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n-8 - 10 = -18 (left: 2 -18)\n2 - -18 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 2 = 4 (left: 10 10 4)\n10 + 10 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 12 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n2 - 10 = -8 (left: -8 -8)\n-8 * -8 = 64 (left: 64)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 - 10 = -8 (left: 12 -8)\n12 - -8 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 / 2 = 5.0 (left: 20 5.0)\n20 + 5.0 = 25.0 (left: 25.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 20 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 * 10 = 20 (left: 12 20)\n12 + 20 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 - 2 = 8 (left: 12 8)\n12 + 8 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n2 - 10 = -8 (left: -8 -8)\n-8 * -8 = 64 (left: 64)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 12 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n20 - 2 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 - 2 = 8 (left: 12 8)\n12 + 8 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 - 10 = -8 (left: 20 -8)\n20 - -8 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 - 10 = -8 (left: 2 10 -8)\n10 - -8 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 2 = 4 (left: 10 10 4)\n10 + 10 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 10 = 1.0 (left: 2 2 1.0)\n2 + 1.0 = 3.0 (left: 2 3.0)\n2 * 3.0 = 6.0 (left: 6.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 - 2 = 8 (left: 12 8)\n12 * 8 = 96 (left: 96)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "10 / 2 = 5.0 (left: 2 10 5.0)\n10 + 5.0 = 15.0 (left: 2 15.0)\n2 * 15.0 = 30.0 (left: 30.0)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 10 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 - 2 = 8 (left: 20 8)\n20 + 8 = 28 (left: 28)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n2 + 12 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "2 + 10 = 12 (left: 2 10 12)\n10 + 12 = 22 (left: 2 22)\n2 + 22 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n2 + 10 = 12 (left: 20 12)\n20 + 12 = 32 (left: 32)"}
{"idx": 8, "input": "2 2 10 10", "reward": 100, "generate_data": "10 + 10 = 20 (left: 2 2 20)\n2 + 2 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 8, "input": "2 2 10 10", "reward": 0.001, "generate_data": "2 * 10 = 20 (left: 2 10 20)\n10 - 2 = 8 (left: 20 8)\n20 + 8 = 28 (left: 28)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 12 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 - 1 = 0 (left: 12 0)\n1200 / 0 = 1200.0 (left: 1200.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 + 13 = 14 (left: 1 14)\n1 * 14 = 14 (left: 14)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 / 2 = 6.0 (left: 1 6.0)\n1 * 6.0 = 6.0 (left: 6.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 / 1 = 1.0 (left: 1 12 1.0)\n1 + 1.0 = 2.0 (left: 12 2.0)\n12 * 2.0 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 12 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 - 1 = 0 (left: 1 12 0)\n1 * 12 = 12 (left: 0 12)\n0 + 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 12 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 12 = 12 (left: 1 12)\n1 + 12 = 13 (left: 13)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 + 1 = 2 (left: 13 2)\n13 + 2 = 15 (left: 15)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 * 13 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 12 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 1 11)\n1 + 1 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 / 2 = 6.0 (left: 1 6.0)\n1 * 6.0 = 6.0 (left: 6.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 2 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 - 1 = 0 (left: 1 12 0)\n1 + 0 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 / 2 = 6.0 (left: 1 6.0)\n1 * 6.0 = 6.0 (left: 6.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 1 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 1 11)\n1 + 1 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 + 13 = 14 (left: 1 14)\n1 + 14 = 15 (left: 15)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 + 13 = 14 (left: 1 14)\n1 + 14 = 15 (left: 15)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 / 4 = 3.0 (left: 1 1 3.0)\n1 + 1 = 2 (left: 3.0 2)\n3.0 * 2 = 6.0 (left: 6.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 + 12 = 13 (left: 1 13)\n1 + 13 = 14 (left: 14)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 / 1 = 1.0 (left: 1 12 1.0)\n1 + 1.0 = 2.0 (left: 12 2.0)\n12 * 2.0 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 / 4 = 3.0 (left: 1 1 3.0)\n1 + 1 = 2 (left: 3.0 2)\n3.0 * 2 = 6.0 (left: 6.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 - 1 = 0 (left: 1 12 0)\n1 + 12 = 13 (left: 0 13)\n0 + 13 = 13 (left: 13)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 / 2 = 6.0 (left: 1 6.0)\n1 * 6.0 = 6.0 (left: 6.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 1 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 * 12 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 1 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 1 11)\n1 * 11 = 11 (left: 1 11)\n1 + 11 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 + 13 = 14 (left: 1 14)\n1 + 14 = 15 (left: 15)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 / 1 = 12.0 (left: 1 1 12.0)\n1 * 12.0 = 12.0 (left: 1 12.0)\n1 * 12.0 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 + 12 = 13 (left: 1 1 13)\n1 * 13 = 13 (left: 1 13)\n13 + 13 = 26 (left: 1 26)\n26 - 1 = 25 (left: 25)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 1 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n1 / 2 = 0.5 (left: 12 0.5)\n12 / 0.5 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 / 12 = 0.08333333333333 (left: 1 1 0.08333333333333)\n1 + 1 = 2 (left: 0.083333333333333 2)\n2 / 0.083333333333333 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 / 1 = 1.0 (left: 1 12 1.0)\n1 + 12 = 13 (left: 1.0 13)\n1.0 + 13 = 14.0 (left: 14.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 + 1 = 2 (left: 1 12 2)\n12 * 2 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "12 - 1 = 11 (left: 1 1 11)\n1 + 11 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 * 1 = 1 (left: 1 12 1)\n1 + 1 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 9, "input": "1 1 1 12", "reward": 100, "generate_data": "1 / 1 = 1.0 (left: 1 12 1.0)\n1 + 1.0 = 2.0 (left: 12 2.0)\n12 * 2.0 = 24.0 (left: 24.0)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 - 1 = 0 (left: 1 12 0)\n1 + 12 = 13 (left: 0 13)\n0 + 13 = 13 (left: 13)"}
{"idx": 9, "input": "1 1 1 12", "reward": 0.001, "generate_data": "1 * 12 = 12 (left: 1 1 12)\n1 * 12 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n4 * 6 = 24 (left: 15 24)\n15 + 24 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 4 = 1 (left: 6 10 1)\n10 * 1 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 / 6 = 1.6 (left: 20 1.6)\n20 * 1.6 = 32.0 (left: 32.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 / 10 = 0.5 (left: 24 0.5)\n24 * 0.5 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 4 = 2.5 (left: 5 6 2.5)\n5 * 2.5 = 12.5 (left: 6 12.5)\n6 + 12.5 = 18.5 (left: 18.5)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n5 / 40 = 0.125 (left: 6 0.125)\n6 / 0.125 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 - 20 = -14 (left: 10 -14)\n10 - -14 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n20 - 6 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 + -1 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n5 * -4 = -20 (left: 4 -20)\n4 - -20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 24 = -19 (left: 10 -19)\n10 - -19 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n4 * 2.0 = 8.0 (left: 6 8.0)\n6 * 8.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 5 = 20 (left: 6 20)\n6 + 20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 + 5 = 11 (left: 4 11)\n4 * 11 = 44 (left: 44)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n10 - -2 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n4 * 6 = 24 (left: 2.0 24)\n2.0 * 24 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 / 5 = 2.0 (left: 24 2.0)\n24 * 2.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n11 - 4 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 4 = 1 (left: 6 10 1)\n6 + 1 = 7 (left: 10 7)\n10 * 7 = 70 (left: 70)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n6 - 4 = 2 (left: 15 2)\n15 + 2 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 5 10 10)\n10 + 10 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n4 * 6 = 24 (left: 15 24)\n15 + 24 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 / 20 = 0.5 (left: 6 0.5)\n6 / 0.5 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 + 10 = 16 (left: 20 16)\n20 + 16 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 + 5 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n16 - 5 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 + 5 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n10 + -2 = 8 (left: 5 8)\n5 * 8 = 40 (left: 40)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n4 * 5 = 20 (left: -4 20)\n20 - -4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 24 = -19 (left: 10 -19)\n10 - -19 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n10 + -1 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 / 4 = 1.25 (left: 6 10 1.25)\n10 * 1.25 = 12.5 (left: 6 12.5)\n6 * 12.5 = 75.0 (left: 75.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n6 + -5 = 1 (left: 4 1)\n4 * 1 = 4 (left: 4)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n4 + 10 = 14 (left: 11 14)\n11 + 14 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 * 10 = 60 (left: 4 5 60)\n60 / 5 = 12.0 (left: 4 12.0)\n4 * 12.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n5 + 16 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n10 - 5 = 5 (left: 2 5)\n2 * 5 = 10 (left: 10)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 + 20 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n4 + 10 = 14 (left: 11 14)\n11 + 14 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 6 = 24 (left: 5 24)\n5 + 24 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 * 9 = 54 (left: 54)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n20 - 10 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n4 * 6 = 24 (left: 15 24)\n15 + 24 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 * -5 = -20 (left: 6 -20)\n6 - -20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 + 5 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 4 = 1 (left: 6 10 1)\n10 - 1 = 9 (left: 6 9)\n6 * 9 = 54 (left: 54)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 10 = -6 (left: 5 6 -6)\n5 - -6 = 11 (left: 6 11)\n6 + 11 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 * 9 = 54 (left: 54)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 10 = -5 (left: 24 -5)\n24 - -5 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n4 * -1 = -4 (left: 10 -4)\n10 - -4 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 5 = 1.2 (left: 4 10 1.2)\n10 * 1.2 = 12.0 (left: 4 12.0)\n4 * 12.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n5 + 10 = 15 (left: -2 15)\n15 - -2 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 / 10 = 0.5 (left: 4 6 0.5)\n4 * 6 = 24 (left: 0.5 24)\n0.5 * 24 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n10 - 2 = 8 (left: 5 8)\n5 + 8 = 13 (left: 13)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n5 + 6 = 11 (left: 40 11)\n40 - 11 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 5 = 20 (left: 6 20)\n6 + 20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n4 - -1 = 5 (left: 10 5)\n10 * 5 = 50 (left: 50)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 5 = 9 (left: 6 10 9)\n10 + 9 = 19 (left: 6 19)\n6 + 19 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n40 - 5 = 35 (left: 6 35)\n35 - 6 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 - 10 = -4 (left: 20 -4)\n20 - -4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 - 4 = 6 (left: 11 6)\n11 + 6 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 + 1 = 5 (left: 10 5)\n10 * 5 = 50 (left: 50)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 + -1 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n6 / 5 = 1.2 (left: 40 1.2)\n40 * 1.2 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 + 4 = 8 (left: 5 8)\n5 * 8 = 40 (left: 40)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n20 - 10 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 + 10 = 16 (left: 20 16)\n20 + 16 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n5 + 2 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n6 - 5 = 1 (left: 40 1)\n40 - 1 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 * 10 = 40 (left: 1 40)\n40 - 1 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + 10 = 16 (left: -1 16)\n16 - -1 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 10 = 14 (left: 5 6 14)\n6 + 14 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n6 * 40 = 240 (left: 5 240)\n240 / 5 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 * -5 = -20 (left: 6 -20)\n6 - -20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 - 4 = 6 (left: 11 6)\n11 + 6 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 6 = 1.666666666666666666666666666666 (left: 4 5 1.66666666666666666666666666666)\n4 * 5 = 20 (left: 1.666666666666666666666666666666 * 20)\n(1.666666666666666666666666666666 * 20) / 1.666666666666666666666666666666 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 5 10 10)\n10 + 10 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n5 * -4 = -20 (left: 4 -20)\n4 - -20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n10 * -2 = -20 (left: 5 -20)\n5 - -20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + 10 = 16 (left: -1 16)\n16 - -1 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n5 + 40 = 45 (left: 6 45)\n45 - 6 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 + 1 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n16 - 5 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 5 10 10)\n10 + 10 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 - -1 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n6 + 40 = 46 (left: 5 46)\n46 - 5 = 41 (left: 41)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n4 + 15 = 19 (left: 6 19)\n6 + 19 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n5 + 2 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 * 10 = 40 (left: 1 40)\n40 - 1 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + -1 = 5 (left: 10 5)\n10 * 5 = 50 (left: 50)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 4 = 1 (left: 6 10 1)\n6 * 10 = 60 (left: 1 60)\n1 * 60 = 60 (left: 60)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 5 = 1.2 (left: 4 10 1.2)\n10 * 1.2 = 12.0 (left: 4 12.0)\n4 * 12.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 * 10 = 40 (left: 1 40)\n1 * 40 = 40 (left: 40)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 24 = -19 (left: 10 -19)\n10 - -19 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 5 = 1.2 (left: 4 10 1.2)\n4 * 1.2 = 4.8 (left: 10 4.8)\n10 * 4.8 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 + 4 = 8 (left: 5 8)\n5 * 8 = 40 (left: 40)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + -1 = 5 (left: 10 5)\n10 * 5 = 50 (left: 50)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 6 = 24 (left: 5 24)\n5 + 24 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 - -1 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 5 10 10)\n5 + 10 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 + 5 = 11 (left: 4 11)\n4 * 11 = 44 (left: 44)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 + 10 = 14 (left: 1 14)\n1 + 14 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n5 + 6 = 11 (left: 40 11)\n40 - 11 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 * 10 = 40 (left: 1 40)\n40 - 1 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n5 + 2 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n11 - 10 = 1 (left: 4 1)\n4 * 1 = 4 (left: 4)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n5 + -2 = 3 (left: 10 3)\n10 * 3 = 30 (left: 30)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 + 10 = 14 (left: 1 14)\n1 + 14 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n5 + 16 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n4 + 10 = 14 (left: 11 14)\n11 + 14 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n4 * 5 = 20 (left: -4 20)\n20 - -4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 * 5 = 30 (left: 4 30)\n30 - 4 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + -1 = 5 (left: 10 5)\n10 * 5 = 50 (left: 50)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 * -1 = -10 (left: 4 -10)\n4 - -10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 - 5 = 1 (left: 4 1)\n4 * 1 = 4 (left: 4)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 5 = 9 (left: 6 10 9)\n6 + 9 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 * 12 = 60 (left: 60)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 - 5 = 1 (left: 4 1)\n4 * 1 = 4 (left: 4)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 4 = 1.5 (left: 5 10 1.5)\n10 * 1.5 = 15.0 (left: 5 15.0)\n5 + 15.0 = 20.0 (left: 20.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n10 + -1 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n5 + 2 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n20 - 6 = 14 (left: 10 14)\n10 + 14 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 * 10 = 40 (left: 1 40)\n40 - 1 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n6 + 15 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n6 + -5 = 1 (left: 4 1)\n4 * 1 = 4 (left: 4)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 10 = 14 (left: 5 6 14)\n5 + 14 = 19 (left: 6 19)\n6 + 19 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 24 = -19 (left: 10 -19)\n10 - -19 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n5 + 10 = 15 (left: 2 15)\n2 * 15 = 30 (left: 30)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 6 = 24 (left: 5 24)\n5 + 24 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 + 5 = 11 (left: 4 11)\n4 * 11 = 44 (left: 44)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 5 = 1.2 (left: 4 10 1.2)\n10 * 1.2 = 12.0 (left: 4 12.0)\n4 + 12.0 = 16.0 (left: 16.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 4 = 2.5 (left: 5 6 2.5)\n5 + 2.5 = 7.5 (left: 6 7.5)\n6 + 7.5 = 13.5 (left: 13.5)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 4 = 1.5 (left: 5 10 1.5)\n1.5 / 5 = 0.3 (left: 10 0.3)\n10 / 0.3 = 33.3 (left: 33.3)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n6 * 2.0 = 12.0 (left: 4 12.0)\n4 * 12.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n20 - 10 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n4 + 10 = 14 (left: 11 14)\n11 + 14 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 * -5 = -20 (left: 6 -20)\n6 - -20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n4 + 10 = 14 (left: 11 14)\n11 + 14 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 4 = 1 (left: 6 10 1)\n6 + 1 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 / 5 = 1.2 (left: 6 1.2)\n6 * 1.2 = 7.2 (left: 7.2)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 - 1 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 4 = 2.5 (left: 5 6 2.5)\n5 * 6 = 30 (left: 2.5 30)\n30 - 2.5 = 27.5 (left: 27.5)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 5 10 10)\n10 + 10 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n5 - -4 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 + 20 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n24 / 5 = 4.8 (left: 10 4.8)\n10 * 4.8 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 5 = 9 (left: 6 10 9)\n10 - 9 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n11 - 4 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n6 - 4 = 2 (left: 15 2)\n15 + 2 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n5 - -4 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n6 * 2.0 = 12.0 (left: 4 12.0)\n4 * 12.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 + -1 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n11 - 4 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 5 = 20 (left: 6 20)\n6 + 20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 + -1 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 * 5 = 30 (left: 4 30)\n30 - 4 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n4 * 6 = 24 (left: 2.0 24)\n2.0 * 24 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n6 * 2.0 = 12.0 (left: 4 12.0)\n4 * 12.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n10 + 11 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + 10 = 16 (left: -1 16)\n16 + -1 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n6 / 5 = 1.2 (left: 40 1.2)\n40 * 1.2 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n6 - 2.0 = 4.0 (left: 4 4.0)\n4 + 4.0 = 8.0 (left: 8.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 4 = 2 (left: 5 10 2)\n5 + 2 = 7 (left: 10 7)\n10 + 7 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n6 - -5 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 * 5 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 4 = 11 (left: 6 11)\n6 + 11 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 10 = -6 (left: 5 6 -6)\n5 - -6 = 11 (left: 6 11)\n6 + 11 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 6 = 10 (left: 5 10 10)\n10 + 10 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 / 10 = 0.6 (left: 20 0.6)\n20 * 0.6 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 10 = 14 (left: 5 6 14)\n6 + 14 = 20 (left: 5 20)\n5 + 20 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n5 + 16 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 4 = 2.5 (left: 5 6 2.5)\n6 * 2.5 = 15.0 (left: 5 15.0)\n5 + 15.0 = 20.0 (left: 20.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n5 * -2 = -10 (left: 10 -10)\n10 - -10 = 20 (left: 20)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 * 5 = 30 (left: 4 30)\n30 - 4 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n6 + 15 = 21 (left: 4 21)\n4 + 21 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n4 * 5 = 20 (left: 16 20)\n16 + 20 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 + 20 = 26 (left: 10 26)\n26 - 10 = 16 (left: 26 - 16)\n26 - 16 = 10 (left: 10)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 5 = 9 (left: 6 10 9)\n10 + 9 = 19 (left: 6 19)\n6 + 19 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 6 = -2 (left: 5 10 -2)\n5 + 10 = 15 (left: -2 15)\n15 - -2 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n4 * 10 = 40 (left: 1 40)\n40 - 1 = 39 (left: 39)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n10 - 5 = 5 (left: 24 5)\n24 * 5 = 120 (left: 120)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n4 * 5 = 20 (left: 6 20)\n6 + 20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n4 * 6 = 24 (left: 2.0 24)\n2.0 * 24 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 10 = -6 (left: 5 6 -6)\n5 - -6 = 11 (left: 6 11)\n6 + 11 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 4 = 1 (left: 6 10 1)\n10 - 1 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 + 10 = 16 (left: 4 5 16)\n16 - 5 = 11 (left: 4 11)\n4 * 11 = 44 (left: 44)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n6 + 20 = 26 (left: 10 26)\n26 - 10 = 16 (left: 26 - 16)\n26 - 16 = 10 (left: 10)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 6 = 11 (left: 4 10 11)\n4 + 11 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 5 = 1.2 (left: 4 10 1.2)\n4 + 10 = 14 (left: 1.2 14)\n1.2 * 14 = 16.8 (left: 16.8)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n24 - 10 = 14 (left: 5 14)\n5 + 14 = 19 (left: 19)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 * 12 = 60 (left: 60)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n40 / 5 = 8.0 (left: 6 8.0)\n6 * 8.0 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n6 / 5 = 1.2 (left: 40 1.2)\n40 * 1.2 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 5 = 1.2 (left: 4 10 1.2)\n4 * 1.2 = 4.8 (left: 10 4.8)\n10 * 4.8 = 48.0 (left: 48.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 24 = -19 (left: 10 -19)\n10 - -19 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n40 - 5 = 35 (left: 6 35)\n35 - 6 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 5 = 9 (left: 6 10 9)\n6 + 9 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n10 - -1 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 - 5 = -1 (left: 6 10 -1)\n6 + 10 = 16 (left: -1 16)\n16 - -1 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 10 = 14 (left: 5 6 14)\n5 + 6 = 11 (left: 14 11)\n14 + 11 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "4 * 5 = 20 (left: 6 10 20)\n10 - 6 = 4 (left: 20 4)\n20 + 4 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 - 10 = -5 (left: 24 -5)\n24 - -5 = 29 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 - -5 = 9 (left: 6 9)\n6 + 9 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 6 = -1 (left: 4 10 -1)\n4 - -1 = 5 (left: 10 5)\n10 * 5 = 50 (left: 50)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 + 10 = 15 (left: 4 6 15)\n15 - 6 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 6 = 24 (left: 5 10 24)\n5 + 24 = 29 (left: 10 29)\n29 - 10 = 19 (left: 29)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 10 = -4 (left: 4 5 -4)\n5 - -4 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 5 = 2.0 (left: 4 6 2.0)\n2.0 / 4 = 0.5 (left: 6 0.5)\n6 / 0.5 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 4 = 1.5 (left: 5 10 1.5)\n5 + 1.5 = 6.5 (left: 10 6.5)\n10 * 6.5 = 65.0 (left: 65.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 6 = 4 (left: 4 5 4)\n4 + 4 = 8 (left: 5 8)\n5 * 8 = 40 (left: 40)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 * 10 = 40 (left: 5 6 40)\n5 * 6 = 30 (left: 40 30)\n40 - 30 = 10 (left: 10)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 - 5 = 1 (left: 4 10 1)\n10 * 1 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n6 + 6 = 12 (left: 5 12)\n5 + 12 = 17 (left: 17)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "5 - 10 = -5 (left: 4 6 -5)\n4 * -5 = -20 (left: 6 -20)\n6 - -20 = 26 (left: 26)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n6 + 5 = 11 (left: 4 11)\n4 + 11 = 15 (left: 15)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 - 5 = 5 (left: 4 6 5)\n5 - 4 = 1 (left: 6 1)\n6 * 1 = 6 (left: 6)"}
{"idx": 9, "input": "4 5 6 10", "reward": 100, "generate_data": "10 - 4 = 6 (left: 5 6 6)\n5 * 6 = 30 (left: 6 30)\n30 - 6 = 24 (left: 24)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "6 / 10 = 0.6 (left: 4 5 0.6)\n4 * 5 = 20 (left: 0.6 20)\n0.6 * 20 = 12.0 (left: 12.0)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "4 + 5 = 9 (left: 6 10 9)\n10 + 9 = 19 (left: 6 19)\n6 + 19 = 25 (left: 25)"}
{"idx": 9, "input": "4 5 6 10", "reward": 0.001, "generate_data": "10 / 4 = 2.5 (left: 5 6 2.5)\n5 * 6 = 30 (left: 2.5 30)\n30 / 2.5 = 12.0 (left: 12.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 4 = -3 (left: 2 7 -3)\n2 + 7 = 9 (left: -3 9)\n9 - -3 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n4 - -1 = 5 (left: 7 5)\n7 + 5 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 2 = 2 (left: 1 7 2)\n7 - 1 = 6 (left: 2 6)\n2 * 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 4 = 5 (left: 14 5)\n14 + 5 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 * 8 = 32 (left: 2 32)\n32 - 2 = 30 (left: 30)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 * 4 = 8 (left: -6 8)\n8 - -6 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 + 2 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n4 * 5 = 20 (left: 1 20)\n1 + 20 = 21 (left: 21)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n410 (left: 410)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n4 * 9 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 2 = 2 (left: 1 7 2)\n1 + 7 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 / 4 = 0.25 (left: 2 7 0.25)\n2 + 7 = 9 (left: 0.25 9)\n9 / 0.25 = 36.0 (left: 36.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 - -6 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 4 = -3 (left: 2 7 -3)\n2 + 7 = 9 (left: -3 9)\n9 - -3 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 4 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 4 = 8 (left: 1 7 8)\n1 + 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 4 = 8 (left: 1 7 8)\n7 - 1 = 6 (left: 8 6)\n8 + 6 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 4 = 8 (left: 1 7 8)\n7 + 8 = 15 (left: 1 15)\n1 + 15 = 16 (left: 16)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n1 + 7 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 - 4 = -2 (left: -6 -2)\n-6 * -2 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n7 - -1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n4 * 7 = 28 (left: 2 28)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n2 * 7 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 - -6 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 * 4 = 8 (left: 7 8)\n7 + 8 = 15 (left: 15)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 8 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n4 + 9 = 13 (left: 1 13)\n13 - 1 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n4 * 7 = 28 (left: 2 28)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n7 - 1 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 2 7 5)\n2 * 7 = 14 (left: 5 14)\n5 + 14 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n11 - 1 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n4 * 7 = 28 (left: 1 28)\n28 - 1 = 27 (left: 27)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 4 = 8 (left: 1 7 8)\n7 - 1 = 6 (left: 8 6)\n8 + 6 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 4 = 8 (left: 1 7 8)\n1 - 7 = -6 (left: 8 -6)\n8 - -6 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 4 = 6 (left: 8 6)\n8 * 6 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 * 7 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n7 - 1 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n7 - 1 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 + 3 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n8 - 2 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n4 + 14 = 18 (left: 1 18)\n1 + 18 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n14 - 1 = 13 (left: 4 13)\n4 + 13 = 17 (left: 17)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 + 7 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n1 + 7 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)\nThe answer is (2 + 4) + (1 + 7) = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n4 + 7 = 11 (left: -1 11)\n11 - -1 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 * 7 = 14 (left: 4 14)\n4 + 14 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 - 11 = -10 (left: 2 -10)\n2 - -10 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 * 8 = 32 (left: 2 32)\n32 - 2 = 30 (left: 30)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n7 - 1 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 4 = 5 (left: 14 5)\n14 + 5 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 * 8 = 16 (left: 4 16)\n4 * 16 = 64 (left: 64)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n4 + 14 = 18 (left: 1 18)\n1 * 18 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n4 + 7 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 * 4 = 4 (left: 5 4)\n5 * 4 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n7 + -1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n14 - 1 = 13 (left: 4 13)\n4 * 13 = 52 (left: 52)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 + 3 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 7 = -3 (left: 1 2 -3)\n1 - -3 = 4 (left: 2 4)\n2 * 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 + 7 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n1 + 7 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n2 + 4 = 6 (left: 7 6)\n7 + 6 = 13 (left: 13)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 + 2 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 8 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "2 - 7 = -5 (left: 1 4 -5)\n1 - -5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 * 4 = 8 (left: -6 8)\n8 - -6 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n2 * 7 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 4 = 5 (left: 2 7 5)\n7 + 5 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n4 * 7 = 28 (left: 2 28)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n4 + 7 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 + 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n4 * 7 = 28 (left: 2 28)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 + 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n6 - 2 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n9 - 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n4 + 9 = 13 (left: 1 13)\n13 - 1 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 * 7 = 28 (left: 1 2 28)\n1 * 2 = 2 (left: 28 2)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 - -6 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 - -6 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n9 - 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n14 - 1 = 13 (left: 4 13)\n4 * 13 = 52 (left: 52)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 4 = 6 (left: 8 6)\n8 * 6 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 - 4 = -3 (left: 14 -3)\n14 - -3 = 17 (left: 17)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n8 - 4 = 4 (left: 2 4)\n2 * 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n2 * 7 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 4 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 4 = 5 (left: 14 5)\n14 + 5 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n2 + 7 = 9 (left: 3 9)\n3 * 9 = 27 (left: 27)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n2 * 3 = 6 (left: 7 6)\n7 + 6 = 13 (left: 13)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n4 * 7 = 28 (left: 1 28)\n28 - 1 = 27 (left: 27)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 + 6 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n4 + 7 = 11 (left: 3 11)\n3 + 11 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 * 14 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 4 = 3 (left: 1 2 3)\n2 * 3 = 6 (left: 1 6)\n1 * 6 = 6 (left: 6)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 4 = 5 (left: 2 7 5)\n7 + 5 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 4 = 5 (left: 2 7 5)\n7 + 5 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 - 1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 - 4 = -3 (left: 14 -3)\n14 - -3 = 17 (left: 17)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 4 = 6 (left: 8 6)\n8 * 6 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 * 14 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 + 7 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 * 7 = 28 (left: 1 2 28)\n1 + 2 = 3 (left: 28 3)\n28 - 3 = 25 (left: 25)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n2 + 7 = 9 (left: 3 9)\n3 * 9 = 27 (left: 27)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 / 2 = 0.5 (left: 4 7 0.5)\n7 / 0.5 = 14.0 (left: 4 14.0)\n4 + 14.0 = 18.0 (left: 18.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 4 = -3 (left: 2 7 -3)\n2 - 7 = -5 (left: -3 -5)\n-3 * -5 = 15 (left: 15)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n7 + -1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n7 + 3 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 * 5 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 * 5 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n1 + 7 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n5 - 1 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n2 * 7 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 4 = 6 (left: 8 6)\n8 * 6 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 * 7 = 14 (left: 4 14)\n4 + 14 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 * 14 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 + 2 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n4 + 9 = 13 (left: 1 13)\n13 - 1 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 4 = 8 (left: 1 7 8)\n7 - 1 = 6 (left: 8 6)\n8 * 6 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 * 4 = 8 (left: -6 8)\n8 - -6 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 + 8 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n9 - 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 * 5 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n4 * 9 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 4 = 5 (left: 14 5)\n14 + 5 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n4 + 7 = 11 (left: 3 11)\n3 + 11 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 + 7 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n4 * 9 = 36 (left: 1 36)\n36 - 1 = 35 (left: 35)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 + 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 4 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n5 - 1 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 4 = 3 (left: 1 2 3)\n1 + 2 = 3 (left: 3 3)\n3 * 3 = 9 (left: 9)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n4 - -6 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n9 - 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 + 4 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 * 8 = 32 (left: 2 32)\n32 - 2 = 30 (left: 30)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 + 3 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n2 + 7 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 2 = 2 (left: 4 7 2)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n7 + -1 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 * 5 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n2 / 4 = 0.5 (left: 8 0.5)\n8 / 0.5 = 16.0 (left: 16.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 / 7 = 0.142857 (left: 2 4 0.142857)\n2 + 4 = 6 (left: 0.142857 6)\n6 / 0.142857 = 42.0 (left: 42.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n7 + 3 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 4 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 * 7 = 28 (left: 1 2 28)\n1 * 28 = 28 (left: 2 28)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n7 - 1 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 + 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n4 * 7 = 28 (left: 2 28)\n28 - 2 = 26 (left: 26)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 4 = 5 (left: 14 5)\n14 + 5 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n8 - 2 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 - -6 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 + 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n6 / 2 = 3.0 (left: 4 3.0)\n4 * 3.0 = 12.0 (left: 12.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n9 / 1 = 9.0 (left: 4 9.0)\n4 * 9.0 = 36.0 (left: 36.0)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n7 - 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 4 = 6 (left: 1 7 6)\n7 - 1 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 7 = -6 (left: 2 4 -6)\n2 * -6 = -12 (left: 4 -12)\n4 - -12 = 16 (left: 16)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n2 + 7 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n2 + 11 = 13 (left: 1 13)\n13 - 1 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 2 = 2 (left: 1 7 2)\n7 - 1 = 6 (left: 2 6)\n2 * 6 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 2 = 3 (left: 11 3)\n11 + 3 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 2 = -1 (left: 4 7 -1)\n7 - -1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n8 - 2 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 4 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 9 = 10 (left: 4 10)\n4 + 10 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 * 14 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 * 8 = 32 (left: 2 32)\n32 - 2 = 30 (left: 30)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n2 + 11 = 13 (left: 1 13)\n13 - 1 = 12 (left: 12)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n11 - 1 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n2 * 7 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 4 = 5 (left: 2 7 5)\n2 * 5 = 10 (left: 7 10)\n7 + 10 = 17 (left: 17)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 6 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 4 = 5 (left: 14 5)\n14 + 5 = 19 (left: 19)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 4 = 4 (left: 2 7 4)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 + 8 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "4 - 1 = 3 (left: 2 7 3)\n7 - 2 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 + 7 = 9 (left: 1 4 9)\n1 + 4 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n2 * 6 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 7 = 8 (left: 2 4 8)\n4 - 2 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 * 7 = 7 (left: 2 4 7)\n7 - 2 = 5 (left: 4 5)\n4 * 5 = 20 (left: 20)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 - 4 = -3 (left: 2 7 -3)\n2 * 7 = 14 (left: -3 14)\n14 - -3 = 17 (left: 17)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 - 1 = 1 (left: 4 7 1)\n7 + 1 = 8 (left: 4 8)\n4 * 8 = 32 (left: 32)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n14 - 4 = 10 (left: 1 10)\n1 * 10 = 10 (left: 10)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "1 + 2 = 3 (left: 4 7 3)\n4 + 7 = 11 (left: 3 11)\n3 + 11 = 14 (left: 14)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "7 - 1 = 6 (left: 2 4 6)\n4 * 6 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "7 - 2 = 5 (left: 1 4 5)\n1 + 5 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 100, "generate_data": "4 + 7 = 11 (left: 1 2 11)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 10, "input": "1 2 4 7", "reward": 0.001, "generate_data": "2 * 7 = 14 (left: 1 4 14)\n1 + 14 = 15 (left: 4 15)\n4 + 15 = 19 (left: 19)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n11 - -3 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 / 0.25 = 64.0 (left: 64.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 - 10 = 1 (left: 8 1)\n8 * 1 = 8 (left: 8)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 * 0.25 = 4.0 (left: 4.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n8 + -9 = 1 (left: 5 1)\n5 * 1 = 5 (left: 5)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 * 8 = 16 (left: 16 16)\n16 + 16 = 32 (left: 32)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 13 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 - 8 = -6 (left: 16 -6)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n11 + 3 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n5 + 11 = 16 (left: -6 16)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n13 - 8 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 2 = 9 (left: 5 8 9)\n5 + 8 = 13 (left: 9 13)\n9 + 13 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n11 - -3 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n8 + 7 = 15 (left: 11 15)\n11 + 15 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 + 8 = 10 (left: 16 10)\n16 + 10 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n11 + 10 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n8 + 7 = 15 (left: 11 15)\n11 + 15 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 11 = -3 (left: 2 5 -3)\n2 * 5 = 10 (left: -3 10)\n10 - -3 = 13 (left: 13)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "8 / 2 = 4.0 (left: 5 11 4.0)\n11 - 5 = 6 (left: 4.0 6)\n4.0 * 6 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 11 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n11 - -3 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n-6 - 11 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n11 - 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 * 0.25 = 4.0 (left: 4.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n2 + 8 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n10 - 5 = 5 (left: 11 5)\n11 + 5 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n11 + 10 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n-9 - 5 = -14 (left: 8 -14)\n8 - -14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 11 = 19 (left: 3 19)\n3 + 19 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n5 - 2 = 3 (left: 3 3)\n3 * 3 = 9 (left: 9)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n22 - 5 = 17 (left: 8 17)\n8 + 17 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 13 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 * 0.25 = 4.0 (left: 4.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n5 / 8 = 0.625 (left: 22 0.625)\n22 / 0.625 = 36.0 (left: 36.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 + 11 = 19 (left: -3 19)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 + 11 = 19 (left: -3 19)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n8 - -9 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n11 - -3 = 14 (left: 2 14)\n2 + 14 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 * 11 = 88 (left: 2 5 88)\n88 / 5 = 17.6 (left: 2 17.6)\n2 * 17.6 = 35.2 (left: 35.2)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n19 - 2 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 13 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 - 5 = 3 (left: 22 3)\n22 + 3 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n8 * -9 = -72 (left: 5 -72)\n5 - -72 = 77 (left: 77)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n5 + 11 = 16 (left: -6 16)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n2 - 11 = -9 (left: -3 -9)\n-3 * -9 = 27 (left: 27)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 - 5 = 3 (left: 22 3)\n22 + 3 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 - 8 = -6 (left: 16 -6)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 - 16 = -14 (left: 8 -14)\n8 - -14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n13 - 2 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n5 + 11 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 11 = 19 (left: 3 19)\n3 + 19 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 + 13 = 15 (left: 11 15)\n11 + 15 = 26 (left: 26)\n26 - 11 = 15 (left: 15)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 + 11 = 13 (left: 13 13)\n13 + 13 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n5 - 8 = -3 (left: 22 -3)\n22 - -3 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n5 - 11 = -6 (left: -6 6)\n6 - -6 = 12 (left: 12)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 - 5 = 3 (left: 22 3)\n22 + 3 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n2 + 11 = 13 (left: -3 13)\n13 - -3 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n8 - -6 = 14 (left: 2 14)\n2 + 14 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 8 = 16 (left: 5 11 16)\n16 - 11 = 5 (left: 5 11)\n5 + 11 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n11 - -3 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n11 + -6 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 / 2 = 4.0 (left: 16 4.0)\n16 + 4.0 = 20.0 (left: 20.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 + 11 = 13 (left: 13 13)\n13 + 13 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 8 = 16 (left: 5 11 16)\n16 - 5 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n5 + 3 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 19 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 - -3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n8 - 2 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n8 + 11 = 19 (left: 10 19)\n10 + 19 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n16 - 2 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 - -3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 11 = 19 (left: 3 19)\n3 + 19 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n11 + 6 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 8 = 16 (left: 5 11 16)\n5 + 16 = 21 (left: 11 21)\n11 + 21 = 32 (left: 32)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n8 + 11 = 19 (left: 7 19)\n7 + 19 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n-6 - 5 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 - -9 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 + 13 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n5 + 11 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 + 22 = 30 (left: 5 30)\n30 - 5 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n-9 - 8 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n8 * 6 = 48 (left: 2 48)\n48 / 2 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 * 16 = 32 (left: 8 32)\n32 - 8 = 24 (left: 24)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 / 0.25 = 64.0 (left: 64.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 3 = 6 (left: 11 6)\n11 * 6 = 66 (left: 66)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 11 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 8 = 16 (left: 5 11 16)\n11 - 5 = 6 (left: 16 6)\n16 + 6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n8 + -9 = 1 (left: 5 1)\n5 * 1 = 5 (left: 5)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 * 16 = 32 (left: 8 32)\n32 - 8 = 24 (left: 24)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n11 - -6 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 5 = 7 (left: 19 7)\n19 + 7 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 5 = -3 (left: 19 -3)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 5 = -3 (left: 19 -3)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 - -9 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 * 16 = 32 (left: 8 32)\n32 - 8 = 24 (left: 24)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n2 / 8 = 0.25 (left: 6 0.25)\n6 / 0.25 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n2 - -6 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 / 0.25 = 64.0 (left: 64.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n11 + 10 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n8 + 11 = 19 (left: 3 19)\n3 + 19 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 / 0.25 = 64.0 (left: 64.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n2 - 8 = -6 (left: -6 -6)\n-6 * -6 = 36 (left: 36)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n5 + 3 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n5 - 8 = -3 (left: 22 -3)\n22 - -3 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n-6 - 11 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 + 13 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n2 * 8 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n-6 - 8 = -14 (left: 2 -14)\n2 - -14 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 5 = 7 (left: 19 7)\n19 + 7 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n11 + -3 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 2 = 9 (left: 5 8 9)\n5 + 9 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 11 = -9 (left: 13 -9)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n5 - 11 = -6 (left: -6 6)\n6 - -6 = 12 (left: 12)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 5 = 7 (left: 19 7)\n19 + 7 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 5 = -3 (left: 19 -3)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n11 - 8 = 3 (left: 3 3)\n3 * 3 = 9 (left: 9)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n11 - -3 = 14 (left: 2 14)\n2 + 14 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n11 + 6 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 / 5 = 1.6 (left: 22 1.6)\n22 * 1.6 = 36.0 (left: 36.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n5 - 11 = -6 (left: -6 6)\n6 - -6 = 12 (left: 12)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 / 0.25 = 64.0 (left: 64.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n2 + 5 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 - 8 = 3 (left: 10 3)\n10 + 3 = 13 (left: 13)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n11 - 8 = 3 (left: 3 3)\n3 * 3 = 9 (left: 9)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n11 + 7 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 - -3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 11 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 2 = 3 (left: 8 11 3)\n11 + 3 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 + 16 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n5 + 13 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n11 + 10 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "11 - 5 = 6 (left: 2 8 6)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 + 11 = 19 (left: -3 19)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n2 * 8 = 16 (left: -6 16)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 13 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 11 = -9 (left: 13 -9)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n11 - 8 = 3 (left: 10 3)\n10 + 3 = 13 (left: 13)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 2 = 9 (left: 5 8 9)\n9 / 5 = 1.8 (left: 8 1.8)\n8 * 1.8 = 14.0 (left: 14.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n5 + 13 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 * 0.25 = 4.0 (left: 4.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 + 16 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 + 13 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 * 8 = 16 (left: 16 16)\n16 + 16 = 32 (left: 32)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 + 13 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 - -9 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 11 = -9 (left: 13 -9)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 + 11 = 13 (left: 13 13)\n13 + 13 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n5 - 22 = -17 (left: 8 -17)\n8 - -17 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 + 3 = 5 (left: 11 5)\n11 * 5 = 55 (left: 55)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "8 / 2 = 4.0 (left: 5 11 4.0)\n11 - 5 = 6 (left: 4.0 6)\n4.0 * 6 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 + 11 = 13 (left: 13 13)\n13 + 13 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 11 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n16 - 2 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n5 + 13 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n11 - -3 = 14 (left: 2 14)\n2 + 14 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 11 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n5 + 6 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 19 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n2 * 8 = 16 (left: -6 16)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n-3 - 11 = -14 (left: 8 -14)\n8 - -14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 / 2 = 4.0 (left: 16 4.0)\n16 + 4.0 = 20.0 (left: 20.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 * 8 = 16 (left: 16 16)\n16 + 16 = 32 (left: 32)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n5 + 8 = 13 (left: 13 13)\n13 + 13 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 - 5 = 3 (left: 22 3)\n22 + 3 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 5 = -3 (left: 19 -3)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 8 = 16 (left: 5 11 16)\n5 + 16 = 21 (left: 11 21)\n11 + 21 = 32 (left: 32)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n8 + 22 = 30 (left: 5 30)\n30 - 5 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 11 = -9 (left: 13 -9)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n5 + 8 = 13 (left: 22 13)\n22 + 13 = 35 (left: 35)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n19 - 2 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n-3 - 11 = -14 (left: 8 -14)\n8 - -14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n5 + 13 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n-9 - 8 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n13 - 8 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 - 8 = -3 (left: -9 -3)\n-9 * -3 = 27 (left: 27)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n5 - 22 = -17 (left: 8 -17)\n8 - -17 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 - 2 = 9 (left: 13 9)\n13 + 9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 2 = 6 (left: 5 11 6)\n11 - 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 3 = 6 (left: 11 6)\n11 * 6 = 66 (left: 66)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 13 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 11 = -6 (left: 2 8 -6)\n8 - -6 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 * 0.25 = 4.0 (left: 4.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n8 + 13 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 + 11 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n11 + 7 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 + 13 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 + 11 = 19 (left: -3 19)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 * 5 = 10 (left: 19 10)\n19 + 10 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 + 11 = 19 (left: -3 19)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n19 - 2 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n2 + 5 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 8 = -6 (left: 5 11 -6)\n5 + 11 = 16 (left: -6 16)\n16 - -6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 11 = 22 (left: 5 8 22)\n22 - 5 = 17 (left: 8 17)\n8 + 17 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n2 * 11 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 5 = -3 (left: 19 -3)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n2 * 5 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n8 - -3 = 11 (left: 11 11)\n11 + 11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 + 16 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 / 2 = 4.0 (left: 5 11 4.0)\n11 + 4.0 = 15.0 (left: 5 15.0)\n5 + 15.0 = 20.0 (left: 20.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 2 = 9 (left: 5 8 9)\n5 + 8 = 13 (left: 9 13)\n9 + 13 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 / 2 = 4.0 (left: 16 4.0)\n16 + 4.0 = 20.0 (left: 20.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 5 = 10 (left: 8 11 10)\n8 + 11 = 19 (left: 10 19)\n10 + 19 = 29 (left: 29)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 - 2 = 3 (left: 19 3)\n19 + 3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n-3 - 8 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 19 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 * 8 = 16 (left: 5 11 16)\n16 - 11 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 - 5 = 3 (left: 2 11 3)\n11 - 2 = 9 (left: 3 9)\n3 * 9 = 27 (left: 27)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 2 = 9 (left: 5 8 9)\n8 + 9 = 17 (left: 5 17)\n5 + 17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 2 = 9 (left: 5 8 9)\n5 + 8 = 13 (left: 9 13)\n9 + 13 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n16 - 2 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n5 + 13 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n11 + 10 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n8 + 7 = 15 (left: 11 15)\n11 + 15 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n8 - 2 = 6 (left: 16 6)\n16 + 6 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 100, "generate_data": "8 / 2 = 4.0 (left: 5 11 4.0)\n11 - 5 = 6 (left: 4.0 6)\n4.0 * 6 = 24.0 (left: 24.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 8 = 10 (left: 5 11 10)\n11 + 10 = 21 (left: 5 21)\n5 + 21 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 2 5 3)\n2 * 5 = 10 (left: 3 10)\n3 * 10 = 30 (left: 30)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n13 - 8 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n2 - 11 = -9 (left: -3 -9)\n-3 * -9 = 27 (left: 27)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 19 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n11 + 13 = 24 (left: 2 24)\n2 * 24 = 48 (left: 48)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n11 + 7 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 - -9 = 14 (left: 8 14)\n8 + 14 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n-9 - 8 = -17 (left: 5 -17)\n5 - -17 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 + 5 = 7 (left: 19 7)\n19 + 7 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 / 2 = 4.0 (left: 5 11 4.0)\n5 + 4.0 = 9.0 (left: 11 9.0)\n11 + 9.0 = 20.0 (left: 20.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 5 = 7 (left: 8 11 7)\n8 + 7 = 15 (left: 11 15)\n11 + 15 = 26 (left: 26)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 11 = 16 (left: 2 8 16)\n2 / 8 = 0.25 (left: 16 0.25)\n16 * 0.25 = 4.0 (left: 4.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n2 - 5 = -3 (left: 19 -3)\n19 - -3 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 11 = -9 (left: 5 8 -9)\n5 + 8 = 13 (left: -9 13)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 - 5 = -3 (left: 8 11 -3)\n-3 - 8 = -11 (left: 11 -11)\n11 - -11 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "11 / 5 = 2.2 (left: 2 8 2.2)\n2 + 8 = 10 (left: 2.2 10)\n2.2 * 10 = 22.0 (left: 22.0)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "2 + 11 = 13 (left: 5 8 13)\n13 - 8 = 5 (left: 5 5)\n5 + 5 = 10 (left: 10)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 + 8 = 13 (left: 2 11 13)\n2 - 11 = -9 (left: 13 -9)\n13 - -9 = 22 (left: 22)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "5 - 8 = -3 (left: 2 11 -3)\n11 + -3 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 11, "input": "2 5 8 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 2 5 19)\n5 + 19 = 24 (left: 2 24)\n24 - 2 = 22 (left: 22)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n3 + 0 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 + 4 = 7 (left: -9 7)\n7 - -9 = 16 (left: 7 16)\n7 + 16 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n13 + 16 = 29 (left: 3 29)\n29 - 3 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n4 + 52 = 56 (left: 3 56)\n56 / 3 = 18.0 (left: 18.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n13 - 7 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n-9 - 4 = -13 (left: 3 -13)\n3 - -13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n13 - 7 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 4 = 12 (left: 9 12)\n9 + 12 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n4 + 39 = 43 (left: 4 43)\n43 - 4 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 13 = 16 (left: 1.0 16)\n1.0 * 16 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 * 9 = 36 (left: 3 36)\n36 - 3 = 33 (left: 33)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n3 + 0 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 - 4 = 35 (left: 4 35)\n35 - 4 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + -1 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n13 - 7 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n4 * 52 = 216 (left: 3 216)\n216 / 3 = 72.0 (left: 72.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 + 17 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 - 4 = 35 (left: 4 35)\n35 - 4 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 7 = 11 (left: 13 11)\n13 + 11 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 / 4 = 13.0 (left: 3 13.0)\n3 + 13.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n17 - 1 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n4 / 4 = 1.0 (left: 39 1.0)\n39 - 1.0 = 38.0 (left: 38.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + -1 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 + 4 = 7 (left: 17 7)\n17 + 7 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 + 4 = 7 (left: 17 7)\n17 + 7 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n12 - 4 = 8 (left: 13 8)\n13 + 8 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 13 = 16 (left: 1.0 16)\n1.0 * 16 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 / 4 = 0.75 (left: 4 13 0.75)\n13 + 0.75 = 13.75 (left: 4 13.75)\n4 + 13.75 = 17.75 (left: 17.75)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 / 4 = 2.25 (left: 3 2.25)\n3 * 2.25 = 6.75 (left: 6.75)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 - 1.0 = 2.0 (left: 13 2.0)\n13 * 2.0 = 26.0 (left: 26.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 - 4 = 48 (left: 3 48)\n48 / 3 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 + 9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 / 4 = 13.0 (left: 3 13.0)\n3 + 13.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 + 17 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + -1 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 + 9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 + 9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 4 = -1 (left: 17 -1)\n17 - -1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n3 * 52 = 156 (left: 4 156)\n156 / 4 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n4 - -9 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 - 1 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 3 = 10 (left: 4 4 10)\n4 + 10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 + 12 = 25 (left: 4 25)\n25 - 4 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 + 13 = 17 (left: 12 17)\n12 + 17 = 29 (left: 29)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 - 1.0 = 2.0 (left: 13 2.0)\n13 * 2.0 = 26.0 (left: 26.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 13 = 17 (left: 7 17)\n7 + 17 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n12 / 4 = 3.0 (left: 13 3.0)\n13 * 3.0 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 4 = 12 (left: 9 12)\n9 + 12 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n4 - 4 = 0 (left: 39 0)\n39 - 0 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n3 + 0 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n3 + 13 = 16 (left: 0 16)\n0 + 16 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 - 3 = 1 (left: 17 1)\n17 + 1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n16 - 3 = 13 (left: 13 13)\n13 + 13 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 + 17 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 + 9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 - 4 = 35 (left: 4 35)\n35 - 4 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n13 - 3 = 10 (left: 16 10)\n16 + 10 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 - 4 = 35 (left: 4 35)\n35 - 4 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 + 12 = 16 (left: 13 16)\n13 + 16 = 29 (left: 29)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n4 * 52 = 216 (left: 3 216)\n216 / 3 = 72.0 (left: 72.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 13 = 16 (left: 1.0 16)\n1.0 * 16 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n3 - 16 = -13 (left: 13 -13)\n13 - -13 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 / 4 = 1.0 (left: 16 1.0)\n16 * 1.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - 13 = -9 (left: -1 -9)\n-1 * -9 = 9 (left: 9)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 7 = 11 (left: 13 11)\n13 + 11 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 * 9 = 108 (left: 108)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n3 + 52 = 55 (left: 4 55)\n55 - 4 = 51 (left: 55)\n55 > 4 (left: 55)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 4 = 8 (left: 16 8)\n16 + 8 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 * -9 = -27 (left: 4 -27)\n4 - -27 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 - 12 = -8 (left: 13 -8)\n13 - -8 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n13 + 0 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 / 4 = 9.75 (left: 4 9.75)\n4 * 9.75 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n16 - 4 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n3 * 52 = 156 (left: 4 156)\n156 / 4 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 + 17 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n4 - -9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 13 = 17 (left: 7 17)\n7 + 17 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - 4 = -1 (left: -9 -1)\n-9 * -1 = 9 (left: 9)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 13 = 17 (left: 7 17)\n7 + 17 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 + 4 = 8 (left: -10 8)\n8 - -10 = 18 (left: 8 18)\n8 + 18 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n13 + 0 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 1.0 = 4.0 (left: 13 4.0)\n13 * 4.0 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + -1 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + 13 = 17 (left: -1 17)\n17 - -1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n4 + 4 = 8 (left: 39 8)\n39 - 8 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n3 - 13 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 - 4 = 35 (left: 4 35)\n35 - 4 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n-10 - 4 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 * 4 = 12 (left: -9 12)\n12 - -9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 * 4 = 12 (left: -9 12)\n12 - -9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n13 - 8 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 + 17 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n3 - 8 = -5 (left: 13 -5)\n13 - -5 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 - 13 = -9 (left: 12 -9)\n12 - -9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 + 12 = 25 (left: 4 25)\n25 - 4 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 / 4 = 1.0 (left: 16 1.0)\n16 * 1.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 * -1 = -4 (left: 13 -4)\n13 - -4 = 17 (left: 17)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 + 13 = 17 (left: 12 17)\n12 + 17 = 29 (left: 29)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 - 12 = -8 (left: 13 -8)\n13 - -8 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 / 4 = 9.75 (left: 4 9.75)\n4 * 9.75 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n13 + 16 = 29 (left: 3 29)\n29 - 3 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 - 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n3 + 0 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 4 = -1 (left: 17 -1)\n17 - -1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 * -10 = -40 (left: 4 -40)\n4 - -40 = 44 (left: 44)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 + 13 = 17 (left: 12 17)\n12 + 17 = 29 (left: 29)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n3 + 16 = 19 (left: 13 19)\n13 + 19 = 32 (left: 32)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n3 - 8 = -5 (left: 13 -5)\n13 - -5 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 - 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + -1 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 * -9 = -27 (left: 4 -27)\n4 - -27 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n13 + 0 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 1 = 5 (left: 13 5)\n13 + 5 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 / 4 = 13.0 (left: 3 13.0)\n3 + 13.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n4 * 39 = 156 (left: 4 156)\n156 / 4 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n3 + 52 = 55 (left: 4 55)\n55 - 4 = 51 (left: 55)\n55 = 51 - 4 (left: 51)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n13 + 8 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n13 + 7 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 13 = 17 (left: 7 17)\n7 + 17 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 - 1.0 = 2.0 (left: 13 2.0)\n13 * 2.0 = 26.0 (left: 26.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 + 4 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 / 4 = 0.75 (left: 4 13 0.75)\n13 - 0.75 = 12.25 (left: 4 12.25)\n4 + 12.25 = 16.25 (left: 16.25)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 4 = -1 (left: 17 -1)\n17 - -1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 4 = 12 (left: 9 12)\n9 + 12 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n3 + 16 = 19 (left: 13 19)\n13 + 19 = 32 (left: 32)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n13 + 0 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + 13 = 17 (left: -1 17)\n17 - -1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 / 4 = 13.0 (left: 3 13.0)\n3 + 13.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n13 - 3 = 10 (left: 16 10)\n16 + 10 = 26 (left: 26)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 + 4 = 7 (left: -9 7)\n7 - -9 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n4 - -9 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n13 + 0 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n13 + 7 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n7 - 4 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - 4 = -1 (left: -9 -1)\n-9 * -1 = 9 (left: 9)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 * -9 = -27 (left: 4 -27)\n4 - -27 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 - 4 = 0 (left: 16 0)\n16 + 0 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 + 13 = 17 (left: -1 17)\n17 - -1 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 / 4 = 13.0 (left: 3 13.0)\n3 + 13.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 4 = 7 (left: 4 13 7)\n4 + 13 = 17 (left: 7 17)\n7 + 17 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 * 9 = 36 (left: 3 36)\n36 - 3 = 33 (left: 33)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 13 = 52 (left: 3 4 52)\n52 / 4 = 13.0 (left: 3 13.0)\n3 + 13.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 - 12 = -8 (left: 13 -8)\n13 - -8 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 13 = 16 (left: 1.0 16)\n1.0 * 16 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n13 + 0 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 / 4 = 3.25 (left: 3 4 3.25)\n3 + 3.25 = 6.25 (left: 4 6.25)\n4 * 6.25 = 25.0 (left: 25.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n16 - 4 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 - 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 13 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 4 = 0 (left: 3 13 0)\n3 + 0 = 3 (left: 13 3)\n13 * 3 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n3 - 8 = -5 (left: 13 -5)\n13 - -5 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 4 = 8 (left: 3 13 8)\n13 + 8 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 + -1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n13 - -1 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 + 1 = 5 (left: 13 5)\n13 + 5 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 13 = 16 (left: 1.0 16)\n1.0 * 16 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 + 13 = 17 (left: 12 17)\n12 + 17 = 29 (left: 29)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n17 - 4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 * 9 = 108 (left: 108)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 + 16 = 20 (left: 4 20)\n4 + 20 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 - 4 = 35 (left: 4 35)\n35 - 4 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 - -10 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n4 + 9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n3 - 17 = -14 (left: 4 -14)\n4 - -14 = 18 (left: 18)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 100, "generate_data": "4 + 13 = 17 (left: 3 4 17)\n4 + 17 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n3 + 13 = 16 (left: 1.0 16)\n1.0 * 16 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 4 = -1 (left: 4 13 -1)\n4 - -1 = 5 (left: 13 5)\n13 * 5 = 65 (left: 65)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 / 4 = 2.25 (left: 3 2.25)\n3 * 2.25 = 6.75 (left: 6.75)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n13 - 4 = 9 (left: 12 9)\n12 + 9 = 21 (left: 21)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 - 1 = 51 (left: 51)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 - 13 = -10 (left: 4 4 -10)\n4 * -10 = -40 (left: 4 -40)\n4 - -40 = 44 (left: 44)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n4 + 4 = 8 (left: 39 8)\n39 - 8 = 31 (left: 31)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 13 = 39 (left: 4 4 39)\n39 / 4 = 9.75 (left: 4 9.75)\n4 * 9.75 = 39.0 (left: 39.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 / 4 = 1.0 (left: 16 1.0)\n16 * 1.0 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 * 4 = 12 (left: 4 13 12)\n4 + 12 = 16 (left: 13 16)\n13 + 16 = 29 (left: 29)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 * 4 = 16 (left: 3 13 16)\n3 + 16 = 19 (left: 13 19)\n13 + 19 = 32 (left: 32)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n4 * 13 = 52 (left: 1 52)\n52 / 1 = 52.0 (left: 52.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "3 + 13 = 16 (left: 4 4 16)\n4 * 16 = 64 (left: 4 64)\n64 / 4 = 16.0 (left: 16.0)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "13 - 4 = 9 (left: 3 4 9)\n9 - 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 3 = 1 (left: 4 13 1)\n13 - 1 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n3 - -9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 - 13 = -9 (left: 3 4 -9)\n4 - -9 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 12, "input": "3 4 4 13", "reward": 0.001, "generate_data": "4 / 4 = 1.0 (left: 3 13 1.0)\n13 - 1.0 = 12.0 (left: 3 12.0)\n3 * 12.0 = 36.0 (left: 36.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 * 9 = 72 (left: 6 7 72)\n72 / 7 = 10.0 (left: 6 10.0)\n6 + 10.0 = 16.0 (left: 16.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 8 = 48 (left: 7 9 48)\n7 + 9 = 16 (left: 48 16)\n48 - 16 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 - 7 = 2 (left: 14 2)\n14 * 2 = 28 (left: 28)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n9 + -1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 / 2 = 3.0 (left: 8 3.0)\n8 * 3.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n9 + -1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 * 7 = 63 (left: 63)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n7 + 17 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n14 - 9 = 5 (left: 7 5)\n7 + 5 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n-2 - 7 = -9 (left: 9 -9)\n9 - -9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 8 = 48 (left: 7 9 48)\n7 + 9 = 16 (left: 48 16)\n48 - 16 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n6 - -1 = 7 (left: 9 7)\n9 * 7 = 63 (left: 63)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n8 - -2 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 / 7 = 0.86 (left: 8 9 0.86)\n9 / 0.86 = 11.0 (left: 8 11.0)\n8 + 11.0 = 19.0 (left: 19.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 - -1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n7 + 14 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 9 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 * 2 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n9 + -1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n6 - -1 = 7 (left: 9 7)\n9 * 7 = 63 (left: 63)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 + 3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n9 - -1 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n6 / 8 = 0.75 (left: 16 0.75)\n16 / 0.75 = 20.0 (left: 20.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 * 9 = 63 (left: 6 8 63)\n8 / 63 = 0.13 (left: 6 0.13)\n6 / 0.13 = 46.15 (left: 46.15)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n13 - 9 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 9 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n7 - 6 = 1 (left: 17 1)\n17 + 1 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 9 = 16 (left: 2 16)\n2 * 16 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n6 - 16 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n6 - 16 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 + 16 = 24 (left: 6 24)\n6 * 24 = 144 (left: 144)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n7 + 15 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n9 + -1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 * 24 = 144 (left: 144)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 8 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 9 = -1 (left: 6 7 -1)\n6 - -1 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 * 2 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 * 2 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 9)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n8 - -2 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 9 = -1 (left: 6 7 -1)\n6 - -1 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 8 = 48 (left: 7 9 48)\n7 + 48 = 55 (left: 9 55)\n55 - 9 = 46 (left: 48 46)\n48 - 46 = 2 (left: 2)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n6 + -2 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n9 + -1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n6 - 15 = -9 (left: 9 -9)\n9 - -9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n13 - 9 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n9 + -2 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 * 2 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 + 3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 + 8 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 - 17 = -11 (left: 7 -11)\n7 - -11 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 - 2 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 - -1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 - -1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 / 6 = 2.5 (left: 9 2.5)\n9 / 2.5 = 3.6 (left: 3.6)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n14 - 7 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 7 8 54)\n54 / 8 = 6.75 (left: 7 6.75)\n7 * 6.75 = 47.5 (left: 47.5)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n6 + -2 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 * 8 = 48 (left: 2 48)\n48 / 2 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 + 7 = 13 (left: 17 13)\n17 + 13 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n7 + 9 = 16 (left: 14 16)\n14 + 16 = 29 (left: 29)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n6 * 1 = 6 (left: 9 6)\n9 * 6 = 54 (left: 54)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 - 3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 9 = -1 (left: 6 7 -1)\n6 + 7 = 13 (left: -1 13)\n13 - -1 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n8 * 9 = 72 (left: 1 72)\n72 / 1 = 72.0 (left: 72.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 9 = -1 (left: 6 7 -1)\n6 + -1 = 5 (left: 7 5)\n7 + 5 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n7 + 14 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n6 + -1 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 9 = 16 (left: 2 16)\n2 * 16 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n6 + -1 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 - 7 = -1 (left: 17 -1)\n17 - -1 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n7 + 17 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n7 + 15 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 + 8 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n9 - 8 = 1 (left: 13 1)\n13 * 1 = 13 (left: 13)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 + 7 = 13 (left: 17 13)\n17 + 13 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 * 8 = 48 (left: 2 48)\n48 / 2 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n8 + 15 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 7 = 42 (left: 8 9 42)\n9 - 42 = -33 (left: 8 -33)\n8 - -33 = 41 (left: 41)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 * 16 = 128 (left: 6 128)\n128 / 6 = 21.0 (left: 21.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n6 * -2 = -12 (left: 8 -12)\n8 - -12 = 20 (left: 20)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 - -1 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "6 * 8 = 48 (left: 7 9 48)\n9 - 7 = 2 (left: 48 2)\n48 / 2 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 - 2 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n9 - 2 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8)\n8 (left: 8)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n9 - 2 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 9)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 + 8 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n9 + -2 = 7 (left: 7 7)\n7 + 7 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n8 + 1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 - -1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 - -1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n8 + -2 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n13 - 9 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n7 + 17 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n14 - 9 = 5 (left: 7 5)\n7 + 5 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 9 = 16 (left: 2 16)\n2 * 16 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n13 - 9 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n6 + -1 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 8 = 1 (left: 6 7 1)\n6 * 7 = 42 (left: 1 42)\n42 / 1 = 42.0 (left: 42.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 + 16 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 8 = 48 (left: 7 9 48)\n9 / 7 = 1.27 (left: 48 1.27)\n48 / 1.27 = 38.2 (left: 38.2)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n6 - 8 = -2 (left: 16 -2)\n16 - -2 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 - 6 = 3 (left: 15 3)\n15 + 3 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 - 6 = 2 (left: 16 2)\n16 + 2 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 - -1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 + 16 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 + 16 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 8 = -1 (left: 6 9 -1)\n6 + -1 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 7 8 54)\n54 / 8 = 6.75 (left: 7 6.75)\n7 * 6.75 = 45.0 (left: 45.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 / 9 = 1.666666666666667 (left: 6 1.666666666666667)\n6 * 1.666666666666667 = 10.0 (left: 10.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n13 - 9 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n9 - -2 = 11 (left: 7 11)\n7 + 11 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n14 - 7 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 - 7 = -1 (left: 17 -1)\n17 - -1 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n7 + 8 = 15 (left: 15 15)\n15 + 15 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n8 + -2 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n9 + 15 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n8 + -1 = 7 (left: 9 7)\n9 * 7 = 63 (left: 63)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 8 = -9 (left: 9 -9)\n9 - -9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 - 3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 7 = 42 (left: 8 9 42)\n9 / 42 = 0.21 (left: 8 0.21)\n8 / 0.21 = 40.0 (left: 40.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 7 = 13 (left: 8 9 13)\n9 + 13 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n8 - 6 = 2 (left: 16 2)\n16 + 2 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n9 + 1 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n8 + 1 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 8 = -9 (left: 9 -9)\n9 - -9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n6 - 9 = -3 (left: 15 -3)\n15 - -3 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 - -3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 + 17 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n17 - 6 = 11 (left: 7 11)\n7 + 11 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 - -2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n7 + 17 = 24 (left: 6 24)\n6 * 24 = 144 (left: 144)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 / 8 = 2.0 (left: 6 2.0)\n6 * 2.0 = 12.0 (left: 12.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 8 = -2 (left: 7 9 -2)\n7 + -2 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n7 + 9 = 16 (left: 14 16)\n14 + 16 = 29 (left: 29)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 7 = 42 (left: 8 9 42)\n9 + 42 = 51 (left: 8 51)\n51 - 8 = 43 (left: 42 43)\n42 + 43 = 85 (left: 85)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 - 17 = -11 (left: 7 -11)\n7 - -11 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 * 8 = 56 (left: 3 56)\n56 / 3 = 18.0 (left: 18.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 6 = 2 (left: 7 9 2)\n7 + 2 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n6 - 9 = -3 (left: 15 -3)\n15 - -3 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 * 3 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n7 + 14 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 6 = 1 (left: 8 9 1)\n9 + 1 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 9)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 * 2 = 16 (left: 6 16)\n6 + 16 = 22 (left: 22)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 + 1 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 9 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 8 = 15 (left: 6 9 15)\n15 - 9 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n16 - 8 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n6 + 17 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 7 8 15)\n15 - 7 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 + 9 = 16 (left: 6 8 16)\n6 * 8 = 48 (left: 16 48)\n48 - 16 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n9 + 14 = 23 (left: 7 23)\n7 + 23 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n6 - 2 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 7 8 3)\n7 + 3 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 9 = -10 (left: 8 -10)\n8 - -10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n-1 - 8 = -9 (left: 9 -9)\n9 - -9 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 - -1 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 * 8 = 48 (left: 7 9 48)\n48 / 7 = 7.0 (left: 9 7.0)\n9 * 7.0 = 63.0 (left: 63.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n7 + 9 = 16 (left: 14 16)\n14 + 16 = 29 (left: 29)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 + 9 = 17 (left: 6 7 17)\n7 + 17 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n14 - 7 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "8 - 7 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 100, "generate_data": "9 - 7 = 2 (left: 6 8 2)\n8 / 2 = 4.0 (left: 6 4.0)\n6 * 4.0 = 24.0 (left: 24.0)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 7 8 -3)\n7 + -3 = 4 (left: 8 4)\n8 * 4 = 32 (left: 32)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 + 8 = 14 (left: 7 9 14)\n14 - 9 = 5 (left: 7 5)\n7 + 5 = 12 (left: 12)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "6 - 7 = -1 (left: 8 9 -1)\n9 + -1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 13, "input": "6 7 8 9", "reward": 0.001, "generate_data": "7 - 9 = -2 (left: 6 8 -2)\n6 - -2 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 13 = 13 (left: 11 11 13)\n11 - 13 = -2 (left: 11 -2)\n11 - -2 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 / 11 = 1.0 (left: 1 13 1.0)\n1 + 1.0 = 2.0 (left: 13 2.0)\n13 * 2.0 = 26.0 (left: 26.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 13 = 13 (left: 11 11 13)\n11 + 11 = 22 (left: 13 22)\n13 + 22 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 11 = 11 (left: 24 11)\n24 + 11 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n13 - -10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 - 10 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 - -12 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n22 - 1 = 21 (left: 13 21)\n13 + 21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - -2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 + 2 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 25)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 1 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n11 + 13 = 24 (left: -10 24)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 - 2 = 9 (left: 1 9)\n1 * 9 = 9 (left: 9)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 11 = 12 (left: 24 12)\n24 + 12 = 36 (left: 36)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 * 11 = 121 (left: -12 121)\n-12 + 121 = 109 (left: 109)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n22 - 1 = 21 (left: 13 21)\n13 + 21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "1 + 11 = 12 (left: 11 13 12)\n13 - 11 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 + 2 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n13 - 1 = 12 (left: 22 12)\n22 + 12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 11 = 0 (left: 1 13 0)\n1 + 0 = 1 (left: 13 1)\n13 + 1 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 13 = 13 (left: 11 11 13)\n13 - 11 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 2 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 11 13 12)\n11 + 13 = 24 (left: 12 24)\n12 + 24 = 36 (left: 36)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 11 13 11)\n11 + 13 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 / 11 = 2.2 (left: 1 2.2)\n1 * 2.2 = 2.2 (left: 2.2)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 11 13 11)\n13 + 11 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 - 10 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 * 24 = 264 (left: 1 264)\n264 / 1 = 264.0 (left: 264.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "1 + 11 = 12 (left: 11 13 12)\n13 - 11 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n22 - 1 = 21 (left: 13 21)\n13 + 21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 13 = 13 (left: 11 11 13)\n11 + 13 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 11 = 12 (left: 24 12)\n24 + 12 = 36 (left: 36)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 * 24 = 264 (left: 1 264)\n264 / 1 = 264.0 (left: 264.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 11 = 0 (left: 1 13 0)\n1 * 13 = 13 (left: 0 13)\n0 + 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 25)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 11 = 0 (left: 1 13 0)\n13 + 0 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 * 24 = 264 (left: 1 264)\n264 / 1 = 264.0 (left: 264.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 * 13 = 143 (left: 1 11 143)\n11 + 143 = 154 (left: 1 154)\n154 / 1 = 154.0 (left: 154.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 11 = 11 (left: 24 11)\n24 + 11 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 1 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 11 = 11 (left: 24 11)\n24 + 11 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 11 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - -2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 - 10 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 / 11 = 1.0 (left: 1 13 1.0)\n1 * 1.0 = 1.0 (left: 13 1.0)\n13 * 1.0 = 13.0 (left: 13.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 11 13 11)\n13 - 11 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 + 2 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 * 24 = 264 (left: 1 264)\n264 / 1 = 264.0 (left: 264.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 11 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 25)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n13 - 1 = 12 (left: 22 12)\n22 + 12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n14 - 11 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 1 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n13 - 1 = 12 (left: 22 12)\n22 + 12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 11 13 11)\n11 + 13 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 13 = 13 (left: 11 11 13)\n11 * 13 = 143 (left: 11 143)\n143 / 11 = 13.0 (left: 13.0)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 1 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n13 - 1 = 12 (left: 22 12)\n22 + 12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 11 = 11 (left: 24 11)\n24 + 11 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 2 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n11 + 11 = 22 (left: -12 22)\n22 - -12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 11 = 12 (left: 24 12)\n24 + 12 = 36 (left: 36)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 11 13 12)\n11 + 13 = 24 (left: 12 24)\n12 + 24 = 36 (left: 36)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n14 - 11 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 1 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n13 - 1 = 12 (left: 22 12)\n22 + 12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 13 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 11 13 12)\n11 - 12 = -1 (left: 13 -1)\n13 - -1 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 11 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n13 - 1 = 12 (left: 22 12)\n22 + 12 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 + 10 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n24 - 1 = 23 (left: 11 23)\n11 + 23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n11 + 13 = 24 (left: 10 24)\n10 + 24 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 11 = 12 (left: 24 12)\n24 - 12 = 12 (left: 12)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 + 24 = 35 (left: 1 35)\n35 - 1 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 2 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 24 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 11 13 11)\n13 - 11 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 13 = -2 (left: 1 11 -2)\n1 - -2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 + 2 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 25)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 11 13 -10)\n-10 - 11 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n22 - 1 = 21 (left: 13 21)\n13 + 21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 - 13 = -12 (left: 11 11 -12)\n-12 - 11 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 25)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 + 24 = 25 (left: 11 25)\n25 - 11 = 14 (left: 25)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 * 2 = 22 (left: 1 22)\n1 + 22 = 23 (left: 23)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 * 13 = 13 (left: 11 11 13)\n11 + 13 = 24 (left: 11 24)\n11 + 24 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "1 + 13 = 14 (left: 11 11 14)\n11 + 14 = 25 (left: 11 25)\n25 - 11 = 14 (left: 14)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 11 = 22 (left: 1 13 22)\n1 - 22 = -21 (left: 13 -21)\n13 - -21 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 11 13 10)\n13 - 10 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 * 11 = 11 (left: 24 11)\n24 + 11 = 35 (left: 35)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 24 = -23 (left: 11 -23)\n11 - -23 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n1 - 11 = -10 (left: 24 -10)\n24 - -10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 100, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "11 + 13 = 24 (left: 1 11 24)\n11 - 1 = 10 (left: 24 10)\n24 + 10 = 34 (left: 34)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n11 + 2 = 13 (left: 1 13)\n1 * 13 = 13 (left: 13)"}
{"idx": 14, "input": "1 11 11 13", "reward": 0.001, "generate_data": "13 - 11 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n12 - 10 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n8 + 11 = 19 (left: 11 19)\n11 + 19 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 8 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 8 11 10)\n10 - 8 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 + 21 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n9 - 8 = 1 (left: 11 1)\n11 * 1 = 11 (left: 11)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 10 11 8)\n11 + 8 = 19 (left: 10 19)\n10 + 19 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 * 8 = 8 (left: 1 8)\n1 * 8 = 8 (left: 8)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n12 - 10 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 - -7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)\n28 - 11 = 17 (left: 17)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n9 / 8 = 1.125 (left: 11 1.125)\n11 * 1.125 = 12.5 (left: 12.5)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 10 11 9)\n10 + 9 = 19 (left: 11 19)\n11 + 19 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 + 8 = 9 (left: 21 9)\n21 + 9 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 1 10 3)\n1 + 10 = 11 (left: 3 11)\n3 * 11 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 8 10 11)\n10 - 8 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n-7 - 10 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 * 18 = 18 (left: 11 18)\n11 + 18 = 29 (left: 29)\n29 - 11 = 18 (left: 18)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 + 21 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n-7 - 10 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 10 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 8 10 11)\n8 + 10 = 18 (left: 11 18)\n11 + 18 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n10 + 19 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 8 = -18 (left: 10 -18)\n10 - -18 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n10 + 19 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n8 + 10 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n11 - -9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 - 8 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 - -7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 + 12 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 - 8 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 10 11 9)\n10 + 11 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 + 21 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 10 11 9)\n10 + 9 = 19 (left: 11 19)\n11 + 19 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 - 18 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 10 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 10 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 - 11 = -10 (left: 18 -10)\n18 - -10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 - 11 = -10 (left: 18 -10)\n18 - -10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 - 1 = 7 (left: 21 7)\n21 + 7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n11 - -9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 10 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n8 + 12 = 20 (left: 10 20)\n10 + 20 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 + 18 = 19 (left: 11 19)\n11 + 19 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 10 11 9)\n11 - 10 = 1 (left: 9 1)\n9 + 1 = 10 (left: 10)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 - 9 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 8 = -18 (left: 10 -18)\n10 - -18 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 - 8 = 2 (left: 10 2)\n10 + 2 = 12 (left: 12)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n11 + -9 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n11 - 1 = 10 (left: 18 10)\n18 + 10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n11 - -9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 8 11 10)\n10 - 8 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n8 + 11 = 19 (left: 11 19)\n11 + 19 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n11 + 2 = 13 (left: 1 13)\n1 + 13 = 14 (left: 14)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 - 11 = -10 (left: 18 -10)\n18 - -10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 10 11 7)\n10 + 7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 10 11 7)\n10 + 11 = 21 (left: 7 21)\n7 + 21 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 - 1 = 7 (left: 21 7)\n21 + 7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n10 + 19 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 + 12 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n8 + 11 = 19 (left: 11 19)\n11 + 19 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 8 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 - -7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 10 11 9)\n10 + 11 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 8 10 11)\n11 - 10 = 1 (left: 8 1)\n8 * 1 = 8 (left: 8)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n10 - 1 = 9 (left: 19 9)\n19 + 9 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 - 1 = 7 (left: 21 7)\n21 + 7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 8 = -18 (left: 10 -18)\n10 - -18 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 - 10 = -9 (left: 19 -9)\n19 - -9 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n-7 - 10 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 10 11 7)\n10 + 7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n18 - 11 = 7 (left: 1 7)\n1 * 7 = 7 (left: 7)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n11 - 1 = 10 (left: 18 10)\n18 + 10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n8 + 11 = 19 (left: 9 19)\n9 + 19 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + -7 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 + 21 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 + 19 = 20 (left: 10 20)\n10 + 20 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n-7 - 10 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 - 1 = 7 (left: 21 7)\n21 + 7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 - 1 = 7 (left: 21 7)\n21 + 7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n8 + 9 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 + 21 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 8 = 9 (left: 10 11 9)\n10 + 11 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 - 11 = -10 (left: 18 -10)\n18 - -10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n10 - -10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n9 - 8 = 1 (left: 11 1)\n11 * 1 = 11 (left: 11)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 10 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 - 9 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 8 11 10)\n11 - 10 = 1 (left: 8 1)\n8 * 1 = 8 (left: 8)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 10 11 8)\n10 + 11 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 10 11 7)\n10 + 7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 * 11 = 11 (left: 18 11)\n18 + 11 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 + 21 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 - 8 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 8 = -18 (left: 10 -18)\n10 - -18 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 * 2 = 2 (left: 11 2)\n11 * 2 = 22 (left: 22)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n10 + -10 = 0 (left: 8 0)\n8 + 0 = 8 (left: 8)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 - 10 = -9 (left: 19 -9)\n19 - -9 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n10 - -10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 11 = -1 (left: 1 8 -1)\n1 - -1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n8 + 11 = 19 (left: -9 19)\n19 - -9 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n8 + 10 = 18 (left: 10 18)\n10 + 18 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 - 9 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n11 - 1 = 10 (left: 18 10)\n18 + 10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n12 / 8 = 1.5 (left: 10 1.5)\n10 * 1.5 = 15.0 (left: 15.0)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 10 = 10 (left: 8 11 10)\n11 + 10 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n11 - 1 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 8 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 11 = -10 (left: 8 10 -10)\n-10 - 10 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n8 + 21 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 - 8 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 11 = 11 (left: 8 10 11)\n11 - 10 = 1 (left: 8 1)\n8 * 1 = 8 (left: 8)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 - 8 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 * 10 = 10 (left: 19 10)\n19 + 10 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 - 9 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 10 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n-7 - 10 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 + 19 = 20 (left: 10 20)\n10 + 20 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 10 11 8)\n10 + 11 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 11 = -1 (left: 1 8 -1)\n1 - -1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 - -7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n11 + 9 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 * 8 = 8 (left: 10 11 8)\n10 + 11 = 21 (left: 8 21)\n8 + 21 = 29 (left: 29)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 - 11 = -1 (left: -7 -1)\n-7 * -1 = 7 (left: 7)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 21 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n8 - -9 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 10 = 11 (left: 8 11 11)\n11 + 11 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 8 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n9 - 8 = 1 (left: 11 1)\n11 * 1 = 11 (left: 11)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 + 10 = 11 (left: 19 11)\n19 + 11 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 11 = -20 (left: 8 -20)\n8 - -20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 10 = -9 (left: 8 11 -9)\n-9 - 8 = -17 (left: 11 -17)\n11 - -17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n10 + 19 = 29 (left: 1 29)\n29 - 1 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 11 = 19 (left: 1 10 19)\n1 + 19 = 20 (left: 10 20)\n10 + 20 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 - 8 = -7 (left: 10 11 -7)\n10 + 11 = 21 (left: -7 21)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 / 8 = 1.25 (left: 1 11 1.25)\n1 * 1.25 = 1.25 (left: 11 1.25)\n11 * 1.25 = 14.0 (left: 14.0)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 + 21 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n8 + 11 = 19 (left: 9 19)\n9 + 19 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n11 - 1 = 10 (left: 18 10)\n18 + 10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 1 = 7 (left: 10 11 7)\n10 + 7 = 17 (left: 11 17)\n11 + 17 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 + 12 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 8 = 3 (left: 1 10 3)\n1 * 3 = 3 (left: 10 3)\n10 * 3 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 1 = 9 (left: 8 11 9)\n9 - 8 = 1 (left: 11 1)\n11 * 1 = 11 (left: 11)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 11 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 + 21 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 / 10 = 1.1 (left: 1 8 1.1)\n1 + 8 = 9 (left: 1.1 9)\n1.1 * 9 = 10.0 (left: 10.0)"}
{"idx": 15, "input": "1 8 10 11", "reward": 100, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 - 8 = 2 (left: 12 2)\n12 * 2 = 24 (left: 24)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 + 2 = 3 (left: 11 3)\n11 * 3 = 33 (left: 33)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n21 - 1 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 10 = -2 (left: 1 11 -2)\n1 - 11 = -10 (left: -2 -10)\n-2 * -10 = 20 (left: 20)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 - 8 = 2 (left: 1 11 2)\n1 * 11 = 11 (left: 2 11)\n2 * 11 = 22 (left: 22)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "1 + 11 = 12 (left: 8 10 12)\n10 + 12 = 22 (left: 8 22)\n8 + 22 = 30 (left: 30)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 10 = 1 (left: 1 8 1)\n1 + 1 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 - 11 = -10 (left: 18 -10)\n18 - -10 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 / 8 = 1.25 (left: 1 11 1.25)\n1 + 11 = 12 (left: 1.25 12)\n1.25 * 12 = 15.0 (left: 15.0)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 - 11 = -3 (left: 1 10 -3)\n10 + -3 = 7 (left: 1 7)\n1 * 7 = 7 (left: 7)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "10 + 11 = 21 (left: 1 8 21)\n1 - 8 = -7 (left: 21 -7)\n21 - -7 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "11 - 1 = 10 (left: 8 10 10)\n10 + 10 = 20 (left: 8 20)\n8 + 20 = 28 (left: 28)"}
{"idx": 15, "input": "1 8 10 11", "reward": 0.001, "generate_data": "8 + 10 = 18 (left: 1 11 18)\n1 + 11 = 12 (left: 18 12)\n18 + 12 = 30 (left: 30)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 6 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n3 + 9 = 12 (left: 4 12)\n4 * 12 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n2 - 6 = -4 (left: 27 -4)\n27 + -4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n2 + 15 = 17 (left: 3 17)\n3 + 17 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 - -7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n54 / 2 = 27.0 (left: 3 27.0)\n27.0 - 3 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 6 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n2 * 9 = 18 (left: 18 18)\n18 + 18 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 6 = -4 (left: 3 9 -4)\n3 * 9 = 27 (left: -4 27)\n-4 + 27 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 - -3 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n6 + 1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 * 9 = 54 (left: 6 54)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 - -3 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 6 = -4 (left: 3 9 -4)\n9 - -4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 - 6 = 21 (left: 2 21)\n2 + 21 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 - 2 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n2 * 9 = 18 (left: 9 18)\n9 + 18 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n3 + 11 = 14 (left: 6 14)\n6 + 14 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n6 * 1.5 = 9.0 (left: 9 9.0)\n9 + 9.0 = 18.0 (left: 18.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n3 * 15 = 45 (left: 2 45)\n45 / 2 = 22.5 (left: 22.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n2 + 15 = 17 (left: 3 17)\n3 + 17 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 / 3 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n6 * 4.5 = 27.0 (left: 3 27.0)\n27.0 - 3 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 - 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n4.5 / 3 = 1.5 (left: 6 1.5)\n6 / 1.5 = 4.0 (left: 4.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n2 * 9 = 18 (left: 9 18)\n9 + 18 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n2 * 54 = 120 (left: 3 120)\n120 / 3 = 40.0 (left: 40.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 * 7 = 42 (left: 3 42)\n42 / 3 = 14.0 (left: 14.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n2 - 27 = -25 (left: 6 -25)\n6 - -25 = 31 (left: 31)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 - 2 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 - 2 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 + 7 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 / 3 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n3 + 15 = 18 (left: 2 18)\n2 * 18 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 / 6 = 0.5 (left: 18 0.5)\n18 / 0.5 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 * 18 = 54 (left: 6 54)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 + 7 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n3 * 9 = 27 (left: 3.0 27)\n27 - 3.0 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n9 + 18 = 27 (left: 2 27)\n27 - 2 = 25 (left: 25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 3.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n2 + 18 = 20 (left: 9 20)\n9 + 20 = 29 (left: 29)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 * 3 = 6 (left: -3 6)\n6 - -3 = 9 (left: 9)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 - -3 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 - 6 = 21 (left: 2 21)\n2 + 21 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n3 / 4.5 = 0.67 (left: 6 0.67)\n6 / 0.67 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n6 * 3.0 = 18.0 (left: 2 18.0)\n2 + 18.0 = 20.0 (left: 20.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n6 + 9 = 15 (left: 5 15)\n5 + 15 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 6 = -4 (left: 3 9 -4)\n3 * 9 = 27 (left: -4 27)\n-4 + 27 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 - 2 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 - 2 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n15 - 3 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n9 + 8 = 17 (left: 3 17)\n3 + 17 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n2 * 54 = 120 (left: 3 120)\n120 / 3 = 40.0 (left: 40.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 6 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n3 + 9 = 12 (left: 3.0 12)\n3.0 * 12 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 / 6 = 1.5 (left: 6 1.5)\n6 * 1.5 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n9 - 3 = 6 (left: 3.0 6)\n3.0 * 6 = 18.0 (left: 18.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 3.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n9 / 1.5 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 - 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 6 = 12 (left: 3.0 12)\n3.0 * 12 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 * 6 = 18 (left: -7 18)\n18 - -7 = 25 (left: 25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n3 + 12 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 - 2 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n2 - 9 = -7 (left: 18 -7)\n18 - -7 = 25 (left: 25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 + 3.0 = 5.0 (left: 6 5.0)\n6 * 5.0 = 30.0 (left: 30.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n2 * 54 = 120 (left: 3 120)\n120 / 3 = 40.0 (left: 40.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 / 2 = 13.5 (left: 6 13.5)\n6 + 13.5 = 19.5 (left: 19.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n54 / 3 = 18.0 (left: 2 18.0)\n2 * 18.0 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n3 + 3.0 = 6.0 (left: 9 6.0)\n9 * 6.0 = 54.0 (left: 54.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n9 - 3 = 6 (left: 8 6)\n8 * 6 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n3 + 3 = 6 (left: 2 6)\n2 * 6 = 12 (left: 12)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 * 18 = 54 (left: 6 54)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n3 - 2 = 1 (left: 3 1)\n3 * 1 = 3 (left: 3)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n9 + 1.5 = 10.5 (left: 6 10.5)\n6 + 10.5 = 16.5 (left: 16.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 - 3 = 15 (left: 6 15)\n6 + 15 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 3.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 6 = 12 (left: 3.0 12)\n3.0 * 12 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 / 3 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n6 * 3.0 = 18.0 (left: 2 18.0)\n2 + 18.0 = 20.0 (left: 20.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 + 9 = 15 (left: 6 15)\n6 + 15 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 18 = 21 (left: 6 21)\n6 + 21 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 6 = 0.5 (left: 2 9 0.5)\n9 / 0.5 = 18.0 (left: 2 18.0)\n2 + 18.0 = 20.0 (left: 20.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 + 9 = 15 (left: 6 15)\n6 + 15 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n9 - 5 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n9 + 18 = 27 (left: 2 27)\n27 - 2 = 25 (left: 25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 3 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 / 6 = 1.5 (left: 6 1.5)\n6 * 1.5 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 - 2 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 / 2 = 13.5 (left: 6 13.5)\n6 + 13.5 = 19.5 (left: 19.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n3 + 15 = 18 (left: 2 18)\n2 * 18 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n3 + 4 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 / 3 = 3.0 (left: 12 3.0)\n12 * 3.0 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 + 9 = 11 (left: 3 11)\n3 * 11 = 33 (left: 33)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 * 9 = 54 (left: 6 54)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 12 = 24 (left: 6 24)\n6 * 24 = 144 (left: 144)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n3.0 / 2 = 1.5 (left: 6 1.5)\n6 / 1.5 = 4.0 (left: 4.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 - 2 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n2 * 9 = 18 (left: 9 18)\n9 + 18 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 + 7 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 12 = 24 (left: 6 24)\n6 + 24 = 30 (left: 30)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n9 + 1.5 = 10.5 (left: 6 10.5)\n6 + 10.5 = 16.5 (left: 16.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 + 6 = 12 (left: 9 12)\n9 + 12 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n9 + 9 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 - -3 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 * -3 = -6 (left: 3 -6)\n3 - -6 = 9 (left: 9)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 - -7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 + 12 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 - 6 = -3 (left: -7 -3)\n-7 * -3 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n9 + 8 = 17 (left: 3 17)\n3 + 17 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 3.0 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 - -3 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n15 - 3 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n6 / 2 = 3.0 (left: 12 3.0)\n12 * 3.0 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n6 + 12 = 18 (left: 2 18)\n2 + 18 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 9 = 0.3333 (left: 2 6 0.3333)\n2 / 0.3333 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n9 / 3 = 3.0 (left: 8 3.0)\n8 * 3.0 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 - -7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 - -7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n6 * 4.5 = 27.0 (left: 3 27.0)\n27.0 - 3 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 + 27 = 33 (left: 2 33)\n33 - 2 = 31 (left: 31)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 - 6 = 21 (left: 2 21)\n2 + 21 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 3 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 6 = 9 (left: 18 9)\n18 + 9 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n9 - 3 = 6 (left: 8 6)\n8 + 6 = 14 (left: 14)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 - 2 = 7 (left: 3 7)\n3 * 7 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 + 7 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n6 * 9 = 54 (left: 1.5 54)\n54 / 1.5 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n6 + 1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 * 9 = 54 (left: 6 54)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n3 * 15 = 45 (left: 2 45)\n45 / 2 = 22.5 (left: 22.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 + 3 = 5 (left: 9 5)\n9 * 5 = 45 (left: 45)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n2 * 3 = 6 (left: 54 6)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n2 + 9 = 11 (left: 9 11)\n9 + 11 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n9 - 3 = 6 (left: 8 6)\n8 + 6 = 14 (left: 14)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 - 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 6 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n3 * 9 = 27 (left: 3.0 27)\n27 - 3.0 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n2 + 18 = 20 (left: 9 20)\n9 + 20 = 29 (left: 29)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n15 - 2 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n3 + 9 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 / 3 = 3.0 (left: 12 3.0)\n12 * 3.0 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n2 + 18 = 20 (left: 9 20)\n9 + 20 = 29 (left: 29)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 * 7 = 42 (left: 3 42)\n42 - 3 = 39 (left: 39)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 6 = -4 (left: 3 9 -4)\n9 + -4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 9 = -7 (left: 3 6 -7)\n3 - -7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 + 6 = 12 (left: 9 12)\n9 + 12 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n9 - 5 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n4.5 / 3 = 1.5 (left: 6 1.5)\n6 / 1.5 = 4.0 (left: 4.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n9 - 5 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 + 7 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "3 - 9 = -6 (left: 2 6 -6)\n2 - 6 = -4 (left: -6 -4)\n-6 * -4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n9 - 5 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 6 = 18 (left: 2 9 18)\n2 - 18 = -16 (left: 9 -16)\n9 - -16 = 25 (left: 25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 3 = -1 (left: 6 9 -1)\n6 - -1 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n2 * 9 = 18 (left: 9 18)\n9 + 18 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 / 3 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 - 6 = 21 (left: 2 21)\n2 + 21 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 6 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n6 * 12 = 72 (left: 2 72)\n72 / 2 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n3 + 7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 + 9 = 11 (left: 3 11)\n3 * 11 = 33 (left: 33)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 6 = 9 (left: 18 9)\n18 + 9 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 - 2 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 6 = 8 (left: 3 9 8)\n3 + 8 = 11 (left: 9 11)\n9 + 11 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 + 27 = 33 (left: 2 33)\n33 - 2 = 31 (left: 31)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n6 + 5 = 11 (left: 9 11)\n9 + 11 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 + 8 = 14 (left: 14)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n6 * 1.5 = 9.0 (left: 9 9.0)\n9 + 9.0 = 18.0 (left: 18.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n6 * 7 = 42 (left: 3 42)\n42 / 3 = 14.0 (left: 14.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n7 - 3 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n3 + 3 = 6 (left: 2 6)\n2 * 6 = 12 (left: 12)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 6 = 12 (left: 3.0 12)\n3.0 * 12 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 - 2 = 25 (left: 6 25)\n25 - 6 = 19 (left: 25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 3 = -1 (left: 6 9 -1)\n6 + -1 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 6 = 9 (left: 18 9)\n18 + 9 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n6 + 27 = 33 (left: 2 33)\n33 - 2 = 31 (left: 31)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 / 6 = 0.33333 (left: 12 0.33333)\n12 / 0.33333 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 / 3 = 2.0 (left: 18 2.0)\n18 + 2.0 = 20.0 (left: 20.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 + 9 = 11 (left: 3 11)\n3 * 11 = 33 (left: 33)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n9 * 1.5 = 13.5 (left: 6 13.5)\n6 + 13.5 = 19.5 (left: 19.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 - 1 = 8 (left: 6 8)\n6 * 8 = 48 (left: 48)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 6 = -4 (left: 3 9 -4)\n9 - -4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 6 = -4 (left: 3 9 -4)\n9 + -4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 6 = 12 (left: 3.0 12)\n3.0 * 12 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 2 = 1.5 (left: 6 9 1.5)\n9 / 1.5 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 6 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n11 - 6 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n54 / 2 = 27.0 (left: 3 27.0)\n27.0 - 3 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n3 + 7 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n4.5 / 3 = 1.5 (left: 6 1.5)\n6 / 1.5 = 4.0 (left: 4.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n2 * 12 = 24 (left: 6 24)\n6 / 24 = 0.25 (left: 0.25)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 18 = 21 (left: 6 21)\n6 + 21 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 + 3 = 5 (left: 6 9 5)\n9 - 5 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 + 3 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n3 + 3.0 = 6.0 (left: 9 6.0)\n9 * 6.0 = 54.0 (left: 54.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n3 * 6 = 18 (left: 11 18)\n11 + 18 = 29 (left: 29)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n2 * 9 = 18 (left: 3 18)\n3 + 18 = 21 (left: 21)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 2 = 7 (left: 3 6 7)\n3 + 6 = 9 (left: 7 9)\n7 + 9 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 + 9 = 11 (left: 3 6 11)\n3 + 11 = 14 (left: 6 14)\n6 + 14 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 2 6 12)\n6 + 12 = 18 (left: 2 18)\n2 * 18 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 / 18 = 0.1666666666666666 (left: 6 0.1666666666666666)\n6 / 0.1666666666666666 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 + 1 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 - 2 = 1 (left: 6 9 1)\n9 + 1 = 10 (left: 6 10)\n6 + 10 = 16 (left: 16)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 * 9 = 54 (left: 2 3 54)\n2 * 3 = 6 (left: 54 6)\n54 / 6 = 9.0 (left: 9.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n15 - 2 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 2 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 - 6 = 12 (left: 3 12)\n3 * 12 = 36 (left: 36)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 6 = 9 (left: 18 9)\n18 + 9 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 - 3 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 / 2 = 4.5 (left: 3 4.5)\n3 * 4.5 = 13.5 (left: 13.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 / 9 = 0.666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666\n3 * 9 = 27 (left: 2 6 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "6 / 2 = 3.0 (left: 3 9 3.0)\n3 * 9 = 27 (left: 3.0 27)\n27 - 3.0 = 24.0 (left: 24.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 / 9 = 0.333333333333 (left: 2 6 0.333333333333)\n0.333333333333 / 2 = 0.166666666666 (left: 6 0.166666666666)\n6 / 0.166666666666 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 2 = 4.5 (left: 3 6 4.5)\n4.5 / 3 = 1.5 (left: 6 1.5)\n6 / 1.5 = 4.0 (left: 4.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 + 6 = 9 (left: 2 9 9)\n2 * 9 = 18 (left: 9 18)\n9 + 18 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n27 - 6 = 21 (left: 2 21)\n2 + 21 = 23 (left: 23)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n6 - 9 = -3 (left: 6 -3)\n6 - -3 = 9 (left: 9)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n3 + 6 = 9 (left: 18 9)\n18 + 9 = 27 (left: 27)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 3 = 3 (left: 2 9 3)\n9 / 2 = 4.5 (left: 3 4.5)\n3 * 4.5 = 13.5 (left: 13.5)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 2 6 3.0)\n2 * 6 = 12 (left: 3.0 12)\n3.0 * 12 = 36.0 (left: 36.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n6 + 18 = 24 (left: 3 24)\n3 * 24 = 72 (left: 72)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 - 3 = -1 (left: 6 9 -1)\n6 + -1 = 5 (left: 9 5)\n9 + 5 = 14 (left: 14)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 9 = 18 (left: 3 6 18)\n18 / 3 = 6.0 (left: 6 6.0)\n6 + 6.0 = 12.0 (left: 12.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "9 - 6 = 3 (left: 2 3 3)\n2 * 3 = 6 (left: 3 6)\n3 * 6 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 / 3 = 2.0 (left: 2 9 2.0)\n2 * 9 = 18 (left: 2.0 18)\n2.0 + 18 = 20.0 (left: 20.0)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n2 + 3 = 5 (left: 15 5)\n15 + 5 = 20 (left: 20)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "2 * 6 = 12 (left: 3 9 12)\n9 + 12 = 21 (left: 3 21)\n3 + 21 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "2 * 3 = 6 (left: 6 9 6)\n9 - 6 = 3 (left: 6 3)\n6 * 3 = 18 (left: 18)"}
{"idx": 16, "input": "2 3 6 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 2 6 6)\n6 + 6 = 12 (left: 2 12)\n2 * 12 = 24 (left: 24)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 - 9 = -3 (left: 2 3 -3)\n2 * -3 = -6 (left: 3 -6)\n3 - -6 = 9 (left: 9)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 2 6 27)\n2 * 6 = 12 (left: 27 12)\n27 - 12 = 15 (left: 15)"}
{"idx": 16, "input": "2 3 6 9", "reward": 0.001, "generate_data": "6 + 9 = 15 (left: 2 3 15)\n2 + 15 = 17 (left: 3 17)\n3 + 17 = 20 (left: 20)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 * 5 = 5 (left: 12 5)\n12 * 5 = 60 (left: 60)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n1 + 27 = 28 (left: 5 28)\n28 - 5 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 * 3 = 3 (left: 4 3)\n4 * 3 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 + 15 = 16 (left: 9 16)\n9 + 16 = 25 (left: 25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n9 - 5 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 * 9 = 45 (left: 4 45)\n45 / 4 = 11.25 (left: 11.25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 + 5 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 * 9 = 27 (left: 6 27)\n27 - 6 = 21 (left: 21)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 + 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 9 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n10 - 5 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n3 + 10 = 13 (left: 5 13)\n5 + 13 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 * 5 = 5 (left: 12 5)\n12 * 5 = 60 (left: 60)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 + 15 = 16 (left: 9 16)\n9 + 16 = 25 (left: 25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 * 4 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 - 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 9 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 * 14 = 14 (left: 3 14)\n3 + 14 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 9 = -8 (left: 3 5 -8)\n5 - -8 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 * 9 = 9 (left: 8 9)\n8 + 9 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n14 - 1 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n1 - 27 = -26 (left: 5 -26)\n5 - -26 = 31 (left: 31)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n5 - 1 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 5 9 2)\n9 / 2 = 4.5 (left: 5 4.5)\n5 * 4.5 = 22.5 (left: 22.5)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 5 9 2)\n5 + 2 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n5 + 8 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 + 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n3 * 5 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n4 - 1 = 3 (left: 3 3)\n3 * 3 = 9 (left: 9)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 + 8 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 + 5 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 14 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n3 + 14 = 17 (left: 1 17)\n17 - 1 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 * 5 = 45 (left: 3 45)\n45 / 3 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 3 = 4 (left: 14 4)\n14 + 4 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n1 * 27 = 27 (left: 5 27)\n27 - 5 = 22 (left: 27)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 9 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 14 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 9 = -8 (left: 3 5 -8)\n5 - -8 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n3 * 9 = 27 (left: 4 27)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 5 9 2)\n5 + 2 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 - 1 = 8 (left: 15 8)\n15 + 8 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 + 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 + 5 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 9 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n3 + 9 = 12 (left: 5 12)\n5 * 12 = 60 (left: 60)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 3 = 4 (left: 14 4)\n14 + 4 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n27 - 5 = 22 (left: 1 22)\n22 - 1 = 21 (left: 21)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n5 - 3 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 * 9 = 45 (left: 3 45)\n45 / 3 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n9 / 3 = 3.0 (left: 5 3.0)\n5 * 3.0 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 + 3 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9 17)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 + 8 = 17 (left: 1 17)\n17 - 1 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 3 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 2 = 3 (left: 9 3)\n9 * 3 = 27 (left: 27)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 * 5 = 5 (left: 12 5)\n12 * 5 = 60 (left: 60)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 * 4 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 + 6 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 + 5 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n1 - 5 = -4 (left: 27 -4)\n27 + -4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 9 = -8 (left: 3 5 -8)\n5 - -8 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 - 3 = -2 (left: 14 -2)\n14 - -2 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n10 - 5 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 + 9 = 12 (left: 6 12)\n6 + 12 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 * 3 = 3 (left: 4 3)\n4 * 3 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 5 = -4 (left: 3 9 -4)\n9 + -4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 + 5 = 6 (left: 12 6)\n12 * 6 = 72 (left: 72)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n3 * 4 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n3 + 5 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 5 9 2)\n9 + 2 = 11 (left: 5 11)\n5 + 11 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 * 3 = 3 (left: 14 3)\n14 + 3 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n5 - 3 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n5 - 1 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9 17)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n3 + 10 = 13 (left: 5 13)\n5 + 13 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n27 - 5 = 22 (left: 1 22)\n22 - 1 = 21 (left: 21)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 3 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 + 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n1 - 5 = -4 (left: 27 -4)\n27 + -4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 8 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 / 5 = 0.2 (left: 3 9 0.2)\n3 * 9 = 27 (left: 0.2 27)\n27 / 0.2 = 130.0 (left: 130.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 + 9 = 10 (left: 15 10)\n15 + 10 = 25 (left: 25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n3 * 5 = 15 (left: 8 15)\n8 + 15 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n1 - 5 = -4 (left: 27 -4)\n27 + -4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n3 * 5 = 15 (left: 8 15)\n8 + 15 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 2 = 3 (left: 9 3)\n9 * 3 = 27 (left: 27)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n3 + 4 = 7 (left: 9 7)\n9 + 7 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n10 - 5 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 * 9 = 45 (left: 1 3 45)\n3 - 1 = 2 (left: 45 2)\n45 / 2 = 22.5 (left: 22.5)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 9 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 3 = 4 (left: 14 4)\n14 + 4 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 * 3 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 9 = -8 (left: 3 5 -8)\n3 * 5 = 15 (left: -8 15)\n15 - -8 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n10 - 5 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 3 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n5 - 3 = 2 (left: 9 2)\n9 * 2 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 - 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n3 + 14 = 17 (left: 1 17)\n17 - 1 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 4 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n5 - 1 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 * 3 = 3 (left: 4 3)\n4 * 3 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 9 = 10 (left: 2 10)\n2 * 10 = 20 (left: 20)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 + 15 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n3 + 5 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 9 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 5 9 2)\n5 + 9 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n10 - 5 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 * 3 = 3 (left: 14 3)\n14 + 3 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 + 15 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n3 + 14 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 3 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n3 + 5 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 * 9 = 9 (left: 2 9)\n2 * 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n5 + 12 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n15 - 1 = 14 (left: 9 14)\n9 + 14 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 + 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 + 15 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 * 9 = 45 (left: 3 45)\n45 / 3 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 9 = -8 (left: 3 5 -8)\n3 * 5 = 15 (left: -8 15)\n15 - -8 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 5 = -2 (left: 1 9 -2)\n1 - -2 = 3 (left: 9 3)\n9 * 3 = 27 (left: 27)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n5 - 1 = 4 (left: 12 4)\n12 * 4 = 48 (left: 48)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 - 6 = 3 (left: 3 3)\n3 + 3 = 6 (left: 6)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 4 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n3 + 5 = 8 (left: 10 8)\n10 + 8 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n5 - 1 = 4 (left: 27 4)\n27 - 4 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 + 8 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 + 15 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 + 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 1 5 3.0)\n1 + 5 = 6 (left: 3.0 6)\n3.0 * 6 = 18.0 (left: 18.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n3 * 4 = 12 (left: 1 12)\n1 * 12 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 + 15 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n5 - 3 = 2 (left: 8 2)\n8 * 2 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 * 9 = 45 (left: 3 45)\n45 / 3 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 * 3 = 3 (left: 4 3)\n4 * 3 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 - 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 - 15 = -14 (left: 9 -14)\n9 - -14 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 + 15 = 16 (left: 9 16)\n9 + 16 = 25 (left: 25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n1 * 5 = 5 (left: 6 5)\n6 * 5 = 30 (left: 30)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n3 * 5 = 15 (left: 9 15)\n9 + 15 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n1 + 5 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 * 9 = 27 (left: 6 27)\n27 - 6 = 21 (left: 21)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n3 + 14 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n12 - 5 = 7 (left: 1 7)\n1 * 7 = 7 (left: 7)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n3 + 14 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 8 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9 17)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n9 - 5 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n9 - 1 = 8 (left: 2 8)\n2 * 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n4 - 1 = 3 (left: 3 3)\n3 + 3 = 6 (left: 6)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 * 9 = 45 (left: 3 45)\n45 / 3 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 9 = 27 (left: 1 5 27)\n5 + 27 = 32 (left: 1 32)\n32 - 1 = 31 (left: 31)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n5 + 12 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 8 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n5 + 12 = 17 (left: 1 17)\n1 + 17 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 5 = -4 (left: 3 9 -4)\n3 * 9 = 27 (left: -4 27)\n-4 + 27 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n9 / 5 = 1.8 (left: 3 1.8)\n3 * 1.8 = 5.4 (left: 5.4)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 5 = -2 (left: 1 9 -2)\n1 - 9 = -8 (left: -2 -8)\n-2 * -8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 3 = 2 (left: 1 9 2)\n1 + 2 = 3 (left: 9 3)\n9 * 3 = 27 (left: 27)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 - 9 = -8 (left: 15 -8)\n15 - -8 = 23 (left: 23)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 9 = -4 (left: 1 3 -4)\n3 - -4 = 7 (left: 1 7)\n1 * 7 = 7 (left: 7)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 5 = -4 (left: 3 9 -4)\n9 + -4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 - 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 * 4 = 36 (left: 3 36)\n36 - 3 = 33 (left: 33)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n3 + 5 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 3 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 9 = -4 (left: 1 3 -4)\n1 - -4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 * 14 = 14 (left: 3 14)\n3 + 14 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 9 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 14 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n10 - 5 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n9 + 15 = 24 (left: 1 24)\n1 * 24 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n9 * 4 = 36 (left: 5 36)\n36 - 5 = 31 (left: 31)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 / 3 = 3.0 (left: 1 5 3.0)\n5 * 3.0 = 15.0 (left: 1 15.0)\n15.0 - 1 = 14.0 (left: 14.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 9 = 10 (left: 3 5 10)\n5 + 10 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n3 * 4 = 12 (left: 9 12)\n9 + 12 = 21 (left: 21)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 9 = 9 (left: 3 5 9)\n5 + 9 = 14 (left: 3 14)\n3 + 14 = 17 (left: 9 17)\n9 + 17 = 26 (left: 26)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "5 - 1 = 4 (left: 3 9 4)\n9 - 3 = 6 (left: 4 6)\n4 * 6 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 1 = 8 (left: 3 5 8)\n3 + 5 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 100, "generate_data": "9 - 3 = 6 (left: 1 5 6)\n5 - 1 = 4 (left: 6 4)\n6 * 4 = 24 (left: 24)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n9 - 5 = 4 (left: 4 4)\n4 + 4 = 8 (left: 8)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n14 - 1 = 13 (left: 3 13)\n3 * 13 = 39 (left: 39)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 - 3 = 6 (left: 6 6)\n6 + 6 = 12 (left: 12)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 5 = -4 (left: 3 9 -4)\n9 - -4 = 13 (left: 3 13)\n3 + 13 = 16 (left: 16)\n16 - 3 = 13 (left: 13)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 * 5 = 15 (left: 1 9 15)\n1 + 15 = 16 (left: 9 16)\n9 + 16 = 25 (left: 25)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n9 / 3 = 3.0 (left: 6 3.0)\n6 * 3.0 = 18.0 (left: 18.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n1 + 9 = 10 (left: 8 10)\n8 + 10 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 * 14 = 14 (left: 3 14)\n3 + 14 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 9 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 * 9 = 45 (left: 1 3 45)\n1 * 45 = 45 (left: 3 45)\n45 / 3 = 15.0 (left: 15.0)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 3 = 3 (left: 5 9 3)\n5 + 3 = 8 (left: 9 8)\n9 + 8 = 17 (left: 17)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n1 - 5 = -4 (left: 12 -4)\n12 - -4 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "5 + 9 = 14 (left: 1 3 14)\n1 + 14 = 15 (left: 3 15)\n3 + 15 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 9 = 12 (left: 1 5 12)\n5 - 1 = 4 (left: 12 4)\n12 * 4 = 48 (left: 48)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 5 = 6 (left: 3 9 6)\n3 + 6 = 9 (left: 9 9)\n9 + 9 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "9 - 5 = 4 (left: 1 3 4)\n1 + 4 = 5 (left: 3 5)\n3 * 5 = 15 (left: 15)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 - 9 = -8 (left: 3 5 -8)\n-8 - 5 = -13 (left: 3 -13)\n3 - -13 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 5 = -2 (left: 1 9 -2)\n1 - 9 = -8 (left: -2 -8)\n-2 * -8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 - 1 = 2 (left: 5 9 2)\n5 + 9 = 14 (left: 2 14)\n2 * 14 = 28 (left: 28)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 + 5 = 14 (left: 3 14)\n3 * 14 = 42 (left: 42)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 + 3 = 4 (left: 5 9 4)\n5 + 9 = 14 (left: 4 14)\n4 + 14 = 18 (left: 18)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "3 + 5 = 8 (left: 1 9 8)\n9 - 1 = 8 (left: 8 8)\n8 + 8 = 16 (left: 16)"}
{"idx": 17, "input": "1 3 5 9", "reward": 0.001, "generate_data": "1 * 5 = 5 (left: 3 9 5)\n9 - 5 = 4 (left: 3 4)\n3 * 4 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 3 = 6 (left: 19 6)\n19 + 6 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n10 - 3 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 12 = -9 (left: 21 -9)\n21 - -9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 7 = 0.428571 (left: 3 12 0.428571)\n12 / 0.428571 = 28.571 (left: 3 28.571)\n28.571 - 3 = 25.571 (left: 25.571)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 3 = 6 (left: 19 6)\n19 + 6 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n7 - 3 = 4 (left: 36 4)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 / 7 = 5.0 (left: 3 5.0)\n3 * 5.0 = 15.0 (left: 15.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 + 21 = 33 (left: 3 33)\n3 + 33 = 36 (left: 36)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 3 = 0 (left: 7 12 0)\n7 + 12 = 19 (left: 0 19)\n0 + 19 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 12 = -5 (left: 3 3 -5)\n3 - -5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 + -9 = 2 (left: 3 2)\n3 * 2 = 6 (left: 6)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n4 - 3 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n12 - -4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 + 21 = 33 (left: 3 33)\n3 + 33 = 36 (left: 36)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n1.0 / 7 = 0.142857 (left: 12 0.142857)\n12 / 0.142857 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 3 = 6 (left: 19 6)\n19 + 6 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 / 21 = 0.14285714285714285714285714285714 (left: 12 0.14285714285714285714285714285714)\n12 / 0.14285714285714285714285714285714 = 86.4 (left: 86.4)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 / 7 = 5.0 (left: 3 5.0)\n3 * 5.0 = 15.0 (left: 15.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 12 = 19 (left: 1.0 19)\n1.0 + 19 = 20.0 (left: 20.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 - 3 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 15 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 - 4 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 - 3 = 4 (left: 15 4)\n15 + 4 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 - 7 = 29 (left: 3 29)\n29 - 3 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 12 = -5 (left: 3 3 -5)\n3 * -5 = -15 (left: 3 -15)\n3 - -15 = 18 (left: 18)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 7 = 0.428571 (left: 3 12 0.428571)\n3 / 0.428571 = 7.0 (left: 12 7.0)\n12 * 7.0 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n3 - 7 = -4 (left: 36 -4)\n36 + -4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n1.0 / 7 = 0.142857 (left: 12 0.142857)\n12 / 0.142857 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 7 = 0.428571 (left: 3 12 0.428571)\n3 / 0.428571 = 7.0 (left: 12 7.0)\n12 * 7.0 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n12 / 6 = 2.0 (left: 7 2.0)\n7 * 2.0 = 14.0 (left: 14.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 / 3 = 4.0 (left: 3 7 4.0)\n7 + 4.0 = 11.0 (left: 3 11.0)\n3 + 11.0 = 14.0 (left: 14.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n12 - -4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 + -9 = 2 (left: 3 2)\n3 * 2 = 6 (left: 6)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 + 4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n1.0 / 7 = 0.142857 (left: 12 0.142857)\n12 / 0.142857 = 86.0 (left: 86.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 * -4 = -12 (left: 12 -12)\n12 - -12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n3 - 7 = -4 (left: 36 -4)\n36 + -4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 * 10 = 30 (left: 12 30)\n30 - 12 = 18 (left: 30 - 18)\n30 - 18 = 12 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 - 3 = 33 (left: 7 33)\n33 - 7 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 * 5 = 15 (left: 3 15)\n3 * 15 = 45 (left: 45)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n3 * 7 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n19 - 3 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 - -9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 * 5 = 15 (left: 3 15)\n3 * 15 = 45 (left: 45)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 * 5 = 15 (left: 3 15)\n3 * 15 = 45 (left: 45)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n4 - 3 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 12 = -9 (left: 21 -9)\n21 - -9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 * 19 = 57 (left: 3 57)\n57 / 3 = 19.0 (left: 19.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 - 21 = -9 (left: 3 -9)\n3 - -9 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 7 = 10 (left: 15 10)\n15 + 10 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 + 21 = 33 (left: 3 33)\n3 + 33 = 36 (left: 36)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 / 3 = 4.0 (left: 4 4.0)\n4 * 4.0 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n12 - 7 = 5 (left: 6 5)\n6 * 5 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 - 3 = 0 (left: 19 0)\n19 + 0 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 - 3 = 0 (left: 19 0)\n19 + 0 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 * 15 = 45 (left: 7 45)\n45 - 7 = 38 (left: 38)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 12 = 19 (left: 1.0 19)\n1.0 + 19 = 20.0 (left: 20.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n1.0 / 7 = 0.142857 (left: 12 0.142857)\n12 / 0.142857 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 12 = -9 (left: 21 -9)\n21 - -9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 / 3 = 5.0 (left: 7 5.0)\n7 + 5.0 = 12.0 (left: 12.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 - -9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n4 - 3 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 * 15 = 45 (left: 7 45)\n45 - 7 = 38 (left: 38)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 - 3 = 9 (left: 4 9)\n4 * 9 = 36 (left: 36)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 - 7 = 29 (left: 3 29)\n29 - 3 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n21 - 3 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 / 7 = 5.0 (left: 3 5.0)\n3 * 5.0 = 15.0 (left: 15.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 12 = 0.25 (left: 3 7 0.25)\n3 + 7 = 10 (left: 0.25 10)\n10 / 0.25 = 40.0 (left: 40.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 - 19 = -16 (left: 3 -16)\n3 - -16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n12 - -4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 15 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n7 - 3 = 4 (left: 36 4)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 * -4 = -12 (left: 12 -12)\n12 - -12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 3 = 0 (left: 7 12 0)\n7 + 0 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 12 = -5 (left: 3 3 -5)\n3 - -5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n21 - 3 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 7 = 10 (left: 15 10)\n15 + 10 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 3 = 12 (left: 7 12)\n7 + 12 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n1.0 / 7 = 0.142857142857143 (left: 12 0.142857142857143)\n12 / 0.142857142857143 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n21 - 3 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 - 7 = 29 (left: 3 29)\n29 - 3 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 - 3 = 9 (left: 21 9)\n21 + 9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n7 - 3 = 4 (left: 36 4)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 7 = 0.428571 (left: 3 12 0.428571)\n3 / 0.428571 = 7.0 (left: 12 7.0)\n12 * 7.0 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 * 10 = 30 (left: 12 30)\n30 - 12 = 18 (left: 18)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n12 + 6 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 7 = 0.428571 (left: 3 12 0.428571)\n3 / 0.428571 = 7.0 (left: 12 7.0)\n12 * 7.0 = 84.0 (left: 84.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 - -9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n-9 - 7 = -16 (left: 3 -16)\n3 - -16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 + 4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n19 - 3 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 + 4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - -4 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n21 - 3 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 * 12 = 36 (left: 10 36)\n36 - 10 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n4 - 3 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n19 - 3 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n1.0 / 7 = 0.142857 (left: 12 0.142857)\n12 / 0.142857 = 86.0 (left: 86.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 * 3 = 9 (left: 19 9)\n19 + 9 = 28 (left: 28)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 - 3 = 9 (left: 21 9)\n21 + 9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n10 - 3 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 3 = 6 (left: 19 6)\n19 + 6 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 12 = 19 (left: 1.0 19)\n1.0 + 19 = 20.0 (left: 20.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n21 - 3 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n21 - 3 = 18 (left: 12 18)\n12 + 18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 12 = -5 (left: 3 3 -5)\n3 * -5 = -15 (left: 3 -15)\n3 - -15 = 18 (left: 18)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 - 3 = 9 (left: 21 9)\n21 + 9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n12 - 7 = 5 (left: 6 5)\n6 * 5 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 15 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n3 - 12 = -9 (left: -4 -9)\n-4 * -9 = 36 (left: 36)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n10 - 3 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 3 = 6 (left: 5 6)\n5 * 6 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n12 - -4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n10 - 3 = 7 (left: 12 7)\n12 * 7 = 84 (left: 84)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 - 21 = -18 (left: 12 -18)\n12 - -18 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n19 - 3 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 * 4 = 48 (left: 3 48)\n48 / 3 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 - 10 = -7 (left: 12 -7)\n12 - -7 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n3 * 7 = 21 (left: 9 21)\n9 + 21 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 12 = -5 (left: 3 3 -5)\n3 * -5 = -15 (left: 3 -15)\n3 - -15 = 18 (left: 18)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 15 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 - 3 = 9 (left: 21 9)\n21 + 9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 + 5 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 + 6 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 4 = 12 (left: 12 12)\n12 + 12 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 - 3 = 0 (left: 19 0)\n19 + 0 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 - 19 = -16 (left: 3 -16)\n3 - -16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n12 - 3 = 9 (left: 21 9)\n21 + 9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 12 = 15 (left: 10 15)\n10 + 15 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 / 7 = 5.0 (left: 3 5.0)\n3 * 5.0 = 15.0 (left: 15.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n7 - 6 = 1 (left: 12 1)\n12 * 1 = 12 (left: 12)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n12 * 4 = 48 (left: 3 48)\n48 / 3 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 / 3 = 4.0 (left: 3 7 4.0)\n3 + 7 = 10 (left: 4.0 10)\n4.0 * 10 = 40.0 (left: 40.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n19 - 3 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 3 = 6 (left: 7 12 6)\n12 / 6 = 2.0 (left: 7 2.0)\n7 * 2.0 = 14.0 (left: 14.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n3 * 7 = 21 (left: -9 21)\n21 - -9 = 30 (left: 30)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 3 = 6 (left: 19 6)\n19 + 6 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 - 7 = 29 (left: 3 29)\n29 - 3 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 7 = 10 (left: 15 10)\n15 + 10 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n12 - -4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 / 3 = 1.0 (left: 19 1.0)\n19 + 1.0 = 20.0 (left: 20.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 100, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n15 - 7 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 + 15 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 / 3 = 12.0 (left: 7 12.0)\n7 + 12.0 = 19.0 (left: 19.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 / 3 = 1.0 (left: 7 12 1.0)\n7 + 1.0 = 8.0 (left: 12 8.0)\n12 * 8.0 = 96.0 (left: 96.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n12 - -4 = 16 (left: 3 16)\n3 + 16 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 / 4 = 0.75 (left: 12 0.75)\n12 / 0.75 = 16.0 (left: 16.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 / 3 = 12.0 (left: 7 12.0)\n7 + 12.0 = 19.0 (left: 19.0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 7 = 5 (left: 3 3 5)\n3 * 5 = 15 (left: 3 15)\n3 * 15 = 45 (left: 45)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 7 = -4 (left: 3 12 -4)\n-4 - 3 = -7 (left: 12 -7)\n12 - -7 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 - -9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 12 = -5 (left: 3 3 -5)\n3 * -5 = -15 (left: 3 -15)\n3 - -15 = 18 (left: 18)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n7 - 3 = 4 (left: 36 4)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n3 + 10 = 13 (left: 12 13)\n12 + 13 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 - 12 = -9 (left: 3 7 -9)\n7 - -9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 7 = 21 (left: 3 12 21)\n3 + 21 = 24 (left: 12 24)\n12 + 24 = 36 (left: 24)\n24 - 24 = 0 (left: 0)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 - 3 = 4 (left: 15 4)\n15 + 4 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n3 + 15 = 18 (left: 7 18)\n7 + 18 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 7 = 10 (left: 3 12 10)\n12 + 10 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 * 12 = 36 (left: 3 7 36)\n36 - 3 = 33 (left: 7 33)\n33 - 7 = 26 (left: 26)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 + 12 = 19 (left: 3 3 19)\n3 + 19 = 22 (left: 3 22)\n3 + 22 = 25 (left: 25)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "7 - 3 = 4 (left: 3 12 4)\n3 * 12 = 36 (left: 4 36)\n36 - 4 = 32 (left: 32)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "3 + 12 = 15 (left: 3 7 15)\n7 - 3 = 4 (left: 15 4)\n15 + 4 = 19 (left: 19)"}
{"idx": 18, "input": "3 3 7 12", "reward": 0.001, "generate_data": "12 - 3 = 9 (left: 3 7 9)\n7 + 9 = 16 (left: 3 16)\n3 * 16 = 48 (left: 48)"}
