{"start_time": "2023-09-08 08:56:33", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:05:51", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:12:00", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:13:46", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:21:08", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:22:03", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:23:17", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:26:05", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:30:56", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:35:28", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 09:38:53", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 300, "avg_time_per_sample": "0:00:14.817502", "success": 221, "fail": 75}
{"total": 400, "avg_time_per_sample": "0:00:14.425531", "success": 305, "fail": 91}
{"total": 700, "avg_time_per_sample": "0:00:14.615285", "success": 536, "fail": 160}
{"total": 1100, "avg_time_per_sample": "0:00:14.402299", "success": 853, "fail": 243}
{"total": 1300, "avg_time_per_sample": "0:00:14.981653", "success": 968, "fail": 328}
{"total": 1400, "avg_time_per_sample": "0:00:15.303881", "success": 1022, "fail": 374}
{"total": 1500, "avg_time_per_sample": "0:00:15.573037", "success": 1076, "fail": 420}
{"total": 1900, "avg_time_per_sample": "0:00:17.210380", "success": 1258, "fail": 638}
{"total": 2000, "avg_time_per_sample": "0:00:17.673212", "success": 1296, "fail": 700}
{"total": 2100, "avg_time_per_sample": "0:00:18.006450", "success": 1341, "fail": 755}
{"start_time": "2023-09-08 17:22:43", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-08 18:51:41", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 200, "avg_time_per_sample": "0:00:21.458855", "success": 115, "fail": 81}
{"start_time": "2023-09-08 19:55:35", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 01:58:07", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 300, "avg_time_per_sample": "0:00:22.922180", "success": 163, "fail": 133}
{"start_time": "2023-09-09 06:59:20", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:15.393436", "success": 70, "fail": 26}
{"start_time": "2023-09-09 07:48:32", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:12.068588", "success": 81, "fail": 15}
{"total": 200, "avg_time_per_sample": "0:00:12.677579", "success": 157, "fail": 39}
{"total": 300, "avg_time_per_sample": "0:00:13.004444", "success": 232, "fail": 64}
{"start_time": "2023-09-09 08:59:58", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:03:41", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:06:16", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:08:04", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:09:08", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:11:01", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:14:31", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:15:08", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:16:58", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:18:35", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:23:46", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 09:36:00", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 10000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:11.357867", "success": 86, "fail": 14, "<class '__main__.RateLimitError'>": 13, "<class '__main__.ContentFormatError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:11.617385", "success": 168, "fail": 32, "<class '__main__.RateLimitError'>": 29, "<class '__main__.ContentFormatError'>": 2, "<class 'KeyError'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:11.710378", "success": 251, "fail": 49, "<class '__main__.RateLimitError'>": 45, "<class '__main__.ContentFormatError'>": 3, "<class 'KeyError'>": 1}
{"total": 400, "avg_time_per_sample": "0:00:12.150294", "success": 328, "fail": 72, "<class '__main__.RateLimitError'>": 65, "<class '__main__.ContentFormatError'>": 5, "<class 'KeyError'>": 1, "<class 'asyncio.exceptions.TimeoutError'>": 1}
{"total": 500, "avg_time_per_sample": "0:00:12.222478", "success": 417, "fail": 83, "<class '__main__.RateLimitError'>": 75, "<class '__main__.ContentFormatError'>": 5, "<class 'KeyError'>": 1, "<class 'asyncio.exceptions.TimeoutError'>": 2}
{"start_time": "2023-09-09 13:34:30", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:10.262440", "success": 94, "fail": 6, "<class '__main__.RateLimitError'>": 5, "<class 'asyncio.exceptions.TimeoutError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:09.383618", "success": 190, "fail": 10, "<class '__main__.RateLimitError'>": 8, "<class 'asyncio.exceptions.TimeoutError'>": 1, "<class '__main__.ContentFormatError'>": 1}
{"start_time": "2023-09-09 16:18:58", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 16:34:31", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:08.120360", "success": 97, "fail": 3, "<class '__main__.RateLimitError'>": 3}
{"start_time": "2023-09-09 16:50:05", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 16:52:49", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-09 16:53:07", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:00.000138", "success": 100, "fail": 0}
{"total": 200, "avg_time_per_sample": "0:00:00.000185", "success": 200, "fail": 0}
{"total": 300, "avg_time_per_sample": "0:00:00.000144", "success": 300, "fail": 0}
{"total": 400, "avg_time_per_sample": "0:00:00.000123", "success": 400, "fail": 0}
{"total": 500, "avg_time_per_sample": "0:00:00.000109", "success": 500, "fail": 0}
{"total": 600, "avg_time_per_sample": "0:00:00.000101", "success": 600, "fail": 0}
{"total": 700, "avg_time_per_sample": "0:00:00.000094", "success": 700, "fail": 0}
{"total": 800, "avg_time_per_sample": "0:00:00.000089", "success": 800, "fail": 0}
{"total": 900, "avg_time_per_sample": "0:00:00.000085", "success": 900, "fail": 0}
{"total": 996, "avg_time_per_sample": "0:00:00.000083", "success": 996, "fail": 0}
{"total": 997, "avg_time_per_sample": "0:00:00.000084", "success": 997, "fail": 0}
{"total": 998, "avg_time_per_sample": "0:00:00.000084", "success": 998, "fail": 0}
{"total": 999, "avg_time_per_sample": "0:00:00.000084", "success": 999, "fail": 0}
{"total": 1000, "avg_time_per_sample": "0:00:00.000084", "success": 1000, "fail": 0}
{"start_time": "2023-09-09 16:58:54", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:08.713460", "success": 94, "fail": 6, "<class 'AttributeError'>": 2, "<class '__main__.ContentFormatError'>": 2, "<class '__main__.RateLimitError'>": 2}
{"start_time": "2023-09-09 17:17:46", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:08.046534", "success": 99, "fail": 1, "<class '__main__.RateLimitError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:08.088738", "success": 197, "fail": 3, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:08.694813", "success": 292, "fail": 8, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 2, "<class 'asyncio.exceptions.TimeoutError'>": 4}
{"total": 400, "avg_time_per_sample": "0:00:08.920976", "success": 391, "fail": 9, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 3, "<class 'asyncio.exceptions.TimeoutError'>": 4}
{"total": 500, "avg_time_per_sample": "0:00:09.160870", "success": 489, "fail": 11, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 3, "<class 'asyncio.exceptions.TimeoutError'>": 6}
{"total": 600, "avg_time_per_sample": "0:00:09.275186", "success": 589, "fail": 11, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 3, "<class 'asyncio.exceptions.TimeoutError'>": 6}
{"total": 700, "avg_time_per_sample": "0:00:09.353500", "success": 687, "fail": 13, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 4, "<class 'asyncio.exceptions.TimeoutError'>": 7}
{"total": 800, "avg_time_per_sample": "0:00:09.405546", "success": 786, "fail": 14, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 4, "<class 'asyncio.exceptions.TimeoutError'>": 8}
{"total": 900, "avg_time_per_sample": "0:00:09.431004", "success": 883, "fail": 17, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 7, "<class 'asyncio.exceptions.TimeoutError'>": 8}
{"start_time": "2023-09-10 00:12:10", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-10 00:23:11", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-10 00:29:16", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:08.254642", "success": 99, "fail": 1, "<class '__main__.ContentFormatError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:08.458849", "success": 196, "fail": 4, "<class '__main__.ContentFormatError'>": 1, "<class '__main__.RateLimitError'>": 1, "<class 'requests.exceptions.ReadTimeout'>": 1, "<class 'KeyError'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:08.281356", "success": 295, "fail": 5, "<class '__main__.ContentFormatError'>": 1, "<class '__main__.RateLimitError'>": 2, "<class 'requests.exceptions.ReadTimeout'>": 1, "<class 'KeyError'>": 1}
{"total": 400, "avg_time_per_sample": "0:00:08.295711", "success": 392, "fail": 8, "<class '__main__.ContentFormatError'>": 1, "<class '__main__.RateLimitError'>": 3, "<class 'requests.exceptions.ReadTimeout'>": 2, "<class 'KeyError'>": 2}
{"total": 500, "avg_time_per_sample": "0:00:08.524206", "success": 489, "fail": 11, "<class '__main__.ContentFormatError'>": 2, "<class '__main__.RateLimitError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 2, "<class 'KeyError'>": 2}
{"total": 600, "avg_time_per_sample": "0:00:09.100999", "success": 584, "fail": 16, "<class '__main__.ContentFormatError'>": 3, "<class '__main__.RateLimitError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 3, "<class 'KeyError'>": 2, "<class 'matplotlib.backend_bases._get_renderer.<locals>.Done'>": 2, "<class 'RuntimeError'>": 1}
{"total": 700, "avg_time_per_sample": "0:00:09.426066", "success": 682, "fail": 18, "<class '__main__.ContentFormatError'>": 4, "<class '__main__.RateLimitError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 3, "<class 'KeyError'>": 3, "<class 'matplotlib.backend_bases._get_renderer.<locals>.Done'>": 2, "<class 'RuntimeError'>": 1}
{"total": 800, "avg_time_per_sample": "0:00:09.833615", "success": 782, "fail": 18, "<class '__main__.ContentFormatError'>": 4, "<class '__main__.RateLimitError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 3, "<class 'KeyError'>": 3, "<class 'matplotlib.backend_bases._get_renderer.<locals>.Done'>": 2, "<class 'RuntimeError'>": 1}
{"total": 900, "avg_time_per_sample": "0:00:10.259887", "success": 878, "fail": 22, "<class '__main__.ContentFormatError'>": 6, "<class '__main__.RateLimitError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 5, "<class 'KeyError'>": 3, "<class 'matplotlib.backend_bases._get_renderer.<locals>.Done'>": 2, "<class 'RuntimeError'>": 1}
{"start_time": "2023-09-10 12:04:30", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:11.379997", "success": 89, "fail": 11, "<class '__main__.RateLimitError'>": 11}
{"total": 200, "avg_time_per_sample": "0:00:10.538229", "success": 182, "fail": 18, "<class '__main__.RateLimitError'>": 17, "<class '__main__.ContentFormatError'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:10.080480", "success": 279, "fail": 21, "<class '__main__.RateLimitError'>": 18, "<class '__main__.ContentFormatError'>": 3}
{"total": 400, "avg_time_per_sample": "0:00:10.205028", "success": 368, "fail": 32, "<class '__main__.RateLimitError'>": 26, "<class '__main__.ContentFormatError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 500, "avg_time_per_sample": "0:00:09.729199", "success": 466, "fail": 34, "<class '__main__.RateLimitError'>": 27, "<class '__main__.ContentFormatError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 1, "<class 'KeyError'>": 1}
{"total": 600, "avg_time_per_sample": "0:00:09.359434", "success": 566, "fail": 34, "<class '__main__.RateLimitError'>": 27, "<class '__main__.ContentFormatError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 1, "<class 'KeyError'>": 1}
{"total": 700, "avg_time_per_sample": "0:00:09.258511", "success": 665, "fail": 35, "<class '__main__.RateLimitError'>": 28, "<class '__main__.ContentFormatError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 1, "<class 'KeyError'>": 1}
{"total": 800, "avg_time_per_sample": "0:00:09.413597", "success": 755, "fail": 45, "<class '__main__.RateLimitError'>": 36, "<class '__main__.ContentFormatError'>": 6, "<class 'requests.exceptions.ReadTimeout'>": 2, "<class 'KeyError'>": 1}
{"total": 900, "avg_time_per_sample": "0:00:09.777687", "success": 841, "fail": 59, "<class '__main__.RateLimitError'>": 47, "<class '__main__.ContentFormatError'>": 9, "<class 'requests.exceptions.ReadTimeout'>": 2, "<class 'KeyError'>": 1}
{"start_time": "2023-09-10 23:54:07", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"start_time": "2023-09-10 23:55:06", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 1, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:08.918516", "success": 98, "fail": 2, "<class '__main__.RateLimitError'>": 1, "<class '__main__.ContentFormatError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:09.348586", "success": 197, "fail": 3, "<class '__main__.RateLimitError'>": 2, "<class '__main__.ContentFormatError'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:09.643987", "success": 296, "fail": 4, "<class '__main__.RateLimitError'>": 3, "<class '__main__.ContentFormatError'>": 1}
{"total": 100, "avg_time_per_sample": "0:00:37.369501", "success": 98, "fail": 2, "<class '__main__.RateLimitError'>": 2}
{"total": 400, "avg_time_per_sample": "0:00:09.808614", "success": 394, "fail": 6, "<class '__main__.RateLimitError'>": 5, "<class '__main__.ContentFormatError'>": 1}
{"total": 500, "avg_time_per_sample": "0:00:09.743337", "success": 493, "fail": 7, "<class '__main__.RateLimitError'>": 5, "<class '__main__.ContentFormatError'>": 1, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 600, "avg_time_per_sample": "0:00:09.766897", "success": 590, "fail": 10, "<class '__main__.RateLimitError'>": 7, "<class '__main__.ContentFormatError'>": 2, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 700, "avg_time_per_sample": "0:00:09.655456", "success": 689, "fail": 11, "<class '__main__.RateLimitError'>": 7, "<class '__main__.ContentFormatError'>": 3, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:35.696115", "success": 196, "fail": 4, "<class '__main__.RateLimitError'>": 3, "<class '__main__.ContentFormatError'>": 1}
{"total": 800, "avg_time_per_sample": "0:00:09.426243", "success": 789, "fail": 11, "<class '__main__.RateLimitError'>": 7, "<class '__main__.ContentFormatError'>": 3, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 900, "avg_time_per_sample": "0:00:09.253877", "success": 889, "fail": 11, "<class '__main__.RateLimitError'>": 7, "<class '__main__.ContentFormatError'>": 3, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:31.675913", "success": 296, "fail": 4, "<class '__main__.RateLimitError'>": 3, "<class '__main__.ContentFormatError'>": 1}
{"start_time": "2023-09-11 07:12:50", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:05.716299", "success": 99, "fail": 1, "<class 'KeyError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:05.991885", "success": 198, "fail": 2, "<class 'KeyError'>": 1, "<class '__main__.ContentFormatError'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:06.357607", "success": 297, "fail": 3, "<class 'KeyError'>": 1, "<class '__main__.ContentFormatError'>": 2}
{"total": 400, "avg_time_per_sample": "0:00:06.638096", "success": 393, "fail": 7, "<class 'KeyError'>": 2, "<class '__main__.ContentFormatError'>": 4, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 500, "avg_time_per_sample": "0:00:06.778950", "success": 492, "fail": 8, "<class 'KeyError'>": 2, "<class '__main__.ContentFormatError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 600, "avg_time_per_sample": "0:00:07.071397", "success": 592, "fail": 8, "<class 'KeyError'>": 2, "<class '__main__.ContentFormatError'>": 5, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 700, "avg_time_per_sample": "0:00:07.488855", "success": 691, "fail": 9, "<class 'KeyError'>": 2, "<class '__main__.ContentFormatError'>": 6, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 800, "avg_time_per_sample": "0:00:07.840047", "success": 789, "fail": 11, "<class 'KeyError'>": 2, "<class '__main__.ContentFormatError'>": 8, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 900, "avg_time_per_sample": "0:00:08.211215", "success": 888, "fail": 12, "<class 'KeyError'>": 2, "<class '__main__.ContentFormatError'>": 8, "<class 'requests.exceptions.ReadTimeout'>": 2}
{"start_time": "2023-09-11 11:00:01", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 1, "llm": "gpt-4"}
{"start_time": "2023-09-11 11:00:14", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:06.475828", "success": 99, "fail": 1, "<class '__main__.ContentFormatError'>": 1}
{"total": 200, "avg_time_per_sample": "0:00:07.398048", "success": 198, "fail": 2, "<class '__main__.ContentFormatError'>": 1, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 300, "avg_time_per_sample": "0:00:07.641713", "success": 297, "fail": 3, "<class '__main__.ContentFormatError'>": 2, "<class 'requests.exceptions.ReadTimeout'>": 1}
{"total": 400, "avg_time_per_sample": "0:00:07.772483", "success": 393, "fail": 7, "<class '__main__.ContentFormatError'>": 2, "<class 'requests.exceptions.ReadTimeout'>": 4, "<class 'KeyError'>": 1}
{"total": 500, "avg_time_per_sample": "0:00:07.744384", "success": 491, "fail": 9, "<class '__main__.ContentFormatError'>": 4, "<class 'requests.exceptions.ReadTimeout'>": 4, "<class 'KeyError'>": 1}
{"total": 600, "avg_time_per_sample": "0:00:07.786183", "success": 587, "fail": 13, "<class '__main__.ContentFormatError'>": 6, "<class 'requests.exceptions.ReadTimeout'>": 6, "<class 'KeyError'>": 1}
{"total": 700, "avg_time_per_sample": "0:00:07.803555", "success": 687, "fail": 13, "<class '__main__.ContentFormatError'>": 6, "<class 'requests.exceptions.ReadTimeout'>": 6, "<class 'KeyError'>": 1}
{"total": 800, "avg_time_per_sample": "0:00:07.860747", "success": 786, "fail": 14, "<class '__main__.ContentFormatError'>": 6, "<class 'requests.exceptions.ReadTimeout'>": 7, "<class 'KeyError'>": 1}
{"total": 900, "avg_time_per_sample": "0:00:07.903269", "success": 885, "fail": 15, "<class '__main__.ContentFormatError'>": 6, "<class 'requests.exceptions.ReadTimeout'>": 8, "<class 'KeyError'>": 1}
{"start_time": "2023-09-11 13:37:53", "temperature": 1.0, "top_p": 1.0, "check": true, "tool_graph": "graph_desc_original.json", "tool_desc": "tools_desc_original.json", "api_addr": "10.15.82.10", "api_port": 8002, "play": false, "method": null, "tool_number": null, "number_of_samples": 1000, "seed": 0, "output_dir": "result_20230907162747_tools_desc_original_gpt-4_t1_0_p1_0_check", "save_figure": true, "multiworker": 5, "llm": "gpt-4"}
{"total": 100, "avg_time_per_sample": "0:00:25.528321", "success": 100, "fail": 0}
{"total": 200, "avg_time_per_sample": "0:00:27.927671", "success": 200, "fail": 0}
