{"accuracy": 0.8125, "correct_count": 13, "total_count": 16}
{"id": "live_parallel_0-0-0", "model_name": "D-CORE-14B", "test_category": "live_parallel", "valid": false, "error": ["Could not find a matching function among index [0, 1] of model output for index 0 of possible answers.", {"Model Result Index 0": {"sub_error": ["Invalid value for parameter 'unit': 'celsius'. Expected one of ['', 'fahrenheit']. Case insensitive."], "sub_error_type": "value_error:string", "model_output_item": {"get_current_weather": {"location": "Beijing, China", "unit": "celsius"}}, "possible_answer_item": {"get_current_weather": {"location": ["Beijing, China"], "unit": ["", "fahrenheit"]}}}}, {"Model Result Index 1": {"sub_error": ["Invalid value for parameter 'location': 'Shanghai, China'. Expected one of ['Beijing, China']. Case insensitive."], "sub_error_type": "value_error:string", "model_output_item": {"get_current_weather": {"location": "Shanghai, China", "unit": "celsius"}}, "possible_answer_item": {"get_current_weather": {"location": ["Beijing, China"], "unit": ["", "fahrenheit"]}}}}], "error_type": "parallel_function_checker_no_order:cannot_find_match", "prompt": {"id": "live_parallel_0-0-0", "question": [[{"role": "user", "content": "\u8bf7\u95ee\u5317\u4eac\u7684\u5f53\u524d\u5929\u6c14\u72b6\u51b5\u5982\u4f55\uff1f\u8fd8\u6709\uff0c\u4e0a\u6d77\u7684\u5929\u6c14\u60c5\u51b5\u662f\u600e\u6837\u7684\uff1f"}]], "function": [{"name": "get_current_weather", "description": "Retrieves the current weather conditions for a specified city and state.", "parameters": {"type": "dict", "required": ["location"], "properties": {"location": {"type": "string", "description": "The location for which to get the weather, in the format of 'City, State', such as 'San Francisco, CA' if State for the city exists. 'City, Country' if State for the city doesn't exist."}, "unit": {"type": "string", "description": "The unit of temperature for the weather report.", "enum": ["celsius", "fahrenheit"], "default": "fahrenheit"}}}}]}, "model_result_raw": "\n[get_current_weather(location=\"Beijing, China\", unit=\"celsius\"), get_current_weather(location=\"Shanghai, China\", unit=\"celsius\")]", "model_result_decoded": [{"get_current_weather": {"location": "Beijing, China", "unit": "celsius"}}, {"get_current_weather": {"location": "Shanghai, China", "unit": "celsius"}}], "possible_answer": [{"get_current_weather": {"location": ["Beijing, China"], "unit": ["", "fahrenheit"]}}, {"get_current_weather": {"location": ["Shanghai, China"], "unit": ["", "fahrenheit"]}}]}
{"id": "live_parallel_12-8-0", "model_name": "D-CORE-14B", "test_category": "live_parallel", "valid": false, "error": ["Wrong number of functions."], "error_type": "parallel_function_checker_no_order:wrong_count", "prompt": {"id": "live_parallel_12-8-0", "question": [[{"role": "user", "content": "For breakfast I had a 12 ounce iced coffee and a banana.\n\nFor lunch I had a quesadilla\n\nBreakfast four ounces of asparagus, two eggs, one piece of gluten free bread."}]], "function": [{"name": "log_food", "description": "Logs a food item with details about the portion size and the meal it is associated with.", "parameters": {"type": "dict", "required": ["food_name", "portion_amount", "meal_name"], "properties": {"food_name": {"type": "string", "description": "The name of the food to log."}, "portion_amount": {"type": "float", "description": "The amount of the food item that was consumed, in specified units."}, "portion_unit": {"type": "string", "description": "The unit of measure for the portion amount. Choose a unit such as 'grams', 'ounces', 'pieces', 'cups', or 'tablespoons'.", "enum": ["grams", "ounces", "pieces", "cups", "tablespoons"], "default": "grams"}, "meal_name": {"type": "string", "description": "The name of the meal with which the food item is associated. Options include 'breakfast', 'lunch', 'dinner', or 'snack'."}}}}]}, "model_result_raw": "\n[log_food(food_name=\"asparagus\", portion_amount=4, portion_unit=\"ounces\", meal_name=\"breakfast\"), log_food(food_name=\"eggs\", portion_amount=2, portion_unit=\"pieces\", meal_name=\"breakfast\"), log_food(food_name=\"gluten-free bread\", portion_amount=1, portion_unit=\"pieces\", meal_name=\"breakfast\")]", "model_result_decoded": [{"log_food": {"food_name": "asparagus", "portion_amount": 4, "portion_unit": "ounces", "meal_name": "breakfast"}}, {"log_food": {"food_name": "eggs", "portion_amount": 2, "portion_unit": "pieces", "meal_name": "breakfast"}}, {"log_food": {"food_name": "gluten-free bread", "portion_amount": 1, "portion_unit": "pieces", "meal_name": "breakfast"}}], "possible_answer": [{"log_food": {"food_name": ["iced coffee"], "portion_amount": [12.0], "portion_unit": ["ounces"], "meal_name": ["breakfast"]}}, {"log_food": {"food_name": ["banana"], "portion_amount": [1.0], "portion_unit": ["pieces"], "meal_name": ["breakfast"]}}, {"log_food": {"food_name": ["quesadilla"], "portion_amount": [1.0], "portion_unit": ["pieces"], "meal_name": ["lunch"]}}, {"log_food": {"food_name": ["asparagus"], "portion_amount": [4.0], "portion_unit": ["ounces"], "meal_name": ["breakfast"]}}, {"log_food": {"food_name": ["eggs"], "portion_amount": [2.0], "portion_unit": ["pieces"], "meal_name": ["breakfast"]}}, {"log_food": {"food_name": ["gluten free bread"], "portion_amount": [1.0], "portion_unit": ["pieces"], "meal_name": ["breakfast"]}}]}
{"id": "live_parallel_15-11-0", "model_name": "D-CORE-14B", "test_category": "live_parallel", "valid": false, "error": ["Invalid syntax. Failed to decode AST. invalid syntax. Perhaps you forgot a comma? (<unknown>, line 1)"], "error_type": "ast_decoder:decoder_failed", "prompt": {"id": "live_parallel_15-11-0", "question": [[{"role": "user", "content": "list file in c drive and make file called testing.txt"}]], "function": [{"name": "cmd_controller.execute", "description": "Executes a specified command in the Windows operating system using the os.system() function.", "parameters": {"type": "dict", "required": ["command"], "properties": {"command": {"type": "string", "description": "The command line instruction to be passed to os.system() for execution, formatted as a Windows command prompt statement."}, "unit": {"type": "string", "description": "The unit of measurement for the command execution time, such as 'seconds' or 'milliseconds'", "enum": ["seconds", "milliseconds"], "default": "N/A"}}}}]}, "model_result_raw": "\n[cmd_controller.execute(command=\"dir C:\\\"), cmd_controller.execute(command=\"echo. > C:\\testing.txt\")]", "possible_answer": [{"cmd_controller.execute": {"command": ["dir c:\\", "dir c:\\\\"]}}, {"cmd_controller.execute": {"command": ["echo.>C:\\testing.txt", "echo.>C:\\\\testing.txt"], "unit": ["", "N/A"]}}]}