{"accuracy": 0.9575, "correct_count": 383, "total_count": 400}
{"id": "simple_17", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Missing required parameter: 'formatted'."], "error_type": "simple_function_checker:missing_required", "prompt": {"id": "simple_17", "question": [[{"role": "user", "content": "Find the prime factors of 450"}]], "function": [{"name": "get_prime_factors", "description": "Function to retrieve prime factors of a number.", "parameters": {"type": "dict", "properties": {"number": {"type": "integer", "description": "Number for which prime factors need to be calculated"}, "formatted": {"type": "boolean", "description": "Return formatted string if true, array if false. Default is true."}}, "required": ["number", "formatted"]}}]}, "model_result_raw": "\n[get_prime_factors(number=450)]", "model_result_decoded": [{"get_prime_factors": {"number": 450}}], "possible_answer": [{"get_prime_factors": {"number": [450], "formatted": [true, ""]}}]}
{"id": "simple_35", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'operating_hours': 11. Expected one of [23]."], "error_type": "value_error:others", "prompt": {"id": "simple_35", "question": [[{"role": "user", "content": "Find an all vegan restaurant in New York that opens until at least 11 PM."}]], "function": [{"name": "vegan_restaurant.find_nearby", "description": "Locate nearby vegan restaurants based on specific criteria like operating hours.", "parameters": {"type": "dict", "properties": {"location": {"type": "string", "description": "The city and state, e.g. New York, NY, you should format it as City, State."}, "operating_hours": {"type": "integer", "description": "Preferred latest closing time of the restaurant. E.g. if 11 is given, then restaurants that close at or after 11 PM will be considered. This is in 24 hour format. Default is 24."}}, "required": ["location"]}}]}, "model_result_raw": "\n[vegan_restaurant.find_nearby(location=\"New York, NY\", operating_hours=11)]", "model_result_decoded": [{"vegan_restaurant.find_nearby": {"location": "New York, NY", "operating_hours": 11}}], "possible_answer": [{"vegan_restaurant.find_nearby": {"location": ["New York, NY"], "operating_hours": [23]}}]}
{"id": "simple_55", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'cell_type': 'human cell'. Expected one of ['human']. Case insensitive."], "error_type": "value_error:string", "prompt": {"id": "simple_55", "question": [[{"role": "user", "content": "Find me detailed information about the structure of human cell"}]], "function": [{"name": "biology.get_cell_info", "description": "Retrieve information about the structure and functioning of a specified type of cell", "parameters": {"type": "dict", "properties": {"cell_type": {"type": "string", "description": "Type of cell you want information about"}, "detailed": {"type": "boolean", "description": "Indicate if you want a detailed description of the cell", "default": "false"}}, "required": ["cell_type"]}}]}, "model_result_raw": "\n[biology.get_cell_info(cell_type=\"human cell\", detailed=True)]", "model_result_decoded": [{"biology.get_cell_info": {"cell_type": "human cell", "detailed": true}}], "possible_answer": [{"biology.get_cell_info": {"cell_type": ["human"], "detailed": [true]}}]}
{"id": "simple_82", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Nested type checking failed for parameter 'numbers'. Expected outer type array with inner type <class 'float'>. Parameter value: [12, 15, 18, 20, 21, 26, 30]."], "error_type": "type_error:nested", "prompt": {"id": "simple_82", "question": [[{"role": "user", "content": "Calculate the average of list of integers [12, 15, 18, 20, 21, 26, 30]."}]], "function": [{"name": "calculate_average", "description": "Calculates the average of a list of numbers.", "parameters": {"type": "dict", "properties": {"numbers": {"type": "array", "items": {"type": "float"}, "description": "The list of numbers to calculate the average of."}}, "required": ["numbers"]}}]}, "model_result_raw": "\n[calculate_average(numbers=[12, 15, 18, 20, 21, 26, 30])]", "model_result_decoded": [{"calculate_average": {"numbers": [12, 15, 18, 20, 21, 26, 30]}}], "possible_answer": [{"calculate_average": {"numbers": [[12.0, 15.0, 18.0, 20.0, 21.0, 26.0, 30.0]]}}]}
{"id": "simple_87", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Nested type checking failed for parameter 'list'. Expected outer type array with inner type <class 'float'>. Parameter value: [5, 3, 4, 1, 2]."], "error_type": "type_error:nested", "prompt": {"id": "simple_87", "question": [[{"role": "user", "content": "Sort the list [5, 3, 4, 1, 2] in ascending order."}]], "function": [{"name": "array_sort", "description": "Sorts a given list in ascending or descending order.", "parameters": {"type": "dict", "properties": {"list": {"type": "array", "items": {"type": "float"}, "description": "The list of numbers to be sorted."}, "order": {"type": "string", "enum": ["ascending", "descending"], "description": "Order of sorting."}}, "required": ["list", "order"]}}]}, "model_result_raw": "\n[array_sort(list=[5, 3, 4, 1, 2], order='ascending')]", "model_result_decoded": [{"array_sort": {"list": [5, 3, 4, 1, 2], "order": "ascending"}}], "possible_answer": [{"array_sort": {"list": [[5.0, 3.0, 4.0, 1.0, 2.0]], "order": ["ascending"]}}]}
{"id": "simple_89", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid syntax. Failed to decode AST. invalid syntax. Perhaps you forgot a comma? (<unknown>, line 1)"], "error_type": "ast_decoder:decoder_failed", "prompt": {"id": "simple_89", "question": [[{"role": "user", "content": "Fetch all records for students studying Science in 'Bluebird High School' from the StudentDB."}]], "function": [{"name": "db_fetch_records", "description": "Fetch records from a specified database table based on certain conditions.", "parameters": {"type": "dict", "properties": {"database_name": {"type": "string", "description": "The name of the database."}, "table_name": {"type": "string", "description": "The name of the table from which records need to be fetched."}, "conditions": {"type": "dict", "properties": {"department": {"type": "string", "description": "The name of the department of students."}, "school": {"type": "string", "description": "The name of the school students are enrolled in."}}, "description": "The conditions based on which records are to be fetched."}, "fetch_limit": {"type": "integer", "description": "Limits the number of records to be fetched. Default is 0, which means no limit."}}, "required": ["database_name", "table_name", "conditions"]}}]}, "model_result_raw": "\nThe required parameter 'table_name' is missing from the question. Please provide the name of the table from which to fetch the records.", "possible_answer": [{"db_fetch_records": {"database_name": ["StudentDB"], "table_name": ["students"], "conditions": [{"department": ["Science"], "school": ["Bluebird High School", "Bluebird HS"]}], "fetch_limit": ["", 0]}}]}
{"id": "simple_103", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Nested type checking failed for parameter 'polynomial'. Expected outer type array with inner type <class 'float'>. Parameter value: [3, 2, -4]."], "error_type": "type_error:nested", "prompt": {"id": "simple_103", "question": [[{"role": "user", "content": "Calculate the area under the curve y=3x^2 + 2x - 4, between x = -1 and x = 2."}]], "function": [{"name": "mathematics.calculate_area_under_curve", "description": "Calculate the area under the curve for a given polynomial function within a specified interval.", "parameters": {"type": "dict", "properties": {"polynomial": {"type": "array", "items": {"type": "float"}, "description": "The coefficients of the polynomial, in decreasing order of exponent, where the first element is the coefficient for x^n, the second element is the coefficient for x^(n-1), and so on. The last element is the constant term."}, "limits": {"type": "array", "items": {"type": "float"}, "description": "A list of two numbers specifying the lower and upper limit for the integration interval."}}, "required": ["polynomial", "limits"]}}]}, "model_result_raw": "\n[mathematics.calculate_area_under_curve(polynomial=[3, 2, -4], limits=[-1, 2])]", "model_result_decoded": [{"mathematics.calculate_area_under_curve": {"polynomial": [3, 2, -4], "limits": [-1, 2]}}], "possible_answer": [{"mathematics.calculate_area_under_curve": {"polynomial": [[3.0, 2.0, -4.0]], "limits": [[-1.0, 2.0]]}}]}
{"id": "simple_136", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'annual_rate': 0.05. Expected one of [5.0]."], "error_type": "value_error:others", "prompt": {"id": "simple_136", "question": [[{"role": "user", "content": "Find the compound interest for an investment of $10000 with an annual interest rate of 5% compounded monthly for 5 years."}]], "function": [{"name": "compound_interest", "description": "Calculate compound interest for a certain time period.", "parameters": {"type": "dict", "properties": {"principal": {"type": "integer", "description": "The initial amount of money that was invested or loaned out."}, "annual_rate": {"type": "float", "description": "The interest rate for a year as a percentage."}, "compounding_freq": {"type": "string", "enum": ["monthly", "quarterly", "annually"], "description": "The number of times that interest is compounded per unit period."}, "time_in_years": {"type": "integer", "description": "The time the money is invested for in years."}}, "required": ["principal", "annual_rate", "compounding_freq", "time_in_years"]}}]}, "model_result_raw": "\n[compound_interest(principal=10000, annual_rate=0.05, compounding_freq='monthly', time_in_years=5)]", "model_result_decoded": [{"compound_interest": {"principal": 10000, "annual_rate": 0.05, "compounding_freq": "monthly", "time_in_years": 5}}], "possible_answer": [{"compound_interest": {"principal": [10000], "annual_rate": [5.0], "compounding_freq": ["monthly"], "time_in_years": [5]}}]}
{"id": "simple_194", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'slope_type': 'hill slope'. Expected one of ['hill', 'steep', 'moderate']. Case insensitive."], "error_type": "value_error:string", "prompt": {"id": "simple_194", "question": [[{"role": "user", "content": "What are the top three plants suitable for a hill slope in terms of erosion prevention?"}]], "function": [{"name": "get_plants_for_slope", "description": "Retrieve the list of plants suitable for slope based on erosion control ability.", "parameters": {"type": "dict", "properties": {"slope_type": {"type": "string", "description": "The type of slope like steep, moderate etc."}, "num_results": {"type": "integer", "description": "The number of top results needed. Default is 5."}}, "required": ["slope_type", "num_results"]}}]}, "model_result_raw": "\n[get_plants_for_slope(slope_type=\"hill slope\", num_results=3)]", "model_result_decoded": [{"get_plants_for_slope": {"slope_type": "hill slope", "num_results": 3}}], "possible_answer": [{"get_plants_for_slope": {"slope_type": ["hill", "steep", "moderate"], "num_results": [3]}}]}
{"id": "simple_234", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid syntax. Failed to decode AST. unterminated string literal (detected at line 1) (<unknown>, line 1)"], "error_type": "ast_decoder:decoder_failed", "prompt": {"id": "simple_234", "question": [[{"role": "user", "content": "Find important Wars in European history during the 19th century."}]], "function": [{"name": "history_eu.fetch_events", "description": "Fetches significant historical events within a specific time period in European history.", "parameters": {"type": "dict", "properties": {"century": {"type": "integer", "description": "The century you are interested in."}, "region": {"type": "string", "description": "The region of Europe you are interested in.", "enum": ["Northern", "Southern", "Eastern", "Western"]}, "category": {"type": "string", "description": "Category of the historical events. Default is 'Culture'.", "enum": ["Wars", "Culture", "Politics", "Scientific", "Others"]}}, "required": ["century", "region"]}}]}, "model_result_raw": "\nThe user's query lacks the required 'region' parameter for the function. The function requires specifying a region (Northern, Southern, Eastern, Western) in addition to the century. Since the user did not provide a region, the function cannot be called as is. Please specify a region within Europe to proceed.", "possible_answer": [{"history_eu.fetch_events": {"century": [19], "region": ["Northern", "Southern", "Eastern", "Western"], "category": ["Wars"]}}]}
{"id": "simple_267", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'month': 'upcoming'. Expected one of ['']. Case insensitive."], "error_type": "value_error:string", "prompt": {"id": "simple_267", "question": [[{"role": "user", "content": "Find the top rated modern sculpture exhibition happening in New York in the upcoming month."}]], "function": [{"name": "find_exhibition", "description": "Locate the most popular exhibitions based on criteria like location, time, art form, and user ratings.", "parameters": {"type": "dict", "properties": {"location": {"type": "string", "description": "The city and state where the exhibition is held, e.g., New York City, NY."}, "art_form": {"type": "string", "description": "The form of art the exhibition is displaying e.g., sculpture."}, "month": {"type": "string", "description": "The month of exhibition. Default value will return upcoming events if not specified."}, "user_ratings": {"type": "string", "enum": ["low", "average", "high"], "description": "Select exhibitions with user rating threshold. Default is 'low'"}}, "required": ["location", "art_form"]}}]}, "model_result_raw": "\n[find_exhibition(location=\"New York City, NY\", art_form=\"modern sculpture\", user_ratings=\"high\", month=\"upcoming\")]", "model_result_decoded": [{"find_exhibition": {"location": "New York City, NY", "art_form": "modern sculpture", "user_ratings": "high", "month": "upcoming"}}], "possible_answer": [{"find_exhibition": {"location": ["New York City, NY"], "art_form": ["sculpture", "modern sculpture"], "month": [""], "user_ratings": ["high"]}}]}
{"id": "simple_304", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Missing required parameter: 'team'."], "error_type": "simple_function_checker:missing_required", "prompt": {"id": "simple_304", "question": [[{"role": "user", "content": "Get point and rebound stats for player 'LeBron James' from last basketball game"}]], "function": [{"name": "player_stats.getLastGame", "description": "Get last game statistics for a specific player in basketball", "parameters": {"type": "dict", "properties": {"player_name": {"type": "string", "description": "The name of the basketball player."}, "team": {"type": "string", "description": "The team that player currently plays for."}, "metrics": {"type": "array", "items": {"type": "string", "enum": ["Points", "Rebounds", "Assists", "Blocks"]}, "description": "Specific metrics to retrieve. If no value is specified, all available metrics will be returned by default."}}, "required": ["player_name", "team"]}}]}, "model_result_raw": "\n[player_stats.getLastGame(player_name='LeBron James', metrics=['Points', 'Rebounds'])]", "model_result_decoded": [{"player_stats.getLastGame": {"player_name": "LeBron James", "metrics": ["Points", "Rebounds"]}}], "possible_answer": [{"player_stats.getLastGame": {"player_name": ["LeBron James"], "team": ["Los Angeles Lakers", "LAL", "Lakers"], "metrics": [["Points", "Rebounds"]]}}]}
{"id": "simple_352", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Missing required parameter: 'platform'."], "error_type": "simple_function_checker:missing_required", "prompt": {"id": "simple_352", "question": [[{"role": "user", "content": "Get the average user score for the game 'The Legend of Zelda: Breath of the Wild' from GameSpot."}]], "function": [{"name": "gamespot.getAverageUserScore", "description": "Retrieve the average user score of a game from GameSpot.", "parameters": {"type": "dict", "properties": {"game_name": {"type": "string", "description": "The name of the game."}, "platform": {"type": "string", "description": "The platform the game was released on (e.g., Nintendo Switch, PS5, etc.)", "default": "all platforms"}}, "required": ["game_name", "platform"]}}]}, "model_result_raw": "\n[gamespot.getAverageUserScore(game_name='The Legend of Zelda: Breath of the Wild')]", "model_result_decoded": [{"gamespot.getAverageUserScore": {"game_name": "The Legend of Zelda: Breath of the Wild"}}], "possible_answer": [{"gamespot.getAverageUserScore": {"game_name": ["The Legend of Zelda: Breath of the Wild"], "platform": ["Nintendo Switch", "all platforms"]}}]}
{"id": "simple_367", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'recipe_type': 'brownies'. Expected one of ['dessert']. Case insensitive."], "error_type": "value_error:string", "prompt": {"id": "simple_367", "question": [[{"role": "user", "content": "Find me a vegan recipe for brownies which prep time is under 30 minutes."}]], "function": [{"name": "find_recipe", "description": "Find a recipe based on the dietary restrictions, recipe type, and time constraints.", "parameters": {"type": "dict", "properties": {"dietary_restrictions": {"type": "string", "description": "Dietary restrictions e.g. vegan, vegetarian, gluten free, dairy free."}, "recipe_type": {"type": "string", "description": "Type of the recipe. E.g. dessert, main course, breakfast."}, "time": {"type": "integer", "description": "Time limit in minutes to prep the meal."}}, "required": ["dietary_restrictions", "recipe_type", "time"]}}]}, "model_result_raw": "\n[find_recipe(dietary_restrictions=\"vegan\", recipe_type=\"brownies\", time=30)]", "model_result_decoded": [{"find_recipe": {"dietary_restrictions": "vegan", "recipe_type": "brownies", "time": 30}}], "possible_answer": [{"find_recipe": {"dietary_restrictions": ["vegan"], "recipe_type": ["dessert"], "time": [30]}}]}
{"id": "simple_372", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'product': 'banana'. Expected one of ['bananas']. Case insensitive."], "error_type": "value_error:string", "prompt": {"id": "simple_372", "question": [[{"role": "user", "content": "Find the top five organic bananas brands on the basis of rating from Whole Foods store."}]], "function": [{"name": "whole_foods.find_top_brands", "description": "Get top brands based on a specific product from Whole Foods", "parameters": {"type": "dict", "properties": {"product": {"type": "string", "description": "The product for which the top brands should be fetched."}, "number": {"type": "integer", "description": "Number of top brands to be fetched. Default is 5"}, "organic": {"type": "boolean", "description": "If the product should be organic. Default is false"}}, "required": ["product"]}}]}, "model_result_raw": "\n[whole_foods.find_top_brands(product=\"banana\", number=5, organic=True)]", "model_result_decoded": [{"whole_foods.find_top_brands": {"product": "banana", "number": 5, "organic": true}}], "possible_answer": [{"whole_foods.find_top_brands": {"product": ["bananas"], "number": [5, ""], "organic": [true]}}]}
{"id": "simple_373", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'pack_size': [12]. Expected one of [[1, 1, 12]]."], "error_type": "value_error:list/tuple", "prompt": {"id": "simple_373", "question": [[{"role": "user", "content": "I want to buy apples, rice, and 12 pack of bottled water from a Walmart near San Jose. Show me the product information and stock availability."}]], "function": [{"name": "walmart.purchase", "description": "Retrieve information of items from Walmart including stock availability.", "parameters": {"type": "dict", "properties": {"loc": {"type": "string", "description": "Location of the nearest Walmart."}, "product_list": {"type": "array", "items": {"type": "string"}, "description": "Items to be purchased listed in an array."}, "pack_size": {"type": "array", "items": {"type": "integer"}, "description": "Size of the product pack if applicable. The size of the array should be equal to product_list. Default is not use it if not specified."}}, "required": ["loc", "product_list"]}}]}, "model_result_raw": "\n[walmart.purchase(loc=\"San Jose\", product_list=[\"apples\", \"rice\", \"bottled water\"], pack_size=[12])]", "model_result_decoded": [{"walmart.purchase": {"loc": "San Jose", "product_list": ["apples", "rice", "bottled water"], "pack_size": [12]}}], "possible_answer": [{"walmart.purchase": {"loc": ["San Jose", "San Jose, CA"], "product_list": [["apples", "rice", "bottled water"], ["apples", "rice", "water"]], "pack_size": [[1, 1, 12]]}}]}
{"id": "simple_382", "model_name": "Qwen3-8B", "test_category": "simple", "valid": false, "error": ["Invalid value for parameter 'start_date': '10th December 2022'. Expected one of ['2022-12-10', '10/12/2022', 'Dec 10, 2022', 'December 10, 2022']. Case insensitive."], "error_type": "value_error:string", "prompt": {"id": "simple_382", "question": [[{"role": "user", "content": "Book a single room for two nights at the Hilton Hotel in Chicago, starting from 10th December 2022."}]], "function": [{"name": "book_hotel", "description": "Book a room of specified type for a particular number of nights at a specific hotel, starting from a specified date.", "parameters": {"type": "dict", "properties": {"hotel_name": {"type": "string", "description": "The name of the hotel."}, "location": {"type": "string", "description": "The city in which the hotel is located."}, "room_type": {"type": "string", "description": "The type of room to be booked."}, "start_date": {"type": "string", "description": "The start date for the booking."}, "nights": {"type": "integer", "description": "The number of nights for which the booking is to be made."}}, "required": ["hotel_name", "location", "room_type", "start_date", "nights"]}}]}, "model_result_raw": "\n[book_hotel(hotel_name=\"Hilton Hotel\", location=\"Chicago\", room_type=\"single\", start_date=\"10th December 2022\", nights=2)]", "model_result_decoded": [{"book_hotel": {"hotel_name": "Hilton Hotel", "location": "Chicago", "room_type": "single", "start_date": "10th December 2022", "nights": 2}}], "possible_answer": [{"book_hotel": {"hotel_name": ["Hilton Hotel", "Hilton"], "location": ["Chicago"], "room_type": ["single"], "start_date": ["2022-12-10", "10/12/2022", "Dec 10, 2022", "December 10, 2022"], "nights": [2]}}]}