{"accuracy": 0.7222222222222222, "correct_count": 13, "total_count": 18}
{"id": "live_relevance_3-3-0", "model_name": "D-CORE-14B", "test_category": "live_relevance", "valid": false, "error": ["Invalid syntax. Failed to decode AST when it should have. invalid syntax (<unknown>, line 1)"], "error_type": "relevance_error:decoder_failed", "prompt": {"id": "live_relevance_3-3-0", "question": [[{"role": "user", "content": "Could you tell me the current temperature in New York, using its latitude and longitude coordinates?"}]], "function": [{"name": "open_meteo_api.fetch_weather_data", "description": "Fetches the current temperature from the Open-Meteo API for a specified location using latitude and longitude coordinates.", "parameters": {"type": "dict", "required": ["coordinates"], "properties": {"coordinates": {"type": "tuple", "description": "A tuple containing the latitude and longitude of the location for which the weather data is requested. Latitude and longitude should be in decimal degrees.", "items": {"type": "float"}}, "units": {"type": "string", "description": "The unit system in which the temperature will be returned.", "enum": ["Celsius", "Fahrenheit", "Kelvin"], "default": "Celsius"}}}}]}, "model_result": "\nThe question does not provide the latitude and longitude coordinates for New York. Please provide the coordinates to fetch the current temperature.", "decoded_result": null}
{"id": "live_relevance_6-6-0", "model_name": "D-CORE-14B", "test_category": "live_relevance", "valid": false, "error": ["Invalid syntax. Failed to decode AST when it should have. invalid syntax (<unknown>, line 1)"], "error_type": "relevance_error:decoder_failed", "prompt": {"id": "live_relevance_6-6-0", "question": [[{"role": "system", "content": "You are a bot owned by a courier service, which helps people track the status of their parcels & packages, the status of their complaints, and submit new complaints. The courier service is named \"Tipax\"\nYou are expected to chat in a friendly and polite way with the users when resolving their requests and issues. You should detect the user's intent from the chat and based on it, perform some tasks, answer some questions, or hand over the chat to a human agent.  \nThese are the tasks that you can perform for the user based on the intent:\n\n1. Track the status of their parcels: ask for the parcel tracking number, and then call the function: \"GET_PARCEL_STATE\" providing the tracking number. Then explain the state politely to the user based on the result of the function calling.\n\n2. track complaint state: Ask for the user's phone number and complaint tracking number, and then call the function: \"GET_COMPLAINT_STATE\" providing the phone number and tracking number. Then explain the status politely to the user based on the result of the function calling.  \n\n 3. Submit a complaint:listen to the user's complaints and detect the subject of their complaint, ask for their phone number, their name,  and their parcel tracking number, and then call the function: \"SUBMIT_COMPLAINT\" providing the parameters. The user must be informed that the complaint has been submitted and she will receive an SMS.\n\n4. If you are not certain about user intent, or you can not answer the user question, or you receive any error in function calls, call the \"HANDOVER\" function.\n\n- Do not respond to irrelevant messages and if someone asks irrelevant questions call the \"HANDOVER\" function"}, {"role": "user", "content": "I have forgotten my parcel tracking number! What  should I do?"}]], "function": [{"name": "GET_PARCEL_STATE", "description": "Determines the current state of a parcel based on its tracking ID.", "parameters": {"type": "dict", "required": ["parcelTrackingId"], "properties": {"parcelTrackingId": {"type": "string", "description": "The unique identifier for the parcel, used to track its status."}, "includeHistory": {"type": "boolean", "description": "Specifies whether to include the parcel's state history in the response.", "default": false}}}}, {"name": "submit_complaint", "description": "Submit a complaint for a user who is experiencing issues with parcel delivery, such as non-delivery or incorrect data entry.", "parameters": {"type": "dict", "required": ["userName", "userMobileNumber", "parcelTrackingId", "subject"], "properties": {"userName": {"type": "string", "description": "The user's full name in Persian script."}, "userMobileNumber": {"type": "string", "description": "The user's mobile number, expected to be in the format +[country code][number] (e.g., +989123456789)."}, "parcelTrackingId": {"type": "string", "description": "The unique tracking number of the parcel in question."}, "subject": {"type": "string", "description": "The subject of the complaint.", "enum": ["NON_DELIVERY", "INCORRECT_DATA_ENTRY", "LACK_OF_NOTIFICATION", "INAPPROPRIATE_BEHAVIOR", "DELAYED_PAYMENT", "ADDITIONAL_FEE", "ONLINE_LINK", "DELAY_COMPLAINT", "PACKAGE_LOSS", "PACKAGE_DAMAGE", "PACKAGE_CONTENT_DEDUCTION", "RECIPIENT_DISCREPANCY", "DELAYED_LOADS", "INCORRECT_DIMENSION", "INCORRECT_ADDRESS", "INCORRECT_INFORMATION", "NON_REGISTRATION", "DELAY_IN_COLLECTION", "DELAY_IN_SENDING", "DELAY_IN_DISTRIBUTION", "REFUND", "DELAY_NON_COLLECTION"]}, "statement": {"type": "string", "description": "A detailed description of the issue from the user's perspective.", "default": ""}}}}, {"name": "GET_COMPLAINT_STATE", "description": "Retrieves the current state of a user's complaint using the complaint tracking ID.", "parameters": {"type": "dict", "required": ["userMobileNumber", "complaintTrackingId"], "properties": {"userMobileNumber": {"type": "string", "description": "The mobile number of the user, formatted as a string without spaces or special characters (e.g., '1234567890')."}, "complaintTrackingId": {"type": "string", "description": "A unique identifier for the complaint, used to track the status of the complaint."}}}}, {"name": "handover_to_agent", "description": "This function initiates a handover process to transfer the ongoing chat session from a virtual assistant to a human agent.", "parameters": {"type": "dict", "required": ["agent_id", "session_id"], "properties": {"agent_id": {"type": "string", "description": "The unique identifier of the human agent to whom the chat will be transferred."}, "session_id": {"type": "string", "description": "The session identifier of the current chat that needs to be handed over."}, "priority": {"type": "string", "description": "The priority level for the handover request.", "enum": ["low", "medium", "high"], "default": "medium"}, "message": {"type": "string", "description": "An optional message from the virtual assistant to the human agent.", "default": ""}}}}]}, "model_result": "\nThe user has forgotten their parcel tracking number, which is required to proceed with any parcel state inquiry. None of the provided functions can retrieve or recover a forgotten tracking number. The available functions require the tracking number as a parameter, which is not accessible here.", "decoded_result": null}
{"id": "live_relevance_13-13-0", "model_name": "D-CORE-14B", "test_category": "live_relevance", "valid": false, "error": ["Invalid syntax. Failed to decode AST when it should have. invalid syntax (<unknown>, line 1)"], "error_type": "relevance_error:decoder_failed", "prompt": {"id": "live_relevance_13-13-0", "question": [[{"role": "user", "content": "I have a plan for a short trip for which I need to find for a Train to travel. Can you find me the one in Portland, OR."}]], "function": [{"name": "Trains_1_GetTrainTickets", "description": "Reserve train tickets for a specific journey between two cities on a given date and time, with options for trip protection and class of service.", "parameters": {"type": "dict", "required": ["_from", "to", "date_of_journey", "journey_start_time"], "properties": {"_from": {"type": "string", "description": "The departure city for the train journey, such as 'New York, NY'."}, "to": {"type": "string", "description": "The destination city for the train journey, such as 'Los Angeles, CA'."}, "date_of_journey": {"type": "string", "description": "The date of the train journey, in the format 'MM/DD/YYYY'."}, "journey_start_time": {"type": "string", "description": "The departure time of the train, in 24-hour format 'HH:MM'."}, "number_of_adults": {"type": "integer", "description": "The number of adult passengers to reserve train tickets for.", "default": 1}, "trip_protection": {"type": "boolean", "description": "Indicates whether to add trip protection to the reservation, which incurs an additional fee.", "default": false}, "_class": {"type": "string", "description": "The fare class for the train reservation.", "enum": ["Value", "Flexible", "Business"], "default": "Value"}}}}, {"name": "Trains_1_FindTrains", "description": "Find trains going to a specified destination city on a given date, with options for fare class and number of adult passengers.", "parameters": {"type": "dict", "required": ["_from", "to", "date_of_journey"], "properties": {"_from": {"type": "string", "description": "Starting city for the train journey, in the format of 'City, State', such as 'San Francisco, CA'."}, "to": {"type": "string", "description": "Destination city for the train journey, in the format of 'City, State', such as 'Los Angeles, CA'."}, "date_of_journey": {"type": "string", "description": "Date of the train journey, in the format 'YYYY-MM-DD', such as '2023-04-15'."}, "_class": {"type": "string", "description": "Fare class for the train reservation.", "enum": ["Value", "Flexible", "Business"], "default": "Value"}, "number_of_adults": {"type": "integer", "description": "Number of adult passengers to reserve train tickets for.", "default": 1}}}}]}, "model_result": "\nThe function call cannot be made yet because the required parameters are missing. Please provide the starting city (_from) and the date of the journey (date_of_journey). Additionally, let me know if you have any preferences for fare class or the number of adult passengers.", "decoded_result": null}
{"id": "live_relevance_14-14-0", "model_name": "D-CORE-14B", "test_category": "live_relevance", "valid": false, "error": ["Invalid syntax. Failed to decode AST when it should have. invalid syntax (<unknown>, line 1)"], "error_type": "relevance_error:decoder_failed", "prompt": {"id": "live_relevance_14-14-0", "question": [[{"role": "user", "content": "Could you help me get tickets for an IMAX movie at 3rd Street Cinema for this Saturday evening 2023.10.1?"}]], "function": [{"name": "Movies_1_BuyMovieTickets", "description": "This function facilitates the purchase of movie tickets for a specified show, allowing for selection of the movie, number of tickets, show date, location, and show type.", "parameters": {"type": "dict", "required": ["movie_name", "number_of_tickets", "show_date", "location", "show_time"], "properties": {"movie_name": {"type": "string", "description": "The title of the movie for which tickets are being purchased."}, "number_of_tickets": {"type": "integer", "description": "The total count of tickets to be bought.", "enum": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, "show_date": {"type": "string", "description": "The date of the movie showing, in the format of 'YYYY-MM-DD'."}, "location": {"type": "string", "description": "The location of the theater, in the format of 'City, State', such as 'Los Angeles, CA'."}, "show_time": {"type": "string", "description": "The start time of the movie showing, in 24-hour format 'HH:MM'."}, "show_type": {"type": "string", "description": "The format in which the movie is being shown.", "enum": ["regular", "3d", "imax"], "default": "regular"}}}}, {"name": "Movies_1_FindMovies", "description": "Search for movies based on specific criteria such as location, genre, and show type.", "parameters": {"type": "dict", "required": ["location"], "properties": {"location": {"type": "string", "description": "The city where the theater is located, in the format of 'City, State', such as 'Berkeley, CA' or 'New York, NY'."}, "theater_name": {"type": "string", "description": "The name of the theater. If not provided, all theaters are considered.", "default": "dontcare"}, "genre": {"type": "string", "description": "The genre of the movie.", "enum": ["World", "Offbeat", "Mystery", "Supernatural", "Horror", "Animation", "Sci-fi", "Documentary", "Drama", "War", "Family", "Action", "dontcare"], "default": "dontcare"}, "show_type": {"type": "string", "description": "The format of the movie show such as regular, 3D, or IMAX.", "enum": ["regular", "3d", "imax", "dontcare"], "default": "dontcare"}}}}, {"name": "Movies_1_GetTimesForMovie", "description": "Retrieve available showtimes for a specific movie at a given theater location on a specified date.", "parameters": {"type": "dict", "required": ["movie_name", "location", "show_date"], "properties": {"movie_name": {"type": "string", "description": "The title of the movie for which showtimes are being requested."}, "location": {"type": "string", "description": "The city in which the theater is located, in the format of 'City, State', such as 'Berkeley, CA' and 'New York, NY'."}, "show_date": {"type": "string", "description": "The date for which to retrieve showtimes, in the format 'YYYY-MM-DD'."}, "theater_name": {"type": "string", "description": "The name of the theater where the movie is showing. If not specified, showtimes for all theaters are considered.", "default": "All Theaters"}, "show_type": {"type": "string", "description": "The format of the movie showing, such as 'regular', '3D', or 'IMAX'.", "enum": ["regular", "3d", "imax"], "default": "regular"}}}}, {"name": "Restaurants_2_ReserveRestaurant", "description": "Make a table reservation at a specified restaurant for a given number of guests at a particular date and time.", "parameters": {"type": "dict", "required": ["restaurant_name", "location", "time", "date"], "properties": {"restaurant_name": {"type": "string", "description": "The full name of the restaurant where the reservation is to be made."}, "location": {"type": "string", "description": "The location of the restaurant, in the format of 'City, State', such as 'New York, NY' or 'San Francisco, CA'."}, "time": {"type": "string", "description": "The desired time for the reservation, in 24-hour format 'HH:MM', such as '19:00' for 7 PM."}, "number_of_guests": {"type": "integer", "description": "The number of guests for the reservation.", "default": 2}, "date": {"type": "string", "description": "The date for which the reservation is made, in ISO 8601 format 'YYYY-MM-DD', such as '2023-04-15'."}}}}, {"name": "Restaurants_2_FindRestaurants", "description": "Find restaurants by location and by category, taking into account optional preferences such as price range, vegetarian options, and outdoor seating availability.", "parameters": {"type": "dict", "required": ["category", "location"], "properties": {"category": {"type": "string", "description": "The category of food offered by the restaurant, such as 'Mexican', 'Italian', or 'Japanese'.", "enum": ["Mexican", "Bistro", "Izakaya", "Brunch", "Thai", "Sandwich", "Seafood", "Barbecue", "European", "Steakhouse", "Vietnamese", "Asian", "Coffeehouse", "American", "Gastropub", "Austrian", "Italian", "Indian", "Spanish", "Vegetarian", "Brasserie", "Chinese", "Breakfast", "Greek", "California", "Tapas", "Take-out", "Japanese"]}, "location": {"type": "string", "description": "The location of the restaurant, in the format of 'City, State', such as 'San Francisco, CA'."}, "price_range": {"type": "string", "description": "The price range for the restaurant, with 'dontcare' indicating no preference.", "enum": ["cheap", "moderate", "pricey", "ultra high-end", "dontcare"], "default": "dontcare"}, "has_vegetarian_options": {"type": "boolean", "description": "Flag indicating whether the restaurant offers vegetarian options.", "default": false}, "has_seating_outdoors": {"type": "boolean", "description": "Flag indicating whether the restaurant provides outdoor seating.", "default": false}}}}]}, "model_result": "\nThe required parameters for purchasing movie tickets are missing. Specifically, the movie name, exact location (city and state), and show time are needed. Please provide these details to proceed.", "decoded_result": null}
{"id": "live_relevance_15-15-0", "model_name": "D-CORE-14B", "test_category": "live_relevance", "valid": false, "error": ["Invalid syntax. Failed to decode AST when it should have. invalid syntax (<unknown>, line 1)"], "error_type": "relevance_error:decoder_failed", "prompt": {"id": "live_relevance_15-15-0", "question": [[{"role": "user", "content": "Can you help me reserve a train ticket from Sacramento? I'm looking for one seat and I'm flexible with the fare class."}]], "function": [{"name": "Hotels_2_BookHouse", "description": "Reserve the specified house for a set duration based on the number of adults and the check-in and check-out dates provided.", "parameters": {"type": "dict", "required": ["where_to", "check_in_date", "check_out_date"], "properties": {"where_to": {"type": "string", "description": "The location of the house to be booked, in the format of 'City, State', such as 'Berkeley, CA' or 'New York, NY'."}, "number_of_adults": {"type": "integer", "description": "The number of adults included in the reservation. Select 'dontcare' for no preference.", "enum": ["1", "2", "3", "4", "5", "dontcare"], "default": "dontcare"}, "check_in_date": {"type": "string", "description": "The start date for the reservation in the format 'YYYY-MM-DD'."}, "check_out_date": {"type": "string", "description": "The end date for the reservation in the format 'YYYY-MM-DD'."}}}}, {"name": "Hotels_2_SearchHouse", "description": "Search for a house at a specified location with filter options for laundry service, the number of adults, and rating.", "parameters": {"type": "dict", "required": ["where_to"], "properties": {"where_to": {"type": "string", "description": "The location of the house in the format of 'City, State', such as 'Berkeley, CA' or 'New York, NY'."}, "has_laundry_service": {"type": "boolean", "description": "Indicates if the house must have a laundry service available.", "enum": ["True", "False", "dontcare"], "default": "dontcare"}, "number_of_adults": {"type": "integer", "description": "The number of adults for the reservation. Use 'dontcare' to indicate no preference.", "enum": ["1", "2", "3", "4", "5", "dontcare"], "default": "dontcare"}, "rating": {"type": "float", "description": "The minimum review rating of the house from 1.0 to 5.0, with 5 being the highest. Use 'dontcare' to indicate no preference.", "default": "dontcare"}}}}, {"name": "Trains_1_GetTrainTickets", "description": "Reserves tickets for a train journey between specified cities on a given date and time, with options for the number of adults, trip protection, and fare class.", "parameters": {"type": "dict", "required": ["_from", "to", "date_of_journey", "journey_start_time", "number_of_adults", "trip_protection"], "properties": {"_from": {"type": "string", "description": "The starting city for the train journey, in the format 'City, State', such as 'Berkeley, CA' or 'New York, NY'."}, "to": {"type": "string", "description": "The destination city for the train journey, in the format 'City, State', such as 'Berkeley, CA' or 'New York, NY'."}, "date_of_journey": {"type": "string", "description": "The date of the train journey, in the format 'YYYY-MM-DD'."}, "journey_start_time": {"type": "string", "description": "The start time of the train journey, in 24-hour format 'HH:MM'."}, "number_of_adults": {"type": "integer", "description": "The number of adults to reserve train tickets for."}, "trip_protection": {"type": "boolean", "description": "Indicates whether to add trip protection to the reservation for an additional fee."}, "_class": {"type": "string", "description": "The fare class for the train reservation.", "enum": ["Value", "Flexible", "Business"], "default": "Value"}}}}, {"name": "Trains_1_FindTrains", "description": "Find trains to a given destination city, providing information about available train services for the specified journey.", "parameters": {"type": "dict", "required": ["_from", "to", "date_of_journey"], "properties": {"_from": {"type": "string", "description": "The name of the starting city for the train journey, such as 'New York, NY'."}, "to": {"type": "string", "description": "The name of the destination city for the train journey, such as 'Los Angeles, CA'."}, "date_of_journey": {"type": "string", "description": "The date of the train journey in the format 'MM/DD/YYYY'."}, "_class": {"type": "string", "description": "The fare class for the train reservation.", "enum": ["Value", "Flexible", "Business"], "default": "Value"}, "number_of_adults": {"type": "integer", "description": "The number of adults to reserve train tickets for. Must be an integer between 1 and 5.", "default": 1}}}}]}, "model_result": "\nThe request lacks necessary parameters such as the destination city, date of journey, and journey start time required to reserve a train ticket. Please provide these details to proceed.", "decoded_result": null}