{
  "evaluation_metadata": {
    "timestamp": "2025-08-05 19:40:03",
    "domain": "airline",
    "model": "Qwen/Qwen3-32B",
    "total_tasks_available": 50,
    "tasks_evaluated": 50
  },
  "performance_metrics": {
    "success_rate_percentage": 34.0,
    "successful_tasks": 17,
    "failed_tasks": 33,
    "average_turns_per_task": 20.92
  },
  "status_breakdown": {
    "completed_incorrectly": 32,
    "completed_successfully": 17,
    "incomplete": 1
  },
  "tool_usage_statistics": {
    "SearchDirectFlight": 46,
    "SearchOnestopFlight": 17,
    "GetUserDetails": 36,
    "BookReservation": 243,
    "GetReservationDetails": 117,
    "CancelReservation": 25,
    "Calculate": 15,
    "UpdateReservationCabin": 1,
    "UpdateReservationFlights": 17,
    "UpdateReservationBaggages": 31,
    "TransferToHumanAgents": 93,
    "Think": 37,
    "UpdateReservationPassengers": 2,
    "SendCertificate": 5
  },
  "detailed_results_file": "tau_bench_airline_Qwen_Qwen3_32B_20250805_174409.json"
}