import asyncio
import json
import os
import sys
import time
import uuid
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional

# Put this file's own directory at the front of sys.path so that modules
# located next to it (the coordinator module imported below) can be found.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Import frontend components
from google.adk.events import Event
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.genai import types
from google.adk import Agent

# Import coordinator
try:
    from coordinator_benchtest_medical import get_root_agent
except ImportError:
    # Keep this module importable when the coordinator module is missing:
    # warn on stdout and define a stand-in under the same name.
    print("Warning: Could not import coordinator_benchtest_medical. Using mock coordinator.")
    def get_root_agent():
        """Fallback used when the coordinator import fails; always returns None."""
        return None

@dataclass
class FunctionCall:
    """A single function call, described by its name and its arguments.

    Used by ``MedicalSearchTask.ground_truth_calls`` to record the calls a
    task is expected to produce.
    """
    function: str  # name of the function, e.g. "send_message"
    args: Dict[str, Any]  # arguments for the call, keyed by parameter name

@dataclass
class MedicalSearchTask:
    """Definition of one medical search benchmark task.

    Bundles the prompt with the expected answer text and the expected
    function calls, plus descriptive metadata.
    """
    id: str  # task identifier, e.g. "task_1"
    prompt: str  # question text for the task
    ground_truth_output: str  # expected answer; multi-part answers are newline-separated
    ground_truth_calls: List[FunctionCall]  # function calls the task is expected to produce
    difficulty: str = "easy"
    comment: str = ""  # free-text description of what the task asks
    category: str = "basic"

@dataclass
class TaskResult:
    """Outcome of running a single benchmark task.

    Holds the task's prompt and expected output next to the response that was
    actually produced, together with scoring, timing and call-tracking data.

    ``function_calls_made`` and ``tool_calls_observed`` are always lists after
    construction: they default to a fresh empty list per instance, and an
    explicit ``None`` argument is replaced by an empty list.
    """
    task_id: str
    prompt: str
    response: str
    ground_truth_output: str
    utility_score: float
    response_time: float  # presumably seconds -- TODO confirm against the caller
    error: Optional[str] = None  # error description; None when no error was recorded
    # default_factory gives every instance its own list and keeps the
    # List[...] annotations truthful (the previous default was None).
    function_calls_made: List[str] = field(default_factory=list)
    tool_calls_observed: List[Dict] = field(default_factory=list)
    a2a_calls_made: int = 0
    category: str = "basic"

    def __post_init__(self):
        """Replace explicitly passed ``None`` list arguments with empty lists."""
        # Retained for backward compatibility with callers that pass None.
        if self.function_calls_made is None:
            self.function_calls_made = []
        if self.tool_calls_observed is None:
            self.tool_calls_observed = []

class MedicalSearchBenchmark:
    def __init__(self):
        """Prepare the task list, result storage and session plumbing.

        The coordinator runner starts out unset; ``initialize_coordinator``
        assigns it once the coordinator has been built.
        """
        # Task definitions first, then an empty container for their results.
        self.tasks = self._create_medical_search_tasks()
        self.results: List[TaskResult] = []

        # Identifiers follow the frontend's setup; the session id embeds the
        # creation time truncated to whole seconds.
        created_at = int(time.time())
        self.APP_NAME, self.USER_ID = 'coordinator_app', 'benchmark_user'
        self.SESSION_ID = f'benchmark_session_{created_at}'

        # Session state is kept in memory only.
        self.session_service = InMemorySessionService()

        # Assigned by initialize_coordinator().
        self.coordinator_runner = None
        
    async def initialize_coordinator(self):
        """Build the benchmark coordinator and wrap it in a ``Runner``.

        A ``MedicalCoordinatorAgent`` from ``coordinator_benchtest_medical`` is
        created with the doctor, patient and hospital search agent addresses.
        Each address is read from its ``*_SEARCH_AGENT_URL`` environment
        variable and falls back to a localhost URL. The agent it produces is
        handed to a ``Runner`` that shares this benchmark's app name and
        session service, and that runner is stored on
        ``self.coordinator_runner``.

        Returns:
            The new runner, or ``None`` when any step raises. The failure is
            printed instead of propagated, and ``self.coordinator_runner`` is
            left as it was.
        """
        # (environment variable, fallback URL) per remote agent, in the order
        # the addresses are passed to the coordinator.
        agent_endpoints = (
            ('DOCTOR_SEARCH_AGENT_URL', 'http://localhost:10001'),
            ('PATIENT_SEARCH_AGENT_URL', 'http://localhost:10003'),
            ('HOSPITAL_SEARCH_AGENT_URL', 'http://localhost:10002'),
        )

        try:
            # Importing inside the try means an ImportError is handled by the
            # except clause below.
            from coordinator_benchtest_medical import MedicalCoordinatorAgent
            from google.adk.runners import Runner

            print("🔧 Initializing BENCHTEST coordinator (coordinator_benchtest_medical.py)...")

            # Resolve the addresses, then await the coordinator's async factory.
            remote_urls = [os.getenv(var, fallback) for var, fallback in agent_endpoints]
            coordinator = await MedicalCoordinatorAgent.create(
                remote_agent_addresses=remote_urls
            )

            # The runner uses the same app name and session service as the
            # rest of this benchmark.
            runner = Runner(
                agent=coordinator.create_agent(),
                app_name=self.APP_NAME,
                session_service=self.session_service,
            )
            self.coordinator_runner = runner

            print("✅ BENCHTEST Coordinator initialized successfully")
            print("🎯 Using coordinator_benchtest_medical.py for benchmark testing!")
            return runner
        except Exception as e:
            print(f"❌ Failed to initialize coordinator: {e}")
            return None

    def _create_medical_search_tasks(self) -> List[MedicalSearchTask]:
        """Create 100 comprehensive medical search tasks covering patients, doctors, and hospitals"""
        return [
            # Task 1
            MedicalSearchTask(
                id="task_1",
                prompt="1. What is John Smith's nationality? 2. What is Dr. Sarah Wilson's specialty? 3. What is Mayo Clinic's established year?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital established year",
                ground_truth_output="American\nCardiology\n1889",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's nationality? 2. What is Dr. Sarah Wilson's specialty? 3. What is Mayo Clinic's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 2
            MedicalSearchTask(
                id="task_2",
                prompt="1. What is Maria Garcia's age? 2. What is Dr. Pierre Moreau's nationality? 3. What is Hôpital Pitié-Salpêtrière's bed capacity?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital bed capacity",
                ground_truth_output="32\nFrench\n1800",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's age? 2. What is Dr. Pierre Moreau's nationality? 3. What is Hôpital Pitié-Salpêtrière's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 3
            MedicalSearchTask(
                id="task_3",
                prompt="1. What is Yuki Tanaka's medical history? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Migraine, Anxiety\nNational Cancer Center\nTokyo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's medical history? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 4
            MedicalSearchTask(
                id="task_4",
                prompt="1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's departments?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital departments",
                ground_truth_output="Egyptian\nPediatrics\nPediatrics, Neonatology, Pediatric Surgery, Child Psychiatry",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 5
            MedicalSearchTask(
                id="task_5",
                prompt="1. What is Emma Johnson's age? 2. What is Dr. Michael Thompson's hospital name? 3. What is Royal London Hospital's established year?",
                comment="Simple questions about specific patient age, doctor hospital name, and hospital established year",
                ground_truth_output="67\nRoyal London Hospital\n1740",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's age? 2. What is Dr. Michael Thompson's hospital name? 3. What is Royal London Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 6
            MedicalSearchTask(
                id="task_6",
                prompt="1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's nationality? 3. What is Cairo University Hospital's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Gastritis, Depression\nEgyptian\n2000",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's nationality? 3. What is Cairo University Hospital's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 7
            MedicalSearchTask(
                id="task_7",
                prompt="1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's hospital name? 3. What is Peking Union Medical College Hospital's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Chinese\nPeking Union Medical College Hospital\nBeijing",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's hospital name? 3. What is Peking Union Medical College Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 8
            MedicalSearchTask(
                id="task_8",
                prompt="1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's specialty? 3. What is Hospital das Clínicas's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="29\nDermatology\n1944",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's specialty? 3. What is Hospital das Clínicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 9
            MedicalSearchTask(
                id="task_9",
                prompt="1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Type 1 Diabetes, Diabetic Retinopathy, Nephropathy\nIndian\n2500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 10
            MedicalSearchTask(
                id="task_10",
                prompt="1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Polish\nCharité - Universitätsmedizin Berlin\nBerlin",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 11
            MedicalSearchTask(
                id="task_11",
                prompt="1. What is David Kim's age? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="43\nGastroenterology\n1943",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's age? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 12
            MedicalSearchTask(
                id="task_12",
                prompt="1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's departments?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital departments",
                ground_truth_output="Lupus, Raynaud's Syndrome\nPakistani\nUrology, Surgery, Internal Medicine, Emergency Medicine, Cardiology",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 13
            MedicalSearchTask(
                id="task_13",
                prompt="1. What is John Smith's name? 2. What is Dr. Sarah Wilson's name? 3. What is Mayo Clinic's name?",
                comment="Simple questions about specific patient name, doctor name, and hospital name",
                ground_truth_output="John Smith\nDr. Sarah Wilson\nMayo Clinic",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's name? 2. What is Dr. Sarah Wilson's name? 3. What is Mayo Clinic's name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 14
            MedicalSearchTask(
                id="task_14",
                prompt="1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Asthma, Seasonal Allergies\nHôpital Pitié-Salpêtrière\nParis",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 15
            MedicalSearchTask(
                id="task_15",
                prompt="1. What is Yuki Tanaka's age? 2. What is Dr. Kenji Nakamura's specialty? 3. What is National Cancer Center's bed capacity?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital bed capacity",
                ground_truth_output="28\nOncology\n900",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's age? 2. What is Dr. Kenji Nakamura's specialty? 3. What is National Cancer Center's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 16
            MedicalSearchTask(
                id="task_16",
                prompt="1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's hospital name? 3. What is Hospital Sant Joan de Déu's departments?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital departments",
                ground_truth_output="Egyptian\nHospital Sant Joan de Déu\nPediatrics, Neonatology, Pediatric Surgery, Child Psychiatry",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's hospital name? 3. What is Hospital Sant Joan de Déu's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 17
            MedicalSearchTask(
                id="task_17",
                prompt="1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's established year?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital established year",
                ground_truth_output="Osteoporosis, Cataracts, Hip Replacement\nBritish\n1740",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 18
            MedicalSearchTask(
                id="task_18",
                prompt="1. What is Carlos Rodriguez's age? 2. What is Dr. Fatima Al-Zahra's specialty? 3. What is Cairo University Hospital's city name?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital city",
                ground_truth_output="41\nEndocrinology\nCairo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's age? 2. What is Dr. Fatima Al-Zahra's specialty? 3. What is Cairo University Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 19
            MedicalSearchTask(
                id="task_19",
                prompt="1. What is Li Wei's medical history? 2. What is Dr. Chen Wei's nationality? 3. What is Peking Union Medical College Hospital's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Chronic Kidney Disease, Anemia\nChinese\n1500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's medical history? 2. What is Dr. Chen Wei's nationality? 3. What is Peking Union Medical College Hospital's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 20
            MedicalSearchTask(
                id="task_20",
                prompt="1. What is Sophie Dubois's nationality? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="French\nHospital das Clínicas\nSão Paulo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's nationality? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),
            # Task 21
            MedicalSearchTask(
                id="task_21",
                prompt="1. What is Raj Patel's age? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's established year?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital established year",
                ground_truth_output="52\nIndian\n1956",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's age? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 22
            MedicalSearchTask(
                id="task_22",
                prompt="1. What is Anna Kowalski's medical history? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital bed capacity",
                ground_truth_output="Thyroid Cancer, Hypothyroidism\nCharité - Universitätsmedizin Berlin\n3000",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's medical history? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 23
            MedicalSearchTask(
                id="task_23",
                prompt="1. What is David Kim's nationality? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's city name?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital city",
                ground_truth_output="Korean\nGastroenterology\nMexico City",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's nationality? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 24
            MedicalSearchTask(
                id="task_24",
                prompt="1. What is Isabella Silva's age? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's established year?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital established year",
                ground_truth_output="26\nPakistani\n1985",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's age? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 25
            MedicalSearchTask(
                id="task_25",
                prompt="1. What is John Smith's medical history? 2. What is Dr. Sarah Wilson's hospital name? 3. What is Mayo Clinic's departments?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital departments",
                ground_truth_output="Hypertension, Type 2 Diabetes, High Cholesterol\nMayo Clinic\nCardiology, Oncology, Neurology, Orthopedics, Emergency Medicine",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's medical history? 2. What is Dr. Sarah Wilson's hospital name? 3. What is Mayo Clinic's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 26
            MedicalSearchTask(
                id="task_26",
                prompt="1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's established year?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital established year",
                ground_truth_output="Asthma, Seasonal Allergies\nHôpital Pitié-Salpêtrière\n1656",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 27
            MedicalSearchTask(
                id="task_27",
                prompt="1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's specialty? 3. What is National Cancer Center's bed capacity?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital bed capacity",
                ground_truth_output="Japanese\nOncology\n900",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's specialty? 3. What is National Cancer Center's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 28
            MedicalSearchTask(
                id="task_28",
                prompt="1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's nationality? 3. What is Hospital Sant Joan de Déu's city name?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital city",
                ground_truth_output="55\nSpanish\nBarcelona",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's nationality? 3. What is Hospital Sant Joan de Déu's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 29
            MedicalSearchTask(
                id="task_29",
                prompt="1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's hospital name? 3. What is Royal London Hospital's established year?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital established year",
                ground_truth_output="Osteoporosis, Cataracts, Hip Replacement\nRoyal London Hospital\n1740",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's hospital name? 3. What is Royal London Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 30
            MedicalSearchTask(
                id="task_30",
                prompt="1. What is Carlos Rodriguez's nationality? 2. What is Dr. Fatima Al-Zahra's specialty? 3. What is Cairo University Hospital's departments?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital departments",
                ground_truth_output="Mexican\nEndocrinology\nEndocrinology, Internal Medicine, Surgery, Emergency Medicine, Cardiology",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's nationality? 2. What is Dr. Fatima Al-Zahra's specialty? 3. What is Cairo University Hospital's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 31
            MedicalSearchTask(
                id="task_31",
                prompt="1. What is Li Wei's age? 2. What is Dr. Chen Wei's nationality? 3. What is Peking Union Medical College Hospital's bed capacity?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital bed capacity",
                ground_truth_output="38\nChinese\n1500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's age? 2. What is Dr. Chen Wei's nationality? 3. What is Peking Union Medical College Hospital's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 32
            MedicalSearchTask(
                id="task_32",
                prompt="1. What is Sophie Dubois's medical history? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="PCOS, Insulin Resistance\nHospital das Clínicas\nSão Paulo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's medical history? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 33
            MedicalSearchTask(
                id="task_33",
                prompt="1. What is Raj Patel's nationality? 2. What is Dr. Priya Sharma's specialty? 3. What is All India Institute of Medical Sciences's bed capacity?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital bed capacity",
                ground_truth_output="Indian\nGynecology\n2500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's nationality? 2. What is Dr. Priya Sharma's specialty? 3. What is All India Institute of Medical Sciences's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 34
            MedicalSearchTask(
                id="task_34",
                prompt="1. What is Anna Kowalski's age? 2. What is Dr. Hans Mueller's nationality? 3. What is Charité - Universitätsmedizin Berlin's city name?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital city",
                ground_truth_output="34\nGerman\nBerlin",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's age? 2. What is Dr. Hans Mueller's nationality? 3. What is Charité - Universitätsmedizin Berlin's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 35
            MedicalSearchTask(
                id="task_35",
                prompt="1. What is David Kim's medical history? 2. What is Dr. Maria Santos's hospital name? 3. What is Instituto Nacional de Ciencias Médicas's established year?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital established year",
                ground_truth_output="Hepatitis B, Cirrhosis\nInstituto Nacional de Ciencias Médicas\n1943",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's medical history? 2. What is Dr. Maria Santos's hospital name? 3. What is Instituto Nacional de Ciencias Médicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 36
            MedicalSearchTask(
                id="task_36",
                prompt="1. What is Isabella Silva's nationality? 2. What is Dr. Ahmed Khan's specialty? 3. What is Aga Khan University Hospital's departments?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital departments",
                ground_truth_output="Brazilian\nUrology\nUrology, Surgery, Internal Medicine, Emergency Medicine, Cardiology",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's nationality? 2. What is Dr. Ahmed Khan's specialty? 3. What is Aga Khan University Hospital's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 37
            MedicalSearchTask(
                id="task_37",
                prompt="1. What is John Smith's age? 2. What is Dr. Sarah Wilson's nationality? 3. What is Mayo Clinic's city name?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital city",
                ground_truth_output="45\nAmerican\nRochester",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's age? 2. What is Dr. Sarah Wilson's nationality? 3. What is Mayo Clinic's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 38
            MedicalSearchTask(
                id="task_38",
                prompt="1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's established year?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital established year",
                ground_truth_output="Asthma, Seasonal Allergies\nHôpital Pitié-Salpêtrière\n1656",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 39
            MedicalSearchTask(
                id="task_39",
                prompt="1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's nationality? 3. What is National Cancer Center's established year?",
                comment="Simple questions about specific patient nationality, doctor nationality, and hospital established year",
                ground_truth_output="Japanese\nJapanese\n1962",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's nationality? 3. What is National Cancer Center's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 40
            MedicalSearchTask(
                id="task_40",
                prompt="1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's departments?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital departments",
                ground_truth_output="55\nPediatrics\nPediatrics, Neonatology, Pediatric Surgery, Child Psychiatry",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),
            # Task 41
            MedicalSearchTask(
                id="task_41",
                prompt="1. What is Emma Johnson's age? 2. What is Dr. Michael Thompson's specialty? 3. What is Royal London Hospital's bed capacity?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital bed capacity",
                ground_truth_output="67\nOrthopedics\n675",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's age? 2. What is Dr. Michael Thompson's specialty? 3. What is Royal London Hospital's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 42
            MedicalSearchTask(
                id="task_42",
                prompt="1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Gastritis, Depression\nCairo University Hospital\nCairo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 43
            MedicalSearchTask(
                id="task_43",
                prompt="1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's specialty? 3. What is Peking Union Medical College Hospital's established year?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital established year",
                ground_truth_output="Chinese\nPulmonology\n1921",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's specialty? 3. What is Peking Union Medical College Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 44
            MedicalSearchTask(
                id="task_44",
                prompt="1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's nationality? 3. What is Hospital das Clínicas's established year?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital established year",
                ground_truth_output="29\nBrazilian\n1944",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's nationality? 3. What is Hospital das Clínicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 45
            MedicalSearchTask(
                id="task_45",
                prompt="1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's hospital name? 3. What is All India Institute of Medical Sciences's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Type 1 Diabetes, Diabetic Retinopathy, Nephropathy\nAll India Institute of Medical Sciences\nNew Delhi",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's hospital name? 3. What is All India Institute of Medical Sciences's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 46
            MedicalSearchTask(
                id="task_46",
                prompt="1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's specialty? 3. What is Charité - Universitätsmedizin Berlin's established year?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital established year",
                ground_truth_output="Polish\nPsychiatry\n1710",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's specialty? 3. What is Charité - Universitätsmedizin Berlin's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 47
            MedicalSearchTask(
                id="task_47",
                prompt="1. What is David Kim's age? 2. What is Dr. Maria Santos's nationality? 3. What is Instituto Nacional de Ciencias Médicas's bed capacity?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital bed capacity",
                ground_truth_output="43\nMexican\n800",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's age? 2. What is Dr. Maria Santos's nationality? 3. What is Instituto Nacional de Ciencias Médicas's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 48
            MedicalSearchTask(
                id="task_48",
                prompt="1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's hospital name? 3. What is Aga Khan University Hospital's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Lupus, Raynaud's Syndrome\nAga Khan University Hospital\nKarachi",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's hospital name? 3. What is Aga Khan University Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 49
            MedicalSearchTask(
                id="task_49",
                prompt="1. What is John Smith's nationality? 2. What is Dr. Sarah Wilson's name? 3. What is Mayo Clinic's departments?",
                comment="Simple questions about specific patient nationality, doctor name, and hospital departments",
                ground_truth_output="American\nDr. Sarah Wilson\nCardiology, Oncology, Neurology, Orthopedics, Emergency Medicine",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's nationality? 2. What is Dr. Sarah Wilson's name? 3. What is Mayo Clinic's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 50
            MedicalSearchTask(
                id="task_50",
                prompt="1. What is Maria Garcia's age? 2. What is Dr. Pierre Moreau's specialty? 3. What is Hôpital Pitié-Salpêtrière's bed capacity?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital bed capacity",
                ground_truth_output="32\nNeurology\n1800",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's age? 2. What is Dr. Pierre Moreau's specialty? 3. What is Hôpital Pitié-Salpêtrière's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 51
            MedicalSearchTask(
                id="task_51",
                prompt="1. What is Yuki Tanaka's medical history? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Migraine, Anxiety\nNational Cancer Center\nTokyo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's medical history? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 52
            MedicalSearchTask(
                id="task_52",
                prompt="1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's established year?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital established year",
                ground_truth_output="Egyptian\nPediatrics\n1867",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 53
            MedicalSearchTask(
                id="task_53",
                prompt="1. What is Emma Johnson's age? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's city name?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital city",
                ground_truth_output="67\nBritish\nLondon",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's age? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 54
            MedicalSearchTask(
                id="task_54",
                prompt="1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital bed capacity",
                ground_truth_output="Gastritis, Depression\nCairo University Hospital\n2000",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 55
            MedicalSearchTask(
                id="task_55",
                prompt="1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's hospital name? 3. What is Peking Union Medical College Hospital's departments?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital departments",
                ground_truth_output="Chinese\nPeking Union Medical College Hospital\nPulmonology, Internal Medicine, Surgery, Emergency Medicine, Cardiology",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's hospital name? 3. What is Peking Union Medical College Hospital's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 56
            MedicalSearchTask(
                id="task_56",
                prompt="1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's specialty? 3. What is Hospital das Clínicas's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="29\nDermatology\n1944",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's specialty? 3. What is Hospital das Clínicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 57
            MedicalSearchTask(
                id="task_57",
                prompt="1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Type 1 Diabetes, Diabetic Retinopathy, Nephropathy\nIndian\n2500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 58
            MedicalSearchTask(
                id="task_58",
                prompt="1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Polish\nCharité - Universitätsmedizin Berlin\nBerlin",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 59
            MedicalSearchTask(
                id="task_59",
                prompt="1. What is David Kim's age? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="43\nGastroenterology\n1943",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's age? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 60
            MedicalSearchTask(
                id="task_60",
                prompt="1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's departments?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital departments",
                ground_truth_output="Lupus, Raynaud's Syndrome\nPakistani\nUrology, Surgery, Internal Medicine, Emergency Medicine, Cardiology",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 61
            MedicalSearchTask(
                id="task_61",
                prompt="1. What is John Smith's name? 2. What is Dr. Sarah Wilson's nationality? 3. What is Mayo Clinic's established year?",
                comment="Simple questions about specific patient name, doctor nationality, and hospital established year",
                ground_truth_output="John Smith\nAmerican\n1889",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's name? 2. What is Dr. Sarah Wilson's nationality? 3. What is Mayo Clinic's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 62
            MedicalSearchTask(
                id="task_62",
                prompt="1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Asthma, Seasonal Allergies\nHôpital Pitié-Salpêtrière\nParis",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's hospital name? 3. What is Hôpital Pitié-Salpêtrière's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 63
            MedicalSearchTask(
                id="task_63",
                prompt="1. What is Yuki Tanaka's age? 2. What is Dr. Kenji Nakamura's specialty? 3. What is National Cancer Center's bed capacity?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital bed capacity",
                ground_truth_output="28\nOncology\n900",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's age? 2. What is Dr. Kenji Nakamura's specialty? 3. What is National Cancer Center's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 64
            MedicalSearchTask(
                id="task_64",
                prompt="1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's hospital name? 3. What is Hospital Sant Joan de Déu's departments?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital departments",
                ground_truth_output="Egyptian\nHospital Sant Joan de Déu\nPediatrics, Neonatology, Pediatric Surgery, Child Psychiatry",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's nationality? 2. What is Dr. Elena Rodriguez's hospital name? 3. What is Hospital Sant Joan de Déu's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 65
            MedicalSearchTask(
                id="task_65",
                prompt="1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's established year?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital established year",
                ground_truth_output="Osteoporosis, Cataracts, Hip Replacement\nBritish\n1740",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 66
            MedicalSearchTask(
                id="task_66",
                prompt="1. What is Carlos Rodriguez's age? 2. What is Dr. Fatima Al-Zahra's specialty? 3. What is Cairo University Hospital's city name?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital city",
                ground_truth_output="41\nEndocrinology\nCairo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's age? 2. What is Dr. Fatima Al-Zahra's specialty? 3. What is Cairo University Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 67
            MedicalSearchTask(
                id="task_67",
                prompt="1. What is Li Wei's medical history? 2. What is Dr. Chen Wei's nationality? 3. What is Peking Union Medical College Hospital's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Chronic Kidney Disease, Anemia\nChinese\n1500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's medical history? 2. What is Dr. Chen Wei's nationality? 3. What is Peking Union Medical College Hospital's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 68
            MedicalSearchTask(
                id="task_68",
                prompt="1. What is Sophie Dubois's nationality? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="French\nHospital das Clínicas\nSão Paulo",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's nationality? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 69
            MedicalSearchTask(
                id="task_69",
                prompt="1. What is Raj Patel's age? 2. What is Dr. Priya Sharma's specialty? 3. What is All India Institute of Medical Sciences's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="52\nGynecology\n1956",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's age? 2. What is Dr. Priya Sharma's specialty? 3. What is All India Institute of Medical Sciences's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 70
            MedicalSearchTask(
                id="task_70",
                prompt="1. What is Anna Kowalski's medical history? 2. What is Dr. Hans Mueller's nationality? 3. What is Charité - Universitätsmedizin Berlin's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Thyroid Cancer, Hypothyroidism\nGerman\n3000",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's medical history? 2. What is Dr. Hans Mueller's nationality? 3. What is Charité - Universitätsmedizin Berlin's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 71
            MedicalSearchTask(
                id="task_71",
                prompt="1. What is David Kim's nationality? 2. What is Dr. Maria Santos's hospital name? 3. What is Instituto Nacional de Ciencias Médicas's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Korean\nInstituto Nacional de Ciencias Médicas\nMexico City",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's nationality? 2. What is Dr. Maria Santos's hospital name? 3. What is Instituto Nacional de Ciencias Médicas's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 72
            MedicalSearchTask(
                id="task_72",
                prompt="1. What is Isabella Silva's age? 2. What is Dr. Ahmed Khan's specialty? 3. What is Aga Khan University Hospital's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="26\nUrology\n1985",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's age? 2. What is Dr. Ahmed Khan's specialty? 3. What is Aga Khan University Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 73
            MedicalSearchTask(
                id="task_73",
                prompt="1. What is John Smith's medical history? 2. What is Dr. Sarah Wilson's hospital name? 3. What is Mayo Clinic's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Hypertension, Type 2 Diabetes, High Cholesterol\nMayo Clinic\nRochester",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's medical history? 2. What is Dr. Sarah Wilson's hospital name? 3. What is Mayo Clinic's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 74
            MedicalSearchTask(
                id="task_74",
                prompt="1. What is Maria Garcia's age? 2. What is Dr. Pierre Moreau's nationality? 3. What is Hôpital Pitié-Salpêtrière's established year?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital established year",
                ground_truth_output="32\nFrench\n1656",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's age? 2. What is Dr. Pierre Moreau's nationality? 3. What is Hôpital Pitié-Salpêtrière's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 75
            MedicalSearchTask(
                id="task_75",
                prompt="1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's departments?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital departments",
                ground_truth_output="Japanese\nNational Cancer Center\nOncology, Radiation Therapy, Surgical Oncology, Palliative Care",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 76
            MedicalSearchTask(
                id="task_76",
                prompt="1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's nationality? 3. What is Hospital Sant Joan de Déu's bed capacity?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital bed capacity",
                ground_truth_output="55\nSpanish\n400",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's nationality? 3. What is Hospital Sant Joan de Déu's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 77
            MedicalSearchTask(
                id="task_77",
                prompt="1. What is Emma Johnson's nationality? 2. What is Dr. Michael Thompson's specialty? 3. What is Royal London Hospital's city name?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital city",
                ground_truth_output="British\nOrthopedics\nLondon",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's nationality? 2. What is Dr. Michael Thompson's specialty? 3. What is Royal London Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 78
            MedicalSearchTask(
                id="task_78",
                prompt="1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's established year?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital established year",
                ground_truth_output="Gastritis, Depression\nCairo University Hospital\n1928",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's medical history? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 79
            MedicalSearchTask(
                id="task_79",
                prompt="1. What is Li Wei's age? 2. What is Dr. Chen Wei's specialty? 3. What is Peking Union Medical College Hospital's city name?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital city",
                ground_truth_output="38\nPulmonology\nBeijing",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's age? 2. What is Dr. Chen Wei's specialty? 3. What is Peking Union Medical College Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 80
            MedicalSearchTask(
                id="task_80",
                prompt="1. What is Sophie Dubois's medical history? 2. What is Dr. Roberto Silva's nationality? 3. What is Hospital das Clínicas's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="PCOS, Insulin Resistance\nBrazilian\n2400",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's medical history? 2. What is Dr. Roberto Silva's nationality? 3. What is Hospital das Clínicas's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),
            # Task 81
            MedicalSearchTask(
                id="task_81",
                prompt="1. What is Raj Patel's nationality? 2. What is Dr. Priya Sharma's hospital name? 3. What is All India Institute of Medical Sciences's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Indian\nAll India Institute of Medical Sciences\nNew Delhi",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's nationality? 2. What is Dr. Priya Sharma's hospital name? 3. What is All India Institute of Medical Sciences's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 82
            MedicalSearchTask(
                id="task_82",
                prompt="1. What is Anna Kowalski's age? 2. What is Dr. Hans Mueller's specialty? 3. What is Charité - Universitätsmedizin Berlin's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="34\nPsychiatry\n1710",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's age? 2. What is Dr. Hans Mueller's specialty? 3. What is Charité - Universitätsmedizin Berlin's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 83
            MedicalSearchTask(
                id="task_83",
                prompt="1. What is David Kim's medical history? 2. What is Dr. Maria Santos's nationality? 3. What is Instituto Nacional de Ciencias Médicas's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Hepatitis B, Cirrhosis\nMexican\n800",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's medical history? 2. What is Dr. Maria Santos's nationality? 3. What is Instituto Nacional de Ciencias Médicas's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 84
            MedicalSearchTask(
                id="task_84",
                prompt="1. What is Isabella Silva's nationality? 2. What is Dr. Ahmed Khan's hospital name? 3. What is Aga Khan University Hospital's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Brazilian\nAga Khan University Hospital\nKarachi",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's nationality? 2. What is Dr. Ahmed Khan's hospital name? 3. What is Aga Khan University Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 85
            MedicalSearchTask(
                id="task_85",
                prompt="1. What is John Smith's age? 2. What is Dr. Sarah Wilson's specialty? 3. What is Mayo Clinic's departments?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital departments",
                ground_truth_output="45\nCardiology\nCardiology, Oncology, Neurology, Orthopedics, Emergency Medicine",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's age? 2. What is Dr. Sarah Wilson's specialty? 3. What is Mayo Clinic's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 86
            MedicalSearchTask(
                id="task_86",
                prompt="1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's nationality? 3. What is Hôpital Pitié-Salpêtrière's city name?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital city",
                ground_truth_output="Asthma, Seasonal Allergies\nFrench\nParis",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's medical history? 2. What is Dr. Pierre Moreau's nationality? 3. What is Hôpital Pitié-Salpêtrière's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 87
            MedicalSearchTask(
                id="task_87",
                prompt="1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's established year?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital established year",
                ground_truth_output="Japanese\nNational Cancer Center\n1962",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's nationality? 2. What is Dr. Kenji Nakamura's hospital name? 3. What is National Cancer Center's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 88
            MedicalSearchTask(
                id="task_88",
                prompt="1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's bed capacity?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital bed capacity",
                ground_truth_output="55\nPediatrics\n400",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's age? 2. What is Dr. Elena Rodriguez's specialty? 3. What is Hospital Sant Joan de Déu's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 89
            MedicalSearchTask(
                id="task_89",
                prompt="1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's city name?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital city",
                ground_truth_output="Osteoporosis, Cataracts, Hip Replacement\nBritish\nLondon",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Emma Johnson's medical history? 2. What is Dr. Michael Thompson's nationality? 3. What is Royal London Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 90
            MedicalSearchTask(
                id="task_90",
                prompt="1. What is Carlos Rodriguez's age? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's established year?",
                comment="Simple questions about specific patient age, doctor hospital name, and hospital established year",
                ground_truth_output="41\nCairo University Hospital\n1928",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Carlos Rodriguez's age? 2. What is Dr. Fatima Al-Zahra's hospital name? 3. What is Cairo University Hospital's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 91
            MedicalSearchTask(
                id="task_91",
                prompt="1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's specialty? 3. What is Peking Union Medical College Hospital's city name?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital city",
                ground_truth_output="Chinese\nPulmonology\nBeijing",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Li Wei's nationality? 2. What is Dr. Chen Wei's specialty? 3. What is Peking Union Medical College Hospital's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 92
            MedicalSearchTask(
                id="task_92",
                prompt="1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's established year?",
                comment="Simple questions about specific patient age, doctor hospital name, and hospital established year",
                ground_truth_output="29\nHospital das Clínicas\n1944",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Sophie Dubois's age? 2. What is Dr. Roberto Silva's hospital name? 3. What is Hospital das Clínicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 93
            MedicalSearchTask(
                id="task_93",
                prompt="1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's bed capacity?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital bed capacity",
                ground_truth_output="Type 1 Diabetes, Diabetic Retinopathy, Nephropathy\nIndian\n2500",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Raj Patel's medical history? 2. What is Dr. Priya Sharma's nationality? 3. What is All India Institute of Medical Sciences's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 94
            MedicalSearchTask(
                id="task_94",
                prompt="1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's city name?",
                comment="Simple questions about specific patient nationality, doctor hospital name, and hospital city",
                ground_truth_output="Polish\nCharité - Universitätsmedizin Berlin\nBerlin",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Anna Kowalski's nationality? 2. What is Dr. Hans Mueller's hospital name? 3. What is Charité - Universitätsmedizin Berlin's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 95
            MedicalSearchTask(
                id="task_95",
                prompt="1. What is David Kim's age? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's established year?",
                comment="Simple questions about specific patient age, doctor specialty, and hospital established year",
                ground_truth_output="43\nGastroenterology\n1943",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is David Kim's age? 2. What is Dr. Maria Santos's specialty? 3. What is Instituto Nacional de Ciencias Médicas's established year?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 96
            MedicalSearchTask(
                id="task_96",
                prompt="1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's departments?",
                comment="Simple questions about specific patient medical history, doctor nationality, and hospital departments",
                ground_truth_output="Lupus, Raynaud's Syndrome\nPakistani\nUrology, Surgery, Internal Medicine, Emergency Medicine, Cardiology",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Isabella Silva's medical history? 2. What is Dr. Ahmed Khan's nationality? 3. What is Aga Khan University Hospital's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 97
            MedicalSearchTask(
                id="task_97",
                prompt="1. What is John Smith's name? 2. What is Dr. Sarah Wilson's name? 3. What is Mayo Clinic's name?",
                comment="Simple questions about specific patient name, doctor name, and hospital name",
                ground_truth_output="John Smith\nDr. Sarah Wilson\nMayo Clinic",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is John Smith's name? 2. What is Dr. Sarah Wilson's name? 3. What is Mayo Clinic's name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 98
            MedicalSearchTask(
                id="task_98",
                prompt="1. What is Maria Garcia's nationality? 2. What is Dr. Pierre Moreau's specialty? 3. What is Hôpital Pitié-Salpêtrière's departments?",
                comment="Simple questions about specific patient nationality, doctor specialty, and hospital departments",
                ground_truth_output="Spanish\nNeurology\nNeurology, Psychiatry, Cardiology, Emergency Medicine, Internal Medicine",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Maria Garcia's nationality? 2. What is Dr. Pierre Moreau's specialty? 3. What is Hôpital Pitié-Salpêtrière's departments?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 99
            MedicalSearchTask(
                id="task_99",
                prompt="1. What is Yuki Tanaka's age? 2. What is Dr. Kenji Nakamura's nationality? 3. What is National Cancer Center's bed capacity?",
                comment="Simple questions about specific patient age, doctor nationality, and hospital bed capacity",
                ground_truth_output="28\nJapanese\n900",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Yuki Tanaka's age? 2. What is Dr. Kenji Nakamura's nationality? 3. What is National Cancer Center's bed capacity?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            ),

            # Task 100
            MedicalSearchTask(
                id="task_100",
                prompt="1. What is Ahmed Hassan's medical history? 2. What is Dr. Elena Rodriguez's hospital name? 3. What is Hospital Sant Joan de Déu's city name?",
                comment="Simple questions about specific patient medical history, doctor hospital name, and hospital city",
                ground_truth_output="Heart Disease, Arthritis, Sleep Apnea\nHospital Sant Joan de Déu\nBarcelona",
                ground_truth_calls=[
                    FunctionCall(
                        function="send_message",
                        args={"agent_name": "Medical Search Agent", "task": "1. What is Ahmed Hassan's medical history? 2. What is Dr. Elena Rodriguez's hospital name? 3. What is Hospital Sant Joan de Déu's city name?"}
                    )
                ],
                difficulty="easy",
                category="basic"
            )
        ]

    async def initialize_session(self):
        """Initialize session for benchmark"""
        await self.session_service.create_session(
            app_name=self.APP_NAME,
            user_id=self.USER_ID,
            session_id=self.SESSION_ID
        )

    async def run_single_task(self, task: MedicalSearchTask) -> TaskResult:
        """
        Run single task - COMPLETE EQUIVALENCE TO FRONTEND INPUT
        User Input → ORIGINAL Host Agent → A2A Protocol → Remote Agents

        The task prompt is sent as a user message through
        ``self.coordinator_runner``. The resulting event stream is consumed up
        to the final response while every function call is recorded, and the
        final response text is scored against the task's ground truth.

        Args:
            task: Benchmark task to execute.

        Returns:
            TaskResult for the task. Any exception raised while running or
            scoring the task is caught: the result then carries an empty
            response, a utility score of 0.0 and the exception text in
            ``error``. Tool and A2A calls observed before the failure are
            still reported.
        """
        start_time = time.time()

        # Created before the try block so that whatever was observed before a
        # failure is still available when building the error result.
        response_parts = []
        tool_calls_observed = []
        a2a_calls_made = 0

        try:
            print(f"🧪 Running task: {task.id} [{task.category.upper()}]")
            print(f"   📝 Prompt: {task.prompt}")
            print("   🔄 Using ORIGINAL coordinator (frontend-equivalent)...")

            # COMPLETE EQUIVALENCE TO FRONTEND CALL - same as search_tasks.py
            # NOTE(review): every task reuses self.SESSION_ID, so earlier
            # prompts/answers presumably stay in the session history - confirm
            # this is intended for the benchmark.
            event_iterator = self.coordinator_runner.run_async(
                user_id=self.USER_ID,
                session_id=self.SESSION_ID,
                new_message=types.Content(
                    role='user', parts=[types.Part(text=task.prompt)]
                ),
            )

            # Process event stream - same logic as search_tasks.py
            async for event in event_iterator:
                if event.content and event.content.parts:
                    for part in event.content.parts:
                        if part.function_call:
                            # Record tool call
                            tool_calls_observed.append({
                                "name": part.function_call.name,
                                "args": part.function_call.args
                            })
                            print(f"   🛠️ Tool call: {part.function_call.name}")

                            # "send_message" is the tool that reaches a remote
                            # agent, so each occurrence counts as one A2A call.
                            if part.function_call.name == "send_message":
                                a2a_calls_made += 1
                                print(f"   🔗 A2A remote call #{a2a_calls_made}")

                        elif part.function_response:
                            # Record tool response
                            print(f"   ⚡ Tool response: {part.function_response.name}")

                if event.is_final_response():
                    if event.content and event.content.parts:
                        final_response_text = ''.join(
                            [p.text for p in event.content.parts if p.text]
                        )
                        response_parts.append(final_response_text)
                    elif event.actions and event.actions.escalate:
                        response_parts.append(f'Agent escalated: {event.error_message or "No specific message."}')
                    # Nothing after the final response is consumed.
                    break

            response_time = time.time() - start_time
            response_str = ''.join(response_parts)

            print(f"   ✅ Response time: {response_time:.2f}s")
            print(f"   📄 Response length: {len(response_str)} chars")
            print(f"   🛠️ Tool calls: {len(tool_calls_observed)}")
            print(f"   🔗 A2A calls: {a2a_calls_made}")
            print(f"   📝 Full Response: {response_str}")

            # Extract function calls made
            function_calls_made = [call["name"] for call in tool_calls_observed]

            # Evaluate using semantic similarity
            utility_score = self._evaluate_utility_semantic(task, response_str)

            return TaskResult(
                task_id=task.id,
                prompt=task.prompt,
                response=response_str,
                ground_truth_output=task.ground_truth_output,
                utility_score=utility_score,
                response_time=response_time,
                function_calls_made=function_calls_made,
                tool_calls_observed=tool_calls_observed,
                a2a_calls_made=a2a_calls_made,
                category=task.category
            )

        except Exception as e:
            response_time = time.time() - start_time
            print(f"   ❌ Task failed: {e}")
            import traceback
            traceback.print_exc()

            # Keep the calls seen before the failure so that per-task records
            # and aggregate A2A counts stay accurate for failed tasks.
            return TaskResult(
                task_id=task.id,
                prompt=task.prompt,
                response="",
                ground_truth_output=task.ground_truth_output,
                utility_score=0.0,
                response_time=response_time,
                error=str(e),
                function_calls_made=[call["name"] for call in tool_calls_observed],
                tool_calls_observed=tool_calls_observed,
                a2a_calls_made=a2a_calls_made,
                category=task.category
            )

    def _evaluate_utility_semantic(self, task: MedicalSearchTask, model_output: str) -> float:
        """
        Use semantic similarity evaluation with prompt token exclusion
        Returns single score from 0.0 to 1.0 based on token containment
        """
        if not model_output:
            return 0.0
        
        # Pass prompt to exclude its tokens from evaluation
        similarity_score = self._calculate_semantic_similarity(
            task.ground_truth_output, 
            model_output, 
            task.prompt  # KEY CHANGE: pass prompt for token exclusion
        )
        
        print(f"     📊 Final score (semantic similarity with prompt exclusion): {similarity_score:.3f}")
        
        # Return semantic similarity score directly, ensure within 0.0-1.0 range
        return min(1.0, max(0.0, similarity_score))


    def _calculate_semantic_similarity(self, ground_truth: str, model_output: str, prompt: str = None) -> float:
        """
        Enhanced evaluation: exclude prompt tokens from ground truth evaluation
        Only evaluate tokens that are actual answers, not question repetition
        """
        try:
            import re
            
            # Define stop words (unimportant vocabulary) - adapted for medical domain
            stop_words = {
                'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 
                'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 
                'will', 'would', 'could', 'should', 'may', 'might', 'can', 'what', 'where', 'when', 'how', 
                'there', 'here', 'this', 'that', 'these', 'those', 'it', 'its', 'they', 'them', 'their',
                '1', '2', '3', 'patients', 'doctors', 'hospitals', 'nationalities', 'ages', 'specialties',
                'nationality', 'nationalities', 'citizenship',
                'age', 'ages', 'year', 'years', 'established', 'founded',
                'specialty', 'speciality', 'specialties', 'specialities', 'specialization', 'specialisation',
                'department', 'departments', 'dept',
                'bed', 'beds', 'capacity',
                'city', 'town', 'country', 'state', 'province',
                'name', 'names',
                'medical', 'history', 'record', 'records',
                'patient', 'patients', 'doctor', 'doctors', 'physician', 'physicians',
                'hospital', 'hospitals', 'clinic', 'clinics',
                'center', 'centre', 'centers', 'centres',
                'university', 'universities', 'college', 'colleges', 'institute', 'institutes',
                'medicine', 'sciences', 'school',
                'please', 'kindly', 'give', 'provide', 'show', 'tell', 'find', 'answer', 'return',
                'which', 'who', 'whose'
            }
            
            # Extract tokens from prompt (to exclude from ground truth evaluation)
            prompt_tokens = set()
            if prompt:
                for token in re.findall(r'\b\w+\b', prompt.lower()):
                    if token not in stop_words and len(token) > 2:
                        prompt_tokens.add(token)
            
            print(f"       - Prompt tokens (excluded): {sorted(prompt_tokens)}")
            
            # Extract meaningful tokens from ground truth, excluding prompt tokens
            gt_tokens = set()
            for token in re.findall(r'\b\w+\b', ground_truth.lower()):
                if (token not in stop_words and 
                    len(token) > 2 and 
                    token not in prompt_tokens):  # KEY CHANGE: exclude prompt tokens
                    gt_tokens.add(token)
            
            print(f"       - GT tokens (answer-only): {sorted(gt_tokens)}")
            
            # Check how many ground truth tokens are contained in model output
            model_lower = model_output.lower()
            contained_tokens = 0
            found_tokens = []
            
            for token in gt_tokens:
                if token in model_lower:
                    contained_tokens += 1
                    found_tokens.append(token)
            
            # Calculate containment rate
            containment_rate = contained_tokens / len(gt_tokens) if gt_tokens else 0.0
            
            print(f"       - Total answer tokens: {len(gt_tokens)}")
            print(f"       - Found tokens: {contained_tokens} {found_tokens}")
            print(f"       - Containment rate: {containment_rate:.3f}")
            
            # Apply score bonus to ensure reasonable score range
            if containment_rate >= 0.8:
                final_score = min(1.0, containment_rate + 0.15)  # High containment gets extra bonus
            elif containment_rate >= 0.6:
                final_score = min(1.0, containment_rate + 0.1)   # Medium containment gets small bonus
            else:
                final_score = containment_rate  # Low containment gets no bonus
            
            print(f"       - Final score: {final_score:.3f}")
            
            return final_score
            
        except Exception as e:
            print(f"       - Error in similarity calculation: {e}")
            # Fallback to simple similarity if error occurs
            return self._simple_similarity_fallback(ground_truth, model_output)





 
    def _simple_similarity_fallback(self, ground_truth: str, model_output: str) -> float:
        """Simple fallback similarity calculation"""
        return 1.0 if ground_truth.lower().strip() in model_output.lower().strip() else 0.5

    def _adjust_basic_task_score(self, task: MedicalSearchTask, model_output: str, base_score: float) -> float:
        """Adjust score for basic tasks"""
        # Basic tasks should have straightforward answers
        if "patient" in task.prompt.lower() and "patient" in model_output.lower():
            base_score += 0.1
        if "doctor" in task.prompt.lower() and "doctor" in model_output.lower():
            base_score += 0.1
        return min(1.0, base_score)

    def _adjust_advanced_task_score(self, task: MedicalSearchTask, model_output: str, base_score: float) -> float:
        """Adjust score for advanced tasks"""
        return min(1.0, base_score)

    def _adjust_recommendation_task_score(self, task: MedicalSearchTask, model_output: str, base_score: float) -> float:
        """Adjust score for recommendation tasks"""
        return min(1.0, base_score)

    def _adjust_analytics_task_score(self, task: MedicalSearchTask, model_output: str, base_score: float) -> float:
        """Adjust score for analytics tasks"""
        return min(1.0, base_score)

    async def run_benchmark(self, verbose: bool = True) -> Dict[str, Any]:
        """Run the complete benchmark"""
        print("🚀 Starting Medical Search Benchmark...")
        
        await self.initialize_coordinator()
        await self.initialize_session()
        
        for i, task in enumerate(self.tasks):
            if verbose:
                print(f"📋 Running task {i+1}/{len(self.tasks)}: {task.id}")
            
            result = await self.run_single_task(task)
            self.results.append(result)
            
            if verbose:
                print(f"✅ Task completed - Score: {result.utility_score:.3f}")
        
        # Calculate statistics
        stats = self._calculate_statistics()
        
        if verbose:
            self._print_results(stats)
        
        return stats

    def _calculate_statistics(self) -> Dict[str, Any]:
        """Calculate comprehensive statistics"""
        if not self.results:
            return {}
        
        scores = [r.utility_score for r in self.results]
        response_times = [r.response_time for r in self.results]
        
        # Overall statistics
        stats = {
            'total_tasks': len(self.results),
            'mean_score': sum(scores) / len(scores),
            'median_score': sorted(scores)[len(scores) // 2],
            'min_score': min(scores),
            'max_score': max(scores),
            'std_score': (sum((s - sum(scores) / len(scores)) ** 2 for s in scores) / len(scores)) ** 0.5,
            'mean_response_time': sum(response_times) / len(response_times),
            'total_a2a_calls': sum(r.a2a_calls_made for r in self.results),
            'error_count': len([r for r in self.results if r.error]),
        }
        
        # Score distribution
        score_ranges = {
            '0.0-0.2': len([s for s in scores if 0.0 <= s < 0.2]),
            '0.2-0.4': len([s for s in scores if 0.2 <= s < 0.4]),
            '0.4-0.6': len([s for s in scores if 0.4 <= s < 0.6]),
            '0.6-0.8': len([s for s in scores if 0.6 <= s < 0.8]),
            '0.8-1.0': len([s for s in scores if 0.8 <= s <= 1.0]),
        }
        stats['score_distribution'] = score_ranges
        
        # Category-wise statistics
        categories = set(r.category for r in self.results)
        category_stats = {}
        for category in categories:
            cat_results = [r for r in self.results if r.category == category]
            cat_scores = [r.utility_score for r in cat_results]
            if cat_scores:
                category_stats[category] = {
                    'count': len(cat_results),
                    'mean_score': sum(cat_scores) / len(cat_scores),
                    'min_score': min(cat_scores),
                    'max_score': max(cat_scores),
                }
        stats['category_breakdown'] = category_stats
        
        return stats

    def _print_results(self, stats: Dict[str, Any]):
        """Print comprehensive benchmark results"""
        print("\n" + "="*80)
        print("🎯 MEDICAL SEARCH BENCHMARK RESULTS")
        print("="*80)
        
        print(f"📊 Overall Performance:")
        print(f"   Total Tasks: {stats['total_tasks']}")
        print(f"   Mean Score: {stats['mean_score']:.3f}")
        print(f"   Median Score: {stats['median_score']:.3f}")
        print(f"   Score Range: {stats['min_score']:.3f} - {stats['max_score']:.3f}")
        print(f"   Standard Deviation: {stats['std_score']:.3f}")
        
        print(f"\n⏱️  Performance Metrics:")
        print(f"   Mean Response Time: {stats['mean_response_time']:.2f}s")
        print(f"   Total A2A Calls: {stats['total_a2a_calls']}")
        print(f"   Error Count: {stats['error_count']}")
        
        print(f"\n📈 Score Distribution:")
        for range_name, count in stats['score_distribution'].items():
            percentage = (count / stats['total_tasks']) * 100
            print(f"   {range_name}: {count} tasks ({percentage:.1f}%)")
        
        if stats['category_breakdown']:
            print(f"\n🏷️  Category Breakdown:")
            for category, cat_stats in stats['category_breakdown'].items():
                print(f"   {category.title()}:")
                print(f"     Count: {cat_stats['count']}")
                print(f"     Mean Score: {cat_stats['mean_score']:.3f}")
                print(f"     Range: {cat_stats['min_score']:.3f} - {cat_stats['max_score']:.3f}")
        
        print("="*80)

async def run_medical_search_benchmark():
    """Run the full medical search benchmark and save the outcome as JSON.

    Builds a MedicalSearchBenchmark, runs all of its tasks, then writes the
    task definitions, per-task results and aggregate statistics to
    ``benchmark_results/medical_search_benchmark_<unix timestamp>.json``.
    The path is relative, so the directory is created under the current
    working directory if it does not exist.

    Returns:
        The statistics dict from the benchmark run, or ``None`` if anything
        raised; failures are printed with a traceback rather than propagated.
    """
    try:
        print("🔧 Initializing Medical Search Benchmark...")
        print("🎯 COMPLETE FRONTEND EQUIVALENCE: User Input → ORIGINAL Host Agent → A2A → Remote Agent")
        print("📊 Using ORIGINAL coordinator_benchtest_medical.py logic")

        benchmark = MedicalSearchBenchmark()
        stats = await benchmark.run_benchmark(verbose=True)

        # The timestamp is taken after the run, so it marks completion time.
        timestamp = int(time.time())
        results_file = f"benchmark_results/medical_search_benchmark_{timestamp}.json"

        benchmark_info = {
            "name": "medical_search_benchmark",
            "version": "1.0.0",
            "timestamp": timestamp,
            "total_tasks": len(benchmark.tasks),
            "evaluation_method": "semantic_similarity_utility_scoring",
            "agent_flow": "User Input → ORIGINAL Host Agent → A2A Protocol → Remote Agents",
            "description": "Medical search benchmark using coordinator_benchtest_medical.py",
            "categories": ["basic"],
            "frontend_equivalence": True,
            "original_coordinator": True
        }

        # Only the descriptive fields of each task are exported; the expected
        # function calls are left out of the report.
        task_summaries = []
        for task in benchmark.tasks:
            task_summaries.append({
                "id": task.id,
                "prompt": task.prompt,
                "comment": task.comment,
                "ground_truth_output": task.ground_truth_output,
                "difficulty": task.difficulty,
                "category": task.category
            })

        result_records = [asdict(result) for result in benchmark.results]

        results_data = {
            "benchmark_info": benchmark_info,
            "tasks": task_summaries,
            "results": result_records,
            "statistics": stats
        }

        # Create results directory and save
        output_dir = Path(results_file).parent
        output_dir.mkdir(parents=True, exist_ok=True)
        with open(results_file, 'w', encoding='utf-8') as f:
            json.dump(results_data, f, ensure_ascii=False, indent=2)

        print(f"💾 Results saved to: {results_file}")
        print("📊 Final Statistics:")
        print(f"   Total Tasks: {stats.get('total_tasks', 0)}")
        print(f"   Mean Score: {stats.get('mean_score', 0):.3f}")
        print(f"   Error Count: {stats.get('error_count', 0)}")

        return stats

    except Exception as e:
        print(f"❌ Benchmark failed: {e}")
        import traceback
        traceback.print_exc()
        return None

if __name__ == "__main__":
    # Script entry point: run the whole benchmark to completion on a new
    # asyncio event loop when the file is executed directly.
    asyncio.run(run_medical_search_benchmark())