"""
Async Orchestrator - Main CVE Reproduction Pipeline

This orchestrator coordinates all agents and automatic scripts to reproduce CVEs.
It handles the complete workflow without requiring agents to make decisions
about tool usage or file access.
"""

import asyncio
import json
import logging
from pathlib import Path
from typing import Dict, Any, Optional, List
from datetime import datetime
import yaml

from .agent_runner import AgentRunner
from .tool_controller import AgentType
from .script_executor import ScriptExecutor
from .file_state_manager import FileStateManager
from .feedback_processor import FeedbackProcessor, FeedbackIssue
from .models import (
    AgentResult,
    PhaseDefinition,
    PhaseRecord,
    CVETaskStatus,
    PHASE_DEFINITIONS,
)


class AsyncOrchestrator:
    """
    Main orchestrator for Multi-Agent CVE reproduction.

    Responsibilities:
    - Load CVE information from reproduce_cves/
    - Create working directories
    - Run agents in sequence
    - Execute automatic scripts (docker_auto_start, test_validator)
    - Handle failures and retries
    - Track progress
    """

    def __init__(self, config_path: str = "config.yaml", cve_input_dir: str = None):
        """
        Build the orchestrator: load config, prepare directories, logging and components.

        Args:
            config_path: Path to the YAML configuration file
            cve_input_dir: Directory containing the CVE input files; when omitted
                (or empty) ``reproduce_cves/`` two levels above this file is used
        """
        self.config = self._load_config(config_path)

        # Filesystem locations: input dir is relative to the code, the rest come from config
        self.base_dir = Path(__file__).parent.parent
        if cve_input_dir:
            self.cve_input_dir = Path(cve_input_dir)
        else:
            self.cve_input_dir = self.base_dir / "reproduce_cves"
        self.cve_tasks_dir = Path(self.config['paths']['cve_tasks_dir'])
        self.logs_dir = Path(self.config['paths']['logs_dir'])  # global orchestrator logs

        # The log directory has to exist before the file handler is attached below
        for required_dir in (self.cve_tasks_dir, self.logs_dir):
            required_dir.mkdir(parents=True, exist_ok=True)

        # Logging first, so everything created afterwards can log
        self._setup_logging()
        self.logger = logging.getLogger(__name__)

        # Collaborators that run agents and automatic scripts
        self.agent_runner = AgentRunner(self.config)
        self.script_executor = ScriptExecutor(self.config)

        # Per-CVE bookkeeping, all keyed by cve_id
        self.tasks: Dict[str, CVETaskStatus] = {}
        self.session_pools: Dict[str, Dict[AgentType, str]] = {}  # cve_id -> {agent_type -> session_id}
        self.file_state_managers: Dict[str, FileStateManager] = {}  # exactly one manager per CVE

        # One semaphore per agent type; filled in by _init_agent_semaphores
        self.agent_semaphores: Dict[AgentType, asyncio.Semaphore] = {}
        self._init_agent_semaphores()

        # Handle of the periodic Docker cleanup task (None while not running)
        self._cleanup_task: Optional[asyncio.Task] = None

        # Latest test results per CVE, kept for phase decision making
        self._vulnerable_test_results: Dict[str, Dict[str, Any]] = {}
        self._solution_test_results: Dict[str, Dict[str, Any]] = {}

        self.logger.info("AsyncOrchestrator initialized")

    async def start_background_cleanup(self) -> None:
        """
        Start the background cleanup task for Docker resources.

        This task runs periodically based on docker.cleanup_interval config.
        """
        if self._cleanup_task is not None:
            self.logger.warning("Background cleanup task already running")
            return

        cleanup_interval = self.config.get('docker', {}).get('cleanup_interval', 300)
        self.logger.info(f"Starting background cleanup task (interval={cleanup_interval}s)")

        async def cleanup_loop():
            while True:
                await asyncio.sleep(cleanup_interval)
                try:
                    self.logger.debug("Running periodic Docker cleanup...")
                    result = await self.script_executor.cleanup_stale_containers()
                    self.logger.debug(f"Cleanup result: {result}")
                except Exception as e:
                    self.logger.error(f"Background cleanup error: {e}")

        self._cleanup_task = asyncio.create_task(cleanup_loop())

    async def stop_background_cleanup(self) -> None:
        """Stop the background cleanup task."""
        if self._cleanup_task is not None:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass
            self._cleanup_task = None
            self.logger.info("Background cleanup task stopped")

    def _init_agent_semaphores(self) -> None:
        """
        Create one asyncio.Semaphore per AgentType to cap concurrent runs.

        The cap for each type is read from ``agents.limits.<agent type value>``
        in the config and falls back to 5 when not configured.
        """
        configured_limits = self.config.get('agents', {}).get('limits', {})

        for agent_type in AgentType:
            type_name = agent_type.value
            limit = configured_limits.get(type_name, 5)  # 5 when no limit is configured
            self.agent_semaphores[agent_type] = asyncio.Semaphore(limit)
            self.logger.debug(f"Agent semaphore: {type_name} = {limit}")

    def _setup_logging(self) -> None:
        """
        Setup logging with both console and file handlers.

        If logging is already configured (by run.py), skip setup to avoid duplicates.
        """
        root_logger = logging.getLogger()

        # Check if logging is already configured (has file handler)
        has_file_handler = any(
            isinstance(h, logging.FileHandler) for h in root_logger.handlers
        )
        if has_file_handler:
            # Logging already configured by run.py, skip setup
            return

        # Set up logging (standalone mode or direct import)
        log_level = self.config.get('logging', {}).get('level', 'INFO')
        level = getattr(logging, log_level.upper(), logging.INFO)

        # Create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

        root_logger.setLevel(level)

        # Clear existing handlers to avoid duplicates
        root_logger.handlers.clear()

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(level)
        console_handler.setFormatter(formatter)
        root_logger.addHandler(console_handler)

        # File handler - orchestrator_YYYYMMDD_HHMMSS.log
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        log_file = self.logs_dir / f"orchestrator_{timestamp}.log"
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    def _load_config(self, config_path: str) -> Dict[str, Any]:
        """
        Load the orchestrator configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file

        Returns:
            The parsed top-level content of the file; an empty dict when the
            file holds no YAML document (yaml.safe_load yields None then), so
            later ``self.config.get(...)`` lookups keep working

        Raises:
            OSError: If the file cannot be opened
            yaml.YAMLError: If the content is not valid YAML
        """
        # Decode as UTF-8 explicitly so parsing does not depend on the platform locale
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)

        return {} if loaded is None else loaded

    def _log_phase(self, cve_id: str, phase_num: int, phase_name: str) -> None:
        """
        Log phase start timestamp.

        Args:
            cve_id: CVE identifier
            phase_num: Phase number
            phase_name: Phase name
        """
        timestamp = datetime.now().isoformat()
        log_file = self.cve_tasks_dir / cve_id / ".logs" / "timestamps.log"

        log_file.parent.mkdir(parents=True, exist_ok=True)

        with open(log_file, 'a') as f:
            f.write(f"[{timestamp}] Phase {phase_num}: {phase_name}\n")

        self.logger.info(f"[{cve_id}] Phase {phase_num}: {phase_name}")

    async def _get_or_create_session(
        self,
        agent_type: AgentType,
        cve_id: str,
        working_dir: Path
    ) -> str:
        """
        Get existing session or create new one for agent.

        Note: This only creates the session. Use _run_agent_with_semaphore()
        to run agent with proper concurrency control.

        Args:
            agent_type: Type of agent
            cve_id: CVE identifier
            working_dir: Working directory

        Returns:
            Session ID
        """
        # Initialize session pool for this CVE if needed
        if cve_id not in self.session_pools:
            self.session_pools[cve_id] = {}

        session_pool = self.session_pools[cve_id]

        # Return existing session if available
        if agent_type in session_pool:
            session_id = session_pool[agent_type]
            # Verify session is still valid
            if session_id in self.agent_runner.sessions:
                self.logger.debug(f"Reusing existing session for {agent_type.value}: {session_id}")
                return session_id

        # Create new session
        self.logger.info(f"Creating new session for {agent_type.value}")
        session_id = await self.agent_runner.create_session(
            agent_type=agent_type,
            cve_id=cve_id,
            working_dir=working_dir,
            metadata={'phase': agent_type.value, 'cve_id': cve_id}
        )

        # Store in session pool
        session_pool[agent_type] = session_id

        return session_id

    def _record_phase(
        self,
        cve_id: str,
        name: str,
        started_at: datetime,
        status: str,
        attempts: int = 1,
        feedbacks: List[FeedbackIssue] = None
    ) -> None:
        """Record a phase execution result."""
        self.tasks[cve_id].add_phase(
            name=name,
            started_at=started_at,
            completed_at=datetime.now(),
            attempts=attempts,
            status=status,
            feedbacks=feedbacks or []
        )

    def _save_task_status(self, cve_id: str) -> None:
        """Save task status to .logs/task_status.json"""
        task = self.tasks.get(cve_id)
        if task and task.working_dir:
            logs_dir = task.working_dir / ".logs"
            logs_dir.mkdir(parents=True, exist_ok=True)
            with open(logs_dir / "task_status.json", 'w') as f:
                json.dump(task.to_dict(), f, indent=2)

    def _get_file_state_manager(self, cve_id: str, working_dir: Path) -> FileStateManager:
        """
        Get or create FileStateManager for a CVE.

        Each CVE has exactly one FileStateManager to maintain consistent state.

        Args:
            cve_id: CVE identifier
            working_dir: Working directory

        Returns:
            FileStateManager instance
        """
        if cve_id not in self.file_state_managers:
            self.file_state_managers[cve_id] = FileStateManager(working_dir)
            self.logger.debug(f"[{cve_id}] Created FileStateManager")

        return self.file_state_managers[cve_id]

    async def _execute_agent_message(
        self,
        agent_type: AgentType,
        cve_id: str,
        working_dir: Path,
        message: str
    ) -> Dict[str, Any]:
        """
        Execute an agent message with semaphore control - the lowest level abstraction.

        This is the single entry point for ALL agent interactions:
        - Initial messages (from _run_agent)
        - Missing files retry messages (from _run_agent)
        - Feedback messages (from _process_agent_feedback)
        - Retry messages (from _process_agent_feedback)
        - Validator/Solver messages (from verification loops)

        Flow:
        1. Acquire semaphore
        2. Get or create session
        3. Send message via agent_runner.run_message (query + wait_for_completion)
        4. Release semaphore

        Args:
            agent_type: Type of agent
            cve_id: CVE identifier
            working_dir: Working directory
            message: Message to send to agent

        Returns:
            Agent execution result from run_message
        """
        semaphore = self.agent_semaphores[agent_type]

        # Get timeout from config (default: 600s)
        timeout = self.config.get('agents', {}).get('timeouts', {}).get(agent_type.value, 600)

        self.logger.debug(f"[{cve_id}] Waiting for {agent_type.value} semaphore (available: {semaphore._value})")

        async with semaphore:
            self.logger.debug(f"[{cve_id}] Acquired {agent_type.value} semaphore")

            # Get or create session
            session_id = await self._get_or_create_session(
                agent_type=agent_type,
                cve_id=cve_id,
                working_dir=working_dir
            )

            # Send message and wait for completion
            result = await self.agent_runner.run_message(session_id, message, timeout)

            self.logger.debug(f"[{cve_id}] Released {agent_type.value} semaphore")
            return result

    async def _run_agent(
        self,
        phase_key: str,
        cve_id: str,
        working_dir: Path,
        cve_content: Optional[str] = None,
        custom_message: Optional[str] = None,
        is_retry: bool = False,
    ) -> AgentResult:
        """
        Run one agent phase, re-prompting the agent while required output files are missing.

        Works for every phase in PHASE_DEFINITIONS. The phase's output
        directories are created first. Then, per attempt (at most
        ``orchestrator.max_retries`` from the config, default 3):
        1. Choose the message: on the first attempt ``custom_message`` if given,
           otherwise the default initial message; on later attempts a message
           listing the files that are still missing
        2. Send it through _execute_agent_message
        3. Check that all required files of the phase exist; if some are
           missing, move on to the next attempt
        4. Process the agent's feedback; its outcome ends the phase

        Args:
            phase_key: Key in PHASE_DEFINITIONS ('analyzer', 'generator', 'builder', 'validator', 'solver', 'checker')
            cve_id: CVE identifier
            working_dir: Working directory
            cve_content: CVE content, passed to the default initial message
            custom_message: Message to send on the first attempt instead of the default one
            is_retry: If True, the phase start is not logged (used by verification loop retries)

        Returns:
            AgentResult.ok when files exist and feedback processing succeeded;
            AgentResult.fail when feedback processing fails or required files
            are still missing after the last attempt
        """
        phase_def = PHASE_DEFINITIONS[phase_key]
        agent_type = phase_def.agent_type

        # config.yaml is the single source of truth for the retry budget
        orchestrator_cfg = self.config.get('orchestrator', {})
        max_retries = orchestrator_cfg.get('max_retries', 3)

        # Every directory the phase writes into must exist before the agent runs
        for relative_dir in phase_def.output_dirs:
            (working_dir / relative_dir).mkdir(parents=True, exist_ok=True)

        missing_files: List[str] = []

        for attempt in range(1, max_retries + 1):
            phase_started_at = datetime.now()

            if attempt > 1:
                # Later attempts only ask for the files that are still missing
                message = self._format_missing_files_message(missing_files)
                self.logger.info(f"[{cve_id}] Retrying {phase_key} (attempt {attempt}/{max_retries}) for missing files: {missing_files}")
            else:
                if custom_message:
                    message = custom_message
                else:
                    message = self._prepare_initial_message(agent_type, cve_id, cve_content)
                # The phase start is logged once, and not for verification loop retries
                if not is_retry:
                    self._log_phase(cve_id, phase_def.phase_num, phase_def.phase_name)

            await self._execute_agent_message(
                agent_type=agent_type,
                cve_id=cve_id,
                working_dir=working_dir,
                message=message,
            )

            # Output files (including XML) are checked before feedback is looked at
            if phase_def.required_files:
                missing_files = [
                    required for required in phase_def.required_files
                    if not (working_dir / required).exists()
                ]
                if missing_files:
                    self.logger.warning(f"[{cve_id}] {phase_key} missing output files: {missing_files}")
                    continue  # next attempt sends the missing-files message

            feedback_ok = await self._process_agent_feedback(
                agent_type=agent_type,
                cve_id=cve_id,
                working_dir=working_dir,
                phase_started_at=phase_started_at,
                attempts=attempt
            )

            if feedback_ok:
                self.logger.info(f"[{cve_id}] {phase_key} completed successfully")
                return AgentResult.ok(phase_key, attempt)

            failure = f"{phase_key} agent reported error or has unresolved issues"
            self.logger.error(f"[{cve_id}] {failure}")
            return AgentResult.fail(failure, phase_key, attempt)

        # Reached only when every attempt ended with missing files (or no attempt was allowed)
        exhausted = f"{phase_key} failed after {max_retries} attempts, missing: {missing_files}"
        self.logger.error(f"[{cve_id}] {exhausted}")
        return AgentResult.fail(exhausted, phase_key, max_retries, missing_files)

    def _prepare_initial_message(
        self,
        agent_type: AgentType,
        cve_id: str,
        cve_content: Optional[str] = None
    ) -> str:
        """
        Prepare the initial message for an agent.

        Args:
            agent_type: Type of agent
            cve_id: CVE identifier
            cve_content: CVE information content (for Analyzer)

        Returns:
            Initial message string
        """
        messages = {
            AgentType.ANALYZER: f"""
You are analyzing {cve_id}.

Below is the CVE information:

---
{cve_content if cve_content else '[CVE content will be provided]'}
---

Begin your comprehensive analysis and create the required output files.
""",

            AgentType.GENERATOR: f"""
You are generating reproduction materials for {cve_id}.

The analyzer has completed their work. Review their output and create:
1. A user-facing task description
2. Comprehensive test suites
3. A solution script

Remember to frame the task as a bug report, not a security issue.
""",

            AgentType.BUILDER: f"""
You are building the Docker environment for {cve_id}.

Review the provided specifications and create a working Docker environment.
Focus on authenticity and reproducibility.
""",

            AgentType.VALIDATOR: f"""
You are validating the vulnerability reproduction for {cve_id}.

Run the test suites and verify that:
1. The vulnerability is demonstrable
2. Tests pass in vulnerable state
3. Environment is stable

You may make minor adjustments if needed.
""",

            AgentType.SOLVER: f"""
You are validating the solution for {cve_id}.

Apply the solution and verify that:
1. The solution fixes the vulnerability
2. Tests pass in fixed state
3. No functionality is broken

Make adjustments to the solution if needed.
""",

            AgentType.CHECKER: f"""
You are performing final validation for {cve_id}.

Review all artifacts and verify:
1. No sensitive information leakage
2. Documentation is complete
3. Quality standards are met
4. User task is appropriate

Generate the final report.
""",
        }

        return messages.get(agent_type, f"Process {cve_id}.").strip()

    def _format_missing_files_message(self, missing_files: List[str]) -> str:
        """
        Format a retry message for missing files.

        Args:
            missing_files: List of missing file paths

        Returns:
            Formatted message string
        """
        return f"""
Your previous attempt did not create all required files. Please create the missing files:

Missing files:
{chr(10).join(f'- {f}' for f in missing_files)}

Please continue and create these files now.
""".strip()

    async def _process_agent_feedback(
        self,
        agent_type: AgentType,
        cve_id: str,
        working_dir: Path,
        phase_started_at: datetime,
        attempts: int
    ) -> bool:
        """
        Process feedback from agent, record phase, route issues to responsible agents, and retry if needed.

        This is a unified feedback processing method that handles:
        1. Parse agent result XML and record phase
        2. If agent reports issues, send to responsible agents
        3. If issues were resolved, retry the original agent
        4. Recursively process feedback from retry

        Args:
            agent_type: Agent that generated feedback
            cve_id: CVE identifier
            working_dir: Working directory
            phase_started_at: When this phase started
            attempts: Number of attempts for this phase

        Returns:
            True if no issues found or all issues resolved
        """
        # Get file state manager (one per CVE) and feedback processor
        file_state_manager = self._get_file_state_manager(cve_id, working_dir)
        feedback_processor = FeedbackProcessor(working_dir)

        # Update file states after agent execution
        modified_files = file_state_manager.update_file_states(agent_type)
        self.logger.debug(f"[{cve_id}] {agent_type.value} modified {len(modified_files)} files")

        # Parse agent result XML
        agent_result = feedback_processor.parse_agent_result_xml(agent_type)
        feedbacks = agent_result.issues or []

        # Record this agent's phase immediately (agent execution is done)
        phase_status = 'completed' if agent_result.status == "success" else 'failed'
        if agent_result.status == "pause":
            phase_status = 'paused'

        self._record_phase(cve_id, agent_type.value, phase_started_at, phase_status, attempts, feedbacks)

        if agent_result.status == "success":
            self.logger.info(f"[{cve_id}] {agent_type.value} finished with success status")
            return True

        if agent_result.status == "error":
            # Check if this is an XML parse error - give agent one chance to fix it
            is_xml_error = any(
                issue.name == "xml_parse_error" or "xml" in issue.name.lower()
                for issue in (agent_result.issues or [])
            )

            if is_xml_error:
                self.logger.warning(f"[{cve_id}] {agent_type.value} XML parse error, asking agent to regenerate")

                # Ask agent to regenerate the XML
                retry_message = f"""
Your result XML file has a parsing error: {agent_result.message}

Please regenerate the {agent_type.value}-res.xml file with valid XML format.
IMPORTANT: Wrap all text content in <![CDATA[...]]> to avoid XML parsing issues.

Example:
<result>
    <status>success</status>
    <message><![CDATA[Your message text here, can contain any characters like <, >, &]]></message>
</result>

Regenerate the XML file now.
""".strip()

                try:
                    result = await self._execute_agent_message(
                        agent_type=agent_type,
                        cve_id=cve_id,
                        working_dir=working_dir,
                        message=retry_message
                    )

                    if result['status'] == 'completed':
                        # Re-parse the XML after agent regenerates it
                        agent_result = feedback_processor.parse_agent_result_xml(agent_type)
                        if agent_result.status == "success":
                            self.logger.info(f"[{cve_id}] {agent_type.value} fixed XML and succeeded")
                            return True
                        elif agent_result.status != "error":
                            # Agent regenerated XML but with issues - continue processing below
                            self.logger.info(f"[{cve_id}] {agent_type.value} regenerated XML with status: {agent_result.status}")
                        else:
                            self.logger.error(f"[{cve_id}] {agent_type.value} still has XML error after retry")
                            return False
                    else:
                        self.logger.error(f"[{cve_id}] {agent_type.value} failed to regenerate XML")
                        return False
                except Exception as e:
                    self.logger.error(f"[{cve_id}] Error asking agent to fix XML: {e}")
                    return False
            else:
                self.logger.error(f"[{cve_id}] {agent_type.value} encountered error: {agent_result.message}")
                return False

        if agent_result.status == "pause" and agent_result.issues:
            self.logger.info(f"[{cve_id}] {agent_type.value} paused with {len(agent_result.issues)} issues")

            # Process each issue - send to responsible agents
            resolved_count = 0
            for issue in agent_result.issues:
                # Find responsible agent for this file/issue
                responsible_agent = file_state_manager.find_responsible_agent(issue.name)

                if responsible_agent is None:
                    self.logger.warning(f"[{cve_id}] No responsible agent found for: {issue.name}")
                    continue

                if responsible_agent == agent_type:
                    self.logger.debug(f"[{cve_id}] Issue with own file, {agent_type.value} should self-fix")
                    continue

                # Send feedback to responsible agent
                feedback_started_at = datetime.now()
                self.logger.info(f"[{cve_id}] Sending feedback from {agent_type.value} to {responsible_agent.value}: {issue.name}")

                try:
                    message = feedback_processor.format_feedback_message(issue, agent_type, responsible_agent)
                    result = await self._execute_agent_message(
                        agent_type=responsible_agent,
                        cve_id=cve_id,
                        working_dir=working_dir,
                        message=message
                    )

                    if result['status'] == 'completed':
                        self.logger.info(f"[{cve_id}] {responsible_agent.value} processed feedback successfully")
                        file_state_manager.update_file_states(responsible_agent)
                        self._record_phase(cve_id, responsible_agent.value, feedback_started_at, 'completed', 1, [issue])
                        resolved_count += 1
                    else:
                        self.logger.warning(f"[{cve_id}] {responsible_agent.value} failed to process feedback: {result.get('error')}")
                        self._record_phase(cve_id, responsible_agent.value, feedback_started_at, 'failed', 1, [issue])
                except Exception as e:
                    self.logger.error(f"[{cve_id}] Error sending feedback to {responsible_agent.value}: {e}")
                    self._record_phase(cve_id, responsible_agent.value, feedback_started_at, 'failed', 1, [issue])

            self.logger.info(f"[{cve_id}] Resolved {resolved_count}/{len(agent_result.issues)} issues")

            # If we resolved some issues, retry the original agent
            if resolved_count > 0:
                retry_started_at = datetime.now()
                self.logger.info(f"[{cve_id}] Retrying {agent_type.value} after feedback processing")

                try:
                    message = """
The issues you reported have been addressed by the responsible agents.
Please continue with your original task and verify that the problems are resolved.
""".strip()

                    result = await self._execute_agent_message(
                        agent_type=agent_type,
                        cve_id=cve_id,
                        working_dir=working_dir,
                        message=message
                    )

                    if result['status'] == 'completed':
                        # Recursively process feedback from retry
                        return await self._process_agent_feedback(
                            agent_type, cve_id, working_dir,
                            phase_started_at=retry_started_at,
                            attempts=1
                        )
                    else:
                        self.logger.warning(f"[{cve_id}] {agent_type.value} retry failed: {result.get('error')}")
                        self._record_phase(cve_id, agent_type.value, retry_started_at, 'failed')
                        return False

                except Exception as e:
                    self.logger.error(f"[{cve_id}] Error retrying {agent_type.value}: {e}")
                    self._record_phase(cve_id, agent_type.value, retry_started_at, 'failed')
                    return False

        return False

    async def _close_all_sessions_for_cve(self, cve_id: str) -> None:
        """
        Close all sessions and clean up resources for a specific CVE.

        Args:
            cve_id: CVE identifier
        """
        # Close all agent sessions
        if cve_id in self.session_pools:
            session_pool = self.session_pools[cve_id]

            for agent_type, session_id in session_pool.items():
                try:
                    await self.agent_runner.close_session(session_id)
                    self.logger.debug(f"[{cve_id}] Closed session for {agent_type.value}")
                except Exception as e:
                    self.logger.warning(f"[{cve_id}] Error closing session for {agent_type.value}: {e}")

            # Clear session pool for this CVE
            del self.session_pools[cve_id]

        # Clean up FileStateManager
        if cve_id in self.file_state_managers:
            del self.file_state_managers[cve_id]
            self.logger.debug(f"[{cve_id}] Cleaned up FileStateManager")

    async def _run_phase_vulnerable_verification(
        self, cve_id: str, working_dir: Path
    ) -> AgentResult:
        """
        Phase 4: Docker Build, Start, Test and Validate (with retry loop)

        Flow for each attempt:
        1. Run tests with restart_docker=True (stop, rebuild, start, then test)
        2. If tests pass (func PASS, vuln FAIL) → success
        3. If tests fail → run Validator agent to fix, loop back

        Args:
            cve_id: CVE identifier
            working_dir: Working directory

        Returns:
            AgentResult with success status and error details
        """
        phase_started_at = datetime.now()
        self._log_phase(cve_id, 4, "Vulnerable Environment Verification")

        max_retries = self.config.get('orchestrator', {}).get('max_retries', 3)
        last_error: Optional[str] = None

        for validator_attempt in range(max_retries + 1):  # +1 for initial test without validator
            # Run tests with Docker restart (stop, rebuild, start, then test)
            self.logger.info(f"[{cve_id}] Running tests on vulnerable environment (attempt {validator_attempt + 1})...")
            test_result = await self.script_executor.run_tests(
                working_dir, cve_id, stage='vulnerable', restart_docker=True
            )
            self._vulnerable_test_results[cve_id] = test_result

            if test_result['success']:
                # func PASS + vuln FAIL = vulnerability confirmed
                self.logger.info(f"[{cve_id}] Vulnerable environment verified: func PASS, vuln FAIL")
                # Clean up test_results.md since verification passed
                test_results_file = working_dir / ".agent_state" / "validator_output" / "test_results.md"
                if test_results_file.exists():
                    test_results_file.unlink()
                self._record_phase(cve_id, 'vulnerable_verification', phase_started_at, 'completed', validator_attempt + 1)
                return AgentResult.ok('vulnerable_verification', validator_attempt + 1)

            # Tests failed - collect issues
            issues = test_result.get('validation', {}).get('issues', [])
            last_error = f"Tests failed: {'; '.join(issues)}" if issues else test_result.get('error', 'Tests did not match expected pattern')
            self.logger.warning(f"[{cve_id}] {last_error}")

            # Check if we've exhausted validator retries
            if validator_attempt >= max_retries:
                self.logger.error(f"[{cve_id}] Validator retries exhausted ({max_retries})")
                break

            # Run Validator agent to fix issues
            self.logger.info(f"[{cve_id}] Running Validator agent (attempt {validator_attempt + 1}/{max_retries})...")
            test_info_path = working_dir / ".agent_state" / "validator_output" / "test_results.md"
            test_info_path.parent.mkdir(parents=True, exist_ok=True)
            test_info_path.write_text(self._format_test_results_for_agent(test_result, stage='vulnerable'))

            message = None if validator_attempt == 0 else """
The previous fix did not resolve the issue. Tests still failed.

Please review the updated test results in:
.agent_state/validator_output/test_results.md

Analyze what went wrong and try a different approach to fix the environment.
""".strip()

            agent_result = await self._run_agent('validator', cve_id, working_dir, custom_message=message, is_retry=(validator_attempt > 0))

            # If validator reported error, stop retrying
            if not agent_result.success:
                self.logger.error(f"[{cve_id}] Validator agent reported error: {agent_result.error}")
                last_error = agent_result.error
                break

        # All retries exhausted - full cleanup
        await self.script_executor.docker_stop_and_cleanup(working_dir, cve_id, remove_images=True)
        self._record_phase(cve_id, 'vulnerable_verification', phase_started_at, 'failed', max_retries + 1)
        return AgentResult.fail(last_error or "Vulnerable verification failed after all retries", 'vulnerable_verification', max_retries + 1)

    def _format_test_results_for_agent(self, test_result: dict, stage: str) -> str:
        """Format test results as markdown for agent to read."""
        if not test_result:
            return f"# Test Results ({stage})\n\nNo test results available.\n"

        lines = [
            f"# Test Results ({stage} environment)",
            "",
            "## Summary",
            f"- **Success**: {test_result.get('success', 'N/A')}",
            f"- **Stage**: {test_result.get('stage', stage)}",
            "",
        ]

        validation = test_result.get('validation', {})
        if validation:
            lines.extend([
                "## Validation",
                f"- **Valid**: {validation.get('valid', 'N/A')}",
                "",
                "### Expected",
            ])
            expected = validation.get('expected', {})
            for key, val in expected.items():
                lines.append(f"- {key}: {val}")

            lines.extend(["", "### Actual"])
            actual = validation.get('actual', {})
            for test_type, data in actual.items():
                if isinstance(data, dict):
                    lines.append(f"- **{test_type}**: {data.get('passed', 0)} passed, {data.get('failed', 0)} failed (total: {data.get('total', 0)})")

            issues = validation.get('issues', [])
            if issues:
                lines.extend(["", "## Issues"])
                for issue in issues:
                    lines.append(f"- {issue}")

        raw_output = test_result.get('raw_output', '')
        if raw_output:
            lines.extend([
                "",
                "## Raw Test Output",
                "```",
                raw_output[-3000:] if len(raw_output) > 3000 else raw_output,
                "```"
            ])

        return "\n".join(lines)

    async def _run_phase_solution_verification(
        self, cve_id: str, working_dir: Path
    ) -> AgentResult:
        """
        Phase 5: Apply Solution, Test and Verify (with retry loop)

        Flow for each attempt:
        1. Apply solution.sh. restart_docker is False on the first attempt and
           True on every later attempt.
        2. If the solution applied, run tests on the fixed environment (no restart)
        3. If the test result reports success → stop docker (images are kept),
           record the phase as completed and return
        4. If applying the solution or the tests failed → write the latest result to
           .agent_state/solver_output/test_results.md, run the Solver agent, loop back

        The loop body runs at most max_retries + 1 times: the Solver agent is not
        run after the final failed attempt. The loop also stops early if the
        Solver agent itself reports an error.

        On failure, docker is cleaned up with remove_images=True and the phase is
        recorded with max_retries + 1 attempts, regardless of how many iterations
        actually ran.

        Args:
            cve_id: CVE identifier
            working_dir: Working directory

        Returns:
            AgentResult with success status and error details
        """
        phase_started_at = datetime.now()
        self._log_phase(cve_id, 5, "Solution Verification")

        # Get max retries from config (default: 3)
        max_retries = self.config.get('orchestrator', {}).get('max_retries', 3)

        # Solution → Test → Solve loop
        last_error: Optional[str] = None
        for solver_attempt in range(max_retries + 1):  # +1 for initial test without solver
            # Step 1: Apply solution
            # First attempt: Docker is already running from vulnerable verification, no need to restart
            # Subsequent attempts: Need to restart to get clean state after solver modifications
            restart_docker = solver_attempt > 0
            self.logger.info(f"[{cve_id}] Applying solution.sh (attempt {solver_attempt + 1}, restart_docker={restart_docker})...")
            solution_result = await self.script_executor.apply_solution(
                working_dir=working_dir,
                cve_id=cve_id,
                restart_docker=restart_docker
            )

            if not solution_result['success']:
                last_error = f"Solution application failed: {solution_result.get('error', 'unknown error')}"
                self.logger.warning(f"[{cve_id}] {last_error}")
                # Stored so the Solver agent report below can describe the
                # application failure (no tests are run in this case).
                self._solution_test_results[cve_id] = {
                    'success': False,
                    'solution_applied': False,
                    'error': solution_result.get('error'),
                    'output': solution_result.get('output', '')
                }
            else:
                self.logger.info(f"[{cve_id}] Solution applied successfully")

                # Step 2: Run tests on fixed environment (no restart needed)
                self.logger.info(f"[{cve_id}] Running tests on fixed environment...")
                test_result = await self.script_executor.run_tests(
                    working_dir=working_dir,
                    cve_id=cve_id,
                    stage='fixed',
                    restart_docker=False
                )

                self._solution_test_results[cve_id] = {
                    'success': test_result.get('success', False),
                    'solution_applied': True,
                    'solution_output': solution_result.get('output', ''),
                    'test_result': test_result
                }

                if test_result.get('success'):
                    # All tests pass - CVE reproduction successful!
                    self.logger.info(f"[{cve_id}] Solution verified: all tests PASS - CVE reproduction SUCCESSFUL")
                    # Clean up test_results.md since verification passed
                    test_results_file = working_dir / ".agent_state" / "solver_output" / "test_results.md"
                    if test_results_file.exists():
                        test_results_file.unlink()
                    # Stop docker but keep images - Checker phase still needs them
                    # Full cleanup (remove_images=True) happens in Phase 7 (Cleanup)
                    await self.script_executor.docker_stop_and_cleanup(working_dir, cve_id, remove_images=False)
                    self._record_phase(cve_id, 'solution_verification', phase_started_at, 'completed', solver_attempt + 1)
                    return AgentResult.ok('solution_verification', solver_attempt + 1)

                # Tests failed - collect issues
                issues = test_result.get('validation', {}).get('issues', [])
                last_error = f"Tests failed after solution: {'; '.join(issues)}" if issues else "Tests failed after applying solution"
                self.logger.warning(f"[{cve_id}] {last_error}")

            # Check if we've exhausted solver retries
            # (reached for both failure kinds: solution not applied, or tests failed)
            if solver_attempt >= max_retries:
                self.logger.error(f"[{cve_id}] Solver retries exhausted ({max_retries})")
                break

            # Run Solver agent to fix issues
            self.logger.info(f"[{cve_id}] Running Solver agent (attempt {solver_attempt + 1}/{max_retries})...")
            # The report file is overwritten on every attempt with the latest result.
            test_info_path = working_dir / ".agent_state" / "solver_output" / "test_results.md"
            test_info_path.parent.mkdir(parents=True, exist_ok=True)
            test_info_path.write_text(self._format_solution_results_for_agent(self._solution_test_results[cve_id]))

            # No custom message on the first Solver run; later runs are told
            # that the previous fix did not work.
            message = None if solver_attempt == 0 else """
The previous fix did not resolve the issue. Tests still failed after applying solution.sh.

Please review the updated test results in:
.agent_state/solver_output/test_results.md

Analyze what went wrong and try a different approach to fix the solution.
""".strip()

            agent_result = await self._run_agent('solver', cve_id, working_dir, custom_message=message, is_retry=(solver_attempt > 0))

            # If solver reported error, stop retrying
            if not agent_result.success:
                self.logger.error(f"[{cve_id}] Solver agent reported error: {agent_result.error}")
                last_error = agent_result.error
                break

        # Reached on retry exhaustion or Solver error - full cleanup
        await self.script_executor.docker_stop_and_cleanup(working_dir, cve_id, remove_images=True)
        # NOTE: the attempt count recorded here is always max_retries + 1, even
        # when the loop stopped early because the Solver agent reported an error.
        self._record_phase(cve_id, 'solution_verification', phase_started_at, 'failed', max_retries + 1)
        return AgentResult.fail(last_error or "Solution verification failed after all retries", 'solution_verification', max_retries + 1)

    def _format_solution_results_for_agent(self, result: dict) -> str:
        """Format solution test results as markdown for solver agent to read."""
        if not result:
            return "# Solution Test Results\n\nNo results available.\n"

        lines = [
            "# Solution Test Results",
            "",
            "## Summary",
            f"- **Overall Success**: {result.get('success', 'N/A')}",
            f"- **Solution Applied**: {result.get('solution_applied', 'N/A')}",
            "",
        ]

        # Solution application error
        if not result.get('solution_applied'):
            lines.extend([
                "## Solution Application Failed",
                f"- **Error**: {result.get('error', 'Unknown error')}",
                "",
                "### Solution Output",
                "```",
                result.get('output', 'No output'),
                "```",
            ])
            return "\n".join(lines)

        # Solution output
        solution_output = result.get('solution_output', '')
        if solution_output:
            lines.extend([
                "## Solution Output",
                "```",
                solution_output[-2000:] if len(solution_output) > 2000 else solution_output,
                "```",
                "",
            ])

        # Test results
        test_result = result.get('test_result', {})
        if test_result:
            lines.append(self._format_test_results_for_agent(test_result, stage='fixed'))

        return "\n".join(lines)

    def _format_check_results_for_checker(self, check_result: dict) -> str:
        """Format CVE check results as markdown for Checker agent to read."""
        lines = [
            "# CVE Check Results",
            "",
            "## Summary",
            f"- **Ready**: {check_result.get('ready', False)}",
            f"- **CVE ID**: {check_result.get('cve_id', 'N/A')}",
            "",
        ]

        checks = check_result.get('checks', {})

        # Files check
        files_check = checks.get('files', {})
        lines.append("## File Check")
        lines.append(f"- **Success**: {files_check.get('success', False)}")
        if files_check.get('missing'):
            lines.append("- **Missing files**:")
            for f in files_check['missing']:
                lines.append(f"  - {f}")
        lines.append("")

        # Vulnerable test
        vuln_check = checks.get('vulnerable_test', {})
        if vuln_check:
            lines.append("## Vulnerable Environment Test")
            lines.append(f"- **Success**: {vuln_check.get('success', False)}")
            if vuln_check.get('skipped'):
                lines.append("- **Skipped**: Yes")
            elif not vuln_check.get('success'):
                details = vuln_check.get('details', {})
                issues = details.get('validation', {}).get('issues', [])
                if issues:
                    lines.append("- **Issues**:")
                    for issue in issues:
                        lines.append(f"  - {issue}")
                raw_output = details.get('raw_output', '')
                if raw_output:
                    lines.extend([
                        "",
                        "### Test Output (last 2000 chars)",
                        "```",
                        raw_output[-2000:] if len(raw_output) > 2000 else raw_output,
                        "```"
                    ])
            lines.append("")

        # Solution check
        solution_check = checks.get('solution', {})
        if solution_check:
            lines.append("## Solution Application")
            lines.append(f"- **Success**: {solution_check.get('success', False)}")
            if not solution_check.get('success') and not solution_check.get('skipped'):
                details = solution_check.get('details', {})
                if details.get('error'):
                    lines.append(f"- **Error**: {details['error']}")
                output = details.get('output', '') or details.get('errors', '')
                if output:
                    lines.extend([
                        "",
                        "### Solution Output",
                        "```",
                        output[-1000:] if len(output) > 1000 else output,
                        "```"
                    ])
            lines.append("")

        # Fixed test
        fixed_check = checks.get('fixed_test', {})
        if fixed_check:
            lines.append("## Fixed Environment Test")
            lines.append(f"- **Success**: {fixed_check.get('success', False)}")
            if not fixed_check.get('success') and not fixed_check.get('skipped'):
                details = fixed_check.get('details', {})
                issues = details.get('validation', {}).get('issues', [])
                if issues:
                    lines.append("- **Issues**:")
                    for issue in issues:
                        lines.append(f"  - {issue}")
                raw_output = details.get('raw_output', '')
                if raw_output:
                    lines.extend([
                        "",
                        "### Test Output (last 2000 chars)",
                        "```",
                        raw_output[-2000:] if len(raw_output) > 2000 else raw_output,
                        "```"
                    ])
            lines.append("")

        return "\n".join(lines)

    async def _run_phase_check(
        self, cve_id: str, working_dir: Path
    ) -> AgentResult:
        """
        Phase 6: CVE Ready Check with Checker agent

        Flow for each attempt:
        1. Run check_cve_ready.py
        2. If it fails → write .agent_state/checker_output/check_results.md and ask
           the Checker to fix functional issues + review format requirements
        3. If it passes → ask the Checker to review format requirements only
        4. After the Checker finishes, run the check again; the phase succeeds only
           when this verification run reports ready

        The phase can only complete through step 4, so the Checker agent runs at
        least once before a success is returned. The loop runs at most
        max_retries + 1 times. If the initial check of the last iteration fails,
        the loop ends without running the Checker again. The loop also ends early
        if the Checker agent itself reports an error.

        On failure the phase is recorded with max_retries + 1 attempts, regardless
        of how many iterations actually ran.

        Args:
            cve_id: CVE identifier
            working_dir: Working directory

        Returns:
            AgentResult with success status and error details
        """
        phase_started_at = datetime.now()
        self._log_phase(cve_id, 6, "CVE Ready Check")

        max_retries = self.config.get('orchestrator', {}).get('max_retries', 3)
        last_error: Optional[str] = None
        check_results_path = working_dir / ".agent_state" / "checker_output" / "check_results.md"
        check_results_path.parent.mkdir(parents=True, exist_ok=True)

        for checker_attempt in range(max_retries + 1):
            # Step 1: Run check_cve_ready.py
            # (this runs at the top of every iteration, including right after a
            # failed verification run at the end of the previous iteration)
            self.logger.info(f"[{cve_id}] Running CVE ready check (attempt {checker_attempt + 1})...")
            check_result = await self.script_executor.run_cve_check(
                working_dir=working_dir,
                cve_id=cve_id
            )

            check_passed = check_result['ready']

            if check_passed:
                self.logger.info(f"[{cve_id}] CVE ready check PASSED")
            else:
                checks = check_result.get('checks', {})
                # A step counts as failed when it is neither successful nor skipped.
                failed_steps = [k for k, v in checks.items() if not v.get('success') and not v.get('skipped')]
                last_error = f"Check failed at: {', '.join(failed_steps)}"
                self.logger.warning(f"[{cve_id}] {last_error}")

            # Step 2: Prepare message for Checker based on check result
            if check_passed:
                # Check passed - Checker reviews format requirements only
                message = """
check_cve_ready.py has PASSED. Now review the "Other Requirements" section in your instructions:

1. Reproduction must use real source code (not mocked/simplified implementations)
2. Tests must call real running services (not static analysis or isolated function calls)
3. Dockerfile format requirements (no COPY tests/, no hardcoded proxy, etc.)
4. task.yaml must be a realistic user report (no CVE identifiers)
5. Clean up unused files in task-deps/

Review each requirement carefully and fix any issues found. When done, run check_cve_ready.py to verify nothing is broken.
""".strip()
            else:
                # Check failed - Checker fixes functional issues + reviews format
                # The report file is overwritten with the latest failed result.
                check_results_path.write_text(self._format_check_results_for_checker(check_result))

                if checker_attempt == 0:
                    message = """
check_cve_ready.py has FAILED. Please:

1. Read .agent_state/checker_output/check_results.md to understand what failed
2. Fix the functional issues to make the check pass
3. Also review "Other Requirements" (real source code, real service tests, Dockerfile format, etc.)

When done, run check_cve_ready.py to verify all checks pass.
""".strip()
                else:
                    message = """
The previous fixes did not resolve all issues. check_cve_ready.py still FAILED.

Please review the updated results in .agent_state/checker_output/check_results.md

Analyze what went wrong, try a different approach, and fix the issues.
Don't forget to also check "Other Requirements" (real source code, real service tests, etc.).

When done, run check_cve_ready.py to verify.
""".strip()

            # Check if we've exhausted retries (only for failed checks)
            # A passing check on the last iteration still proceeds to the Checker.
            if not check_passed and checker_attempt >= max_retries:
                self.logger.error(f"[{cve_id}] Checker retries exhausted ({max_retries})")
                break

            # Step 3: Run Checker agent
            self.logger.info(f"[{cve_id}] Running Checker agent (check_passed={check_passed}, attempt {checker_attempt + 1})...")

            agent_result = await self._run_agent(
                'checker', cve_id, working_dir,
                custom_message=message,
                is_retry=(checker_attempt > 0)
            )

            # A Checker agent error aborts the phase, even if the check had passed.
            if not agent_result.success:
                self.logger.error(f"[{cve_id}] Checker agent reported error: {agent_result.error}")
                last_error = agent_result.error
                break

            # Step 4: After Checker completes, run check again to verify
            self.logger.info(f"[{cve_id}] Verifying after Checker completed...")
            final_check = await self.script_executor.run_cve_check(
                working_dir=working_dir,
                cve_id=cve_id
            )

            if final_check['ready']:
                self.logger.info(f"[{cve_id}] Final verification PASSED - CVE check complete!")
                # Clean up check_results.md since check passed
                if check_results_path.exists():
                    check_results_path.unlink()
                self._record_phase(cve_id, 'cve_check', phase_started_at, 'completed', checker_attempt + 1)
                return AgentResult.ok('cve_check', checker_attempt + 1)

            # Final check failed - if original check passed, this means Checker broke something
            if check_passed:
                last_error = "Checker's format fixes broke the functional check"
                self.logger.warning(f"[{cve_id}] {last_error}, retrying...")
            # Continue loop to retry
            # (when the original check had failed, last_error keeps the message
            # from that original check; final_check's details are not inspected)

        # Reached on retry exhaustion or Checker agent error
        # NOTE: the attempt count recorded here is always max_retries + 1, even
        # when the loop stopped early because the Checker agent reported an error.
        self._record_phase(cve_id, 'cve_check', phase_started_at, 'failed', max_retries + 1)
        return AgentResult.fail(
            last_error or "CVE ready check failed after all retries",
            'cve_check',
            max_retries + 1
        )

    async def _run_phase_cleanup(self, cve_id: str, working_dir: Path) -> bool:
        """
        Phase 7: Cleanup - run the cleanup script and record the phase.

        No agent is involved, so the phase record is written here directly.
        A cleanup result without success is only logged as a warning; the
        phase is still recorded as completed.

        Args:
            cve_id: CVE identifier
            working_dir: Working directory

        Returns:
            Always True
        """
        started = datetime.now()
        self._log_phase(cve_id, 7, "Cleanup")

        outcome = await self.script_executor.cleanup_cve_task(
            working_dir=working_dir,
            cve_id=cve_id,
        )

        cleanup_ok = outcome['success']
        if not cleanup_ok:
            self.logger.warning(f"[{cve_id}] Cleanup had issues: {outcome.get('error')}")

        # Cleanup problems are non-fatal: the phase is recorded as completed.
        self._record_phase(cve_id, 'cleanup', started, 'completed', 1)
        self.logger.info(f"[{cve_id}] Cleanup completed")
        return True

    def _mark_phase_failed(self, cve_id: str, result: AgentResult) -> bool:
        """
        Flag the CVE task as failed, store the error text and save the status.

        The stored error is result.error, or a "Failed at phase: ..." message
        built from result.phase_key when result.error is falsy.

        Returns:
            Always False, so callers can write `return self._mark_phase_failed(...)`.
        """
        task = self.tasks[cve_id]
        task.status = 'failed'

        reason = result.error
        if not reason:
            reason = f"Failed at phase: {result.phase_key}"
        task.error = reason

        self._save_task_status(cve_id)
        return False

    async def _run_phase(
        self,
        phase_key: str,
        cve_id: str,
        working_dir: Path,
        cve_content: Optional[str] = None,
        custom_message: Optional[str] = None,
        is_retry: bool = False
    ) -> bool:
        """
        Run one agent phase and mark the task as failed if it does not succeed.

        Steps:
        1. Set the task's current_phase to phase_key
        2. Await _run_agent with the arguments passed through unchanged
        3. On an unsuccessful result, delegate to _mark_phase_failed

        Returns:
            True if the phase succeeded, False otherwise
        """
        self.tasks[cve_id].current_phase = phase_key

        outcome = await self._run_agent(
            phase_key, cve_id, working_dir, cve_content, custom_message, is_retry
        )
        if outcome.success:
            return True
        return self._mark_phase_failed(cve_id, outcome)

    async def process_cve(self, cve_id: str) -> bool:
        """
        Process a single CVE through the complete pipeline.

        Args:
            cve_id: CVE identifier (e.g., CVE-2024-12345)

        Returns:
            True if successful
        """
        self.logger.info(f"[{cve_id}] Starting CVE processing")

        # Create working directory
        working_dir = self.cve_tasks_dir / cve_id
        working_dir.mkdir(parents=True, exist_ok=True)

        # Load CVE content for analyzer
        cve_file = self.cve_input_dir / f"{cve_id}.md"
        if not cve_file.exists():
            self.logger.error(f"[{cve_id}] CVE file not found: {cve_file}")
            return False
        cve_content = cve_file.read_text()

        # Create task status with working_dir for persistence
        self.tasks[cve_id] = CVETaskStatus(
            cve_id=cve_id,
            status='in_progress',
            current_phase='initialization',
            started_at=datetime.now(),
            working_dir=working_dir
        )

        try:
            # Phase 1-3: Analyzer, Generator, Builder
            for phase_key in ['analyzer', 'generator', 'builder']:
                content = cve_content if phase_key == 'analyzer' else None
                if not await self._run_phase(phase_key, cve_id, working_dir, content):
                    return False

            # Phase 4: Vulnerable environment verification (Docker + Validator loop)
            self.tasks[cve_id].current_phase = 'vulnerable_verification'
            result = await self._run_phase_vulnerable_verification(cve_id, working_dir)
            if not result.success:
                return self._mark_phase_failed(cve_id, result)

            # Phase 5: Solution verification (Solution + Solver loop)
            self.tasks[cve_id].current_phase = 'solution_verification'
            result = await self._run_phase_solution_verification(cve_id, working_dir)
            if not result.success:
                return self._mark_phase_failed(cve_id, result)

            # Phase 6: CVE Ready Check (check_cve_ready + Checker agent fix loop)
            self.tasks[cve_id].current_phase = 'cve_check'
            result = await self._run_phase_check(cve_id, working_dir)
            if not result.success:
                return self._mark_phase_failed(cve_id, result)

            # Phase 7: Cleanup
            self.tasks[cve_id].current_phase = 'cleanup'
            await self._run_phase_cleanup(cve_id, working_dir)

            # Success
            self.tasks[cve_id].status = 'completed'
            self.tasks[cve_id].completed_at = datetime.now()
            self._save_task_status(cve_id)
            self.logger.info(f"[{cve_id}] Successfully completed all phases")
            return True

        except Exception as e:
            self.tasks[cve_id].status = 'failed'
            self.tasks[cve_id].error = str(e)
            self._save_task_status(cve_id)
            self.logger.error(f"[{cve_id}] Exception: {e}", exc_info=True)
            return False

        finally:
            # Always close all sessions for this CVE
            # Catch ALL exceptions (including CancelledError) to prevent overriding the return value
            try:
                await self._close_all_sessions_for_cve(cve_id)
            except BaseException as e:
                self.logger.debug(f"[{cve_id}] Error closing sessions: {type(e).__name__}")

    async def process_multiple_cves(self, cve_ids: List[str]) -> Dict[str, bool]:
        """
        Process multiple CVEs in parallel using sliding window (respecting max_concurrent_cves).

        Uses semaphore-based sliding window instead of batch processing:
        - As soon as one CVE completes, the next CVE starts immediately
        - More efficient than waiting for entire batch to complete

        Args:
            cve_ids: List of CVE identifiers

        Returns:
            Dictionary mapping CVE IDs to success status
        """
        max_concurrent = self.config['orchestrator']['max_concurrent_cves']
        self.logger.info(f"Processing {len(cve_ids)} CVEs (max concurrent: {max_concurrent}, sliding window)")

        # Semaphore for CVE-level concurrency control
        cve_semaphore = asyncio.Semaphore(max_concurrent)
        results: Dict[str, bool] = {}

        async def process_with_semaphore(cve_id: str) -> tuple:
            """Process a single CVE with semaphore control."""
            async with cve_semaphore:
                self.logger.debug(f"[{cve_id}] Acquired CVE semaphore (available: {cve_semaphore._value})")
                try:
                    result = await self.process_cve(cve_id)
                    return cve_id, result
                except Exception as e:
                    self.logger.error(f"[{cve_id}] Exception: {e}")
                    return cve_id, False

        # Launch all CVEs concurrently - semaphore controls actual parallelism
        tasks = [process_with_semaphore(cve_id) for cve_id in cve_ids]
        completed = await asyncio.gather(*tasks, return_exceptions=True)

        # Collect results
        for item in completed:
            if isinstance(item, BaseException):
                # CancelledError is BaseException, not Exception in Python 3.8+
                self.logger.error(f"Unexpected gather exception: {type(item).__name__}: {item}")
            elif isinstance(item, tuple) and len(item) == 2:
                cve_id, result = item
                results[cve_id] = result
            else:
                self.logger.error(f"Unexpected gather result type: {type(item)}")

        return results

    def get_task_status(self, cve_id: str) -> Optional[CVETaskStatus]:
        """Get status of a CVE task"""
        return self.tasks.get(cve_id)

    def get_all_tasks(self) -> Dict[str, CVETaskStatus]:
        """Get all task statuses"""
        return self.tasks.copy()

    # ========== Two-Phase Execution Methods ==========

    async def run_phase_check(self, cve_ids: List[str]) -> Dict[str, bool]:
        """
        Run Check Phase only (check_cve_ready.py + Checker agent fix loop).

        This assumes Phase 1 (Analyzer + Generator) and Phase 2 (Builder + Validator + Solver)
        have already completed. It only runs the CVE ready check with Checker agent loop.

        Args:
            cve_ids: List of CVE identifiers

        Returns:
            Dictionary mapping CVE IDs to success status
        """
        self.logger.info(f"=== Check Phase: Running CVE ready check for {len(cve_ids)} CVEs ===")

        # Semaphore for CVE-level concurrency control (sliding window)
        max_concurrent = self.config['orchestrator']['max_concurrent_cves']
        cve_semaphore = asyncio.Semaphore(max_concurrent)
        results = {}

        async def run_check_for_cve(cve_id: str) -> tuple:
            """Run check phase for a single CVE with semaphore control."""
            async with cve_semaphore:
                self.logger.debug(f"[{cve_id}] Acquired CVE semaphore for check phase")
                working_dir = self.cve_tasks_dir / cve_id

                # Check if working directory exists
                if not working_dir.exists():
                    self.logger.error(f"[{cve_id}] Working directory not found: {working_dir}")
                    return cve_id, False

                # Restore FileStateManager from .logs/ if exists
                file_states_path = working_dir / ".logs" / "file_states.json"
                if file_states_path.exists():
                    self.file_state_managers[cve_id] = FileStateManager.load_from_logs(working_dir)
                    self.logger.debug(f"[{cve_id}] Restored FileStateManager from .logs/")
                else:
                    self.file_state_managers[cve_id] = FileStateManager(working_dir)

                # Initialize or restore task status
                existing_status = CVETaskStatus.load_from_logs(working_dir)
                if existing_status is not None:
                    self.tasks[cve_id] = existing_status
                    self.tasks[cve_id].status = 'in_progress'
                    self.tasks[cve_id].working_dir = working_dir
                else:
                    self.tasks[cve_id] = CVETaskStatus(
                        cve_id=cve_id,
                        status='in_progress',
                        current_phase='cve_check',
                        started_at=datetime.now(),
                        working_dir=working_dir
                    )

                try:
                    # Run CVE Ready Check (check_cve_ready + Checker agent fix loop)
                    self.tasks[cve_id].current_phase = 'cve_check'
                    result = await self._run_phase_check(cve_id, working_dir)

                    if result.success:
                        self.tasks[cve_id].status = 'completed'
                        self.tasks[cve_id].completed_at = datetime.now()
                        self._save_task_status(cve_id)
                        return cve_id, True
                    else:
                        self._mark_phase_failed(cve_id, result)
                        return cve_id, False

                except Exception as e:
                    self.tasks[cve_id].status = 'failed'
                    self.tasks[cve_id].error = str(e)
                    self._save_task_status(cve_id)
                    self.logger.error(f"[{cve_id}] Exception: {e}", exc_info=True)
                    return cve_id, False

                finally:
                    try:
                        await self._close_all_sessions_for_cve(cve_id)
                    except BaseException as e:
                        self.logger.debug(f"[{cve_id}] Error closing sessions: {type(e).__name__}")

        # Launch all CVEs - semaphore controls actual parallelism (sliding window)
        tasks = [run_check_for_cve(cve_id) for cve_id in cve_ids]
        completed = await asyncio.gather(*tasks, return_exceptions=True)

        for item in completed:
            if isinstance(item, BaseException):
                self.logger.error(f"Unexpected gather exception: {type(item).__name__}: {item}")
            elif isinstance(item, tuple) and len(item) == 2:
                cve_id, result = item
                results[cve_id] = result
            else:
                self.logger.error(f"Unexpected gather result type: {type(item)}")

        # Summary
        success_count = sum(1 for v in results.values() if v)
        self.logger.info(f"=== Check Phase Complete: {success_count}/{len(cve_ids)} succeeded ===")

        return results

    async def run_phase1_analysis(self, cve_ids: List[str]) -> Dict[str, bool]:
        """
        Phase 1: Run Analyzer and Generator for all CVEs.

        This phase can be run separately, and results are persisted to .logs/
        for later resumption with run_phase2_remaining().

        Phase 1 includes:
        - Analyzer: Information gathering and analysis
        - Generator: Task and test creation

        Concurrency is bounded by config['orchestrator']['max_concurrent_cves'].

        Args:
            cve_ids: List of CVE identifiers

        Returns:
            Dictionary mapping CVE IDs to success status. Every requested CVE
            gets an entry: a worker that raised or was cancelled is reported
            as False instead of being silently dropped from the mapping.
        """
        self.logger.info(f"=== Phase 1: Running Analyzer + Generator for {len(cve_ids)} CVEs ===")

        results = {}

        # Prepare working directories and load CVE content
        working_dirs = {}
        cve_contents = {}
        for cve_id in cve_ids:
            working_dir = self.cve_tasks_dir / cve_id
            working_dir.mkdir(parents=True, exist_ok=True)
            working_dirs[cve_id] = working_dir

            # Load CVE content; a missing file is recorded as None and turned
            # into a per-CVE failure inside the worker below.
            cve_file = self.cve_input_dir / f"{cve_id}.md"
            if cve_file.exists():
                cve_contents[cve_id] = cve_file.read_text()
            else:
                self.logger.error(f"[{cve_id}] CVE file not found: {cve_file}")
                cve_contents[cve_id] = None

            # Initialize task status with working_dir for persistence
            # (this replaces any status already held in self.tasks for the CVE)
            self.tasks[cve_id] = CVETaskStatus(
                cve_id=cve_id,
                status='in_progress',
                current_phase='analyzer',
                started_at=datetime.now(),
                working_dir=working_dir
            )

        # Semaphore for CVE-level concurrency control (sliding window)
        max_concurrent = self.config['orchestrator']['max_concurrent_cves']
        cve_semaphore = asyncio.Semaphore(max_concurrent)

        async def run_phase1_for_cve(cve_id: str) -> tuple:
            """Run phase 1 for a single CVE with semaphore control.

            Returns a (cve_id, success) tuple.
            """
            async with cve_semaphore:
                self.logger.debug(f"[{cve_id}] Acquired CVE semaphore for phase 1")
                working_dir = working_dirs[cve_id]
                cve_content = cve_contents.get(cve_id)

                if cve_content is None:
                    self.tasks[cve_id].status = 'failed'
                    self.tasks[cve_id].error = "CVE file not found"
                    self._save_task_status(cve_id)
                    return cve_id, False

                try:
                    # Phase 1: Analyzer + Generator
                    # Only the analyzer receives the raw CVE content.
                    for phase_key in ['analyzer', 'generator']:
                        content = cve_content if phase_key == 'analyzer' else None
                        if not await self._run_phase(phase_key, cve_id, working_dir, content):
                            return cve_id, False

                    # All phase 1 phases completed successfully
                    # Save FileStateManager to .logs/ for phase 2
                    file_state_manager = self._get_file_state_manager(cve_id, working_dir)
                    file_state_manager.save_to_logs()
                    self._save_task_status(cve_id)
                    self.logger.info(f"[{cve_id}] Phase 1 (Analyzer + Generator) completed, state saved to .logs/")
                    return cve_id, True

                except Exception as e:
                    # Include the traceback, matching the other phase runners.
                    self.logger.error(f"[{cve_id}] Phase 1 exception: {e}", exc_info=True)
                    self.tasks[cve_id].status = 'failed'
                    self.tasks[cve_id].error = str(e)
                    self._save_task_status(cve_id)
                    return cve_id, False

                finally:
                    # Close sessions for this CVE after phase 1
                    # Catch ALL exceptions (including CancelledError) to prevent overriding the return value
                    try:
                        await self._close_all_sessions_for_cve(cve_id)
                    except BaseException as e:
                        self.logger.debug(f"[{cve_id}] Error closing sessions: {type(e).__name__}")

        # Launch all CVEs - semaphore controls actual parallelism (sliding window)
        tasks = [run_phase1_for_cve(cve_id) for cve_id in cve_ids]
        completed = await asyncio.gather(*tasks, return_exceptions=True)

        # gather() returns results in the order the awaitables were passed, so
        # each outcome can be attributed to its CVE even when the worker raised
        # instead of returning its (cve_id, success) tuple.
        for cve_id, item in zip(cve_ids, completed):
            if isinstance(item, tuple) and len(item) == 2:
                results[cve_id] = item[1]
                continue

            if isinstance(item, BaseException):
                # CancelledError is BaseException, not Exception in Python 3.8+
                self.logger.error(f"[{cve_id}] Unexpected gather exception: {type(item).__name__}: {item}")
            else:
                self.logger.error(f"[{cve_id}] Unexpected gather result type: {type(item)}")
            # Report the CVE as failed rather than omitting it from the mapping.
            results[cve_id] = False

        # Summary
        success_count = sum(1 for v in results.values() if v)
        self.logger.info(f"=== Phase 1 Complete: {success_count}/{len(cve_ids)} succeeded ===")

        return results

    async def run_phase2_remaining(self, cve_ids: List[str]) -> Dict[str, bool]:
        """
        Phase 2: Run remaining phases (Builder → Checker) for CVEs.

        This assumes Phase 1 (Analyzer + Generator) has already completed.
        FileStateManager is restored from .logs/file_states.json.

        Phase 2 includes:
        - Builder: Docker environment construction
        - Vulnerable Verification: Docker + Validator loop
        - Solution Verification: Solution + Solver loop
        - Checker: Final validation
        - Cleanup: Remove temporary files

        Concurrency is bounded by config['orchestrator']['max_concurrent_cves'].

        Args:
            cve_ids: List of CVE identifiers that passed Phase 1

        Returns:
            Dictionary mapping CVE IDs to success status. Every requested CVE
            gets an entry: a worker that raised or was cancelled is reported
            as False instead of being silently dropped from the mapping.
        """
        self.logger.info(f"=== Phase 2: Running remaining phases for {len(cve_ids)} CVEs ===")

        # Semaphore for CVE-level concurrency control (sliding window)
        max_concurrent = self.config['orchestrator']['max_concurrent_cves']
        cve_semaphore = asyncio.Semaphore(max_concurrent)
        results = {}

        async def run_remaining_for_cve(cve_id: str) -> tuple:
            """Run phase 2 for a single CVE with semaphore control.

            Returns a (cve_id, success) tuple.
            """
            async with cve_semaphore:
                self.logger.debug(f"[{cve_id}] Acquired CVE semaphore for phase 2")
                working_dir = self.cve_tasks_dir / cve_id

                # Check if phase 1 outputs exist (analyzer + generator)
                if not (working_dir / ".agent_state" / "analyzer_output").exists():
                    self.logger.error(f"[{cve_id}] Analyzer output not found, skipping")
                    return cve_id, False

                # Check generator outputs
                required_generator_files = ['task.yaml', 'tests/test_func.py', 'tests/test_vuln.py', 'solution.sh', 'tests/run-tests.sh']
                missing_files = [f for f in required_generator_files if not (working_dir / f).exists()]
                if missing_files:
                    self.logger.error(f"[{cve_id}] Generator output missing: {missing_files}, skipping")
                    return cve_id, False

                # Restore FileStateManager from .logs/
                # NOTE: this setup runs outside the try block below, so an
                # exception here reaches gather() and is reported as a failed
                # CVE by the result loop at the end of this method.
                self.file_state_managers[cve_id] = FileStateManager.load_from_logs(working_dir)
                self.logger.debug(f"[{cve_id}] Restored FileStateManager from .logs/")

                # Try to load existing task status from .logs/ or create new one
                existing_status = CVETaskStatus.load_from_logs(working_dir)
                if existing_status is not None:
                    self.tasks[cve_id] = existing_status
                    self.tasks[cve_id].status = 'in_progress'
                    self.tasks[cve_id].working_dir = working_dir
                    self.logger.debug(f"[{cve_id}] Restored task status from .logs/")
                elif cve_id not in self.tasks:
                    self.tasks[cve_id] = CVETaskStatus(
                        cve_id=cve_id,
                        status='in_progress',
                        current_phase='builder',
                        started_at=datetime.now(),
                        working_dir=working_dir
                    )
                else:
                    # Nothing persisted, but an in-memory status exists: reuse it.
                    self.tasks[cve_id].status = 'in_progress'
                    self.tasks[cve_id].working_dir = working_dir

                try:
                    # Phase: Builder
                    if not await self._run_phase('builder', cve_id, working_dir):
                        return cve_id, False

                    # Save FileStateManager after builder
                    file_state_manager = self._get_file_state_manager(cve_id, working_dir)
                    file_state_manager.save_to_logs()

                    # Phase: Vulnerable environment verification (Docker + Validator loop)
                    # The return value of _mark_phase_failed is used as the success
                    # flag on failure paths - presumably False; verify against its definition.
                    self.tasks[cve_id].current_phase = 'vulnerable_verification'
                    result = await self._run_phase_vulnerable_verification(cve_id, working_dir)
                    if not result.success:
                        return cve_id, self._mark_phase_failed(cve_id, result)

                    file_state_manager.save_to_logs()

                    # Phase: Solution verification (Solution + Solver loop)
                    self.tasks[cve_id].current_phase = 'solution_verification'
                    result = await self._run_phase_solution_verification(cve_id, working_dir)
                    if not result.success:
                        return cve_id, self._mark_phase_failed(cve_id, result)

                    file_state_manager.save_to_logs()

                    # Phase: CVE Ready Check (check_cve_ready + Checker agent fix loop)
                    self.tasks[cve_id].current_phase = 'cve_check'
                    result = await self._run_phase_check(cve_id, working_dir)
                    if not result.success:
                        return cve_id, self._mark_phase_failed(cve_id, result)

                    file_state_manager.save_to_logs()

                    # Phase: Cleanup (its result is not inspected)
                    self.tasks[cve_id].current_phase = 'cleanup'
                    await self._run_phase_cleanup(cve_id, working_dir)

                    # Success
                    self.tasks[cve_id].status = 'completed'
                    self.tasks[cve_id].completed_at = datetime.now()
                    self._save_task_status(cve_id)
                    return cve_id, True

                except Exception as e:
                    self.tasks[cve_id].status = 'failed'
                    self.tasks[cve_id].error = str(e)
                    self._save_task_status(cve_id)
                    self.logger.error(f"[{cve_id}] Exception: {e}", exc_info=True)
                    return cve_id, False

                finally:
                    # Catch ALL exceptions (including CancelledError) to prevent overriding the return value
                    try:
                        await self._close_all_sessions_for_cve(cve_id)
                    except BaseException as e:
                        self.logger.debug(f"[{cve_id}] Error closing sessions: {type(e).__name__}")

        # Launch all CVEs - semaphore controls actual parallelism (sliding window)
        tasks = [run_remaining_for_cve(cve_id) for cve_id in cve_ids]
        completed = await asyncio.gather(*tasks, return_exceptions=True)

        # gather() returns results in the order the awaitables were passed, so
        # each outcome can be attributed to its CVE even when the worker raised
        # instead of returning its (cve_id, success) tuple.
        for cve_id, item in zip(cve_ids, completed):
            if isinstance(item, tuple) and len(item) == 2:
                results[cve_id] = item[1]
                continue

            if isinstance(item, BaseException):
                # CancelledError is BaseException, not Exception in Python 3.8+
                self.logger.error(f"[{cve_id}] Unexpected gather exception: {type(item).__name__}: {item}")
            else:
                self.logger.error(f"[{cve_id}] Unexpected gather result type: {type(item)}")
            # Report the CVE as failed rather than omitting it from the mapping.
            results[cve_id] = False

        # Summary
        success_count = sum(1 for v in results.values() if v)
        self.logger.info(f"=== Phase 2 Complete: {success_count}/{len(cve_ids)} succeeded ===")

        return results


async def main():
    """Demonstrate the orchestrator by processing one placeholder CVE.

    Configures root logging at INFO level, builds an AsyncOrchestrator from
    "config.yaml", and processes a single CVE while the background cleanup
    task is running. The cleanup task is stopped even if processing raises.
    """
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    orchestrator = AsyncOrchestrator("config.yaml")

    # Background cleanup is started before any CVE work begins.
    await orchestrator.start_background_cleanup()

    try:
        # Run the pipeline for one CVE, then report its outcome and status.
        success = await orchestrator.process_cve("CVE-2024-EXAMPLE")
        logging.info(f"Result: {'Success' if success else 'Failed'}")
        logging.info(f"Status: {orchestrator.get_task_status('CVE-2024-EXAMPLE')}")
    finally:
        # Runs on success and on failure alike.
        await orchestrator.stop_background_cleanup()


# Script entry point: run the example main() coroutine only when this module
# is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
