#!/usr/bin/env python3
"""
Clean Phase 2 files for specified CVE(s).

Removes files generated by Phase 2 (Builder, Validator, Solver, Checker) while
preserving Phase 1 output (Analyzer, Generator).

Usage:
    python scripts/clean_phase2.py CVE-2025-12345
    python scripts/clean_phase2.py CVE-2025-12345 CVE-2025-12346
    python scripts/clean_phase2.py --all
    python scripts/clean_phase2.py --file cve_list.txt
"""

import argparse
import shutil
import subprocess
from pathlib import Path


# Phase 2 agents; every per-agent path below is derived from these names
_PHASE2_AGENTS = ('builder', 'validator', 'solver', 'checker')

# Phase 2 files that sit directly in the CVE directory
PHASE2_FILES = ['Dockerfile', 'docker-compose.yaml', 'final_report.md']

# One output directory per Phase 2 agent
PHASE2_AGENT_OUTPUTS = [
    f'.agent_state/{agent}_output' for agent in _PHASE2_AGENTS
]

# One result file per Phase 2 agent
PHASE2_RESULT_FILES = [
    f'.agent_state/{agent}-res.xml' for agent in _PHASE2_AGENTS
]

# Conversation logs: all Markdown files first, then all JSON files
PHASE2_LOG_FILES = [
    f'.logs/{agent}_conversation.{extension}'
    for extension in ('md', 'json')
    for agent in _PHASE2_AGENTS
]


def get_cve_tasks_dir() -> Path:
    """Return the absolute path of the directory that holds the CVE task folders.

    The location is read from ``paths.cve_tasks_dir`` in ``config.yaml``,
    which is looked up in the parent of this script's directory. A relative
    value is resolved against that same parent directory. ``./cve_tasks`` is
    used when the config file is missing or empty, or when the key is absent
    or has no value.

    Returns:
        The resolved tasks directory; it is not checked for existence.
    """
    default_dir = './cve_tasks'
    project_root = Path(__file__).parent.parent
    config_file = project_root / 'config.yaml'

    cve_tasks_dir = default_dir
    if config_file.exists():
        # Imported lazily so PyYAML is only required when a config file exists.
        import yaml
        with open(config_file, encoding='utf-8') as f:
            config = yaml.safe_load(f)
        # safe_load() returns None for an empty document and a bare ``paths:``
        # key parses as None, so neither level can be assumed to be a mapping.
        paths = config.get('paths') if isinstance(config, dict) else None
        if isinstance(paths, dict):
            cve_tasks_dir = paths.get('cve_tasks_dir') or default_dir

    return (project_root / cve_tasks_dir).resolve()


def clean_docker_resources(cve_id: str):
    """Tear down the Docker containers and images associated with a CVE.

    Runs ``docker compose down -v --remove-orphans`` inside the CVE's task
    directory, then force-removes every image ID that ``docker images -q``
    lists for the pattern ``*<name>*``, where ``<name>`` is the CVE ID
    lower-cased with ``-`` replaced by ``_``.

    Output of every Docker command is captured and exit codes are not
    inspected, so a command that fails does so silently. Exceptions raised by
    ``subprocess.run`` itself are not handled here.
    """
    name_fragment = cve_id.lower().replace('-', '_')
    compose_down = ['docker', 'compose', 'down', '-v', '--remove-orphans']
    list_images = ['docker', 'images', '-q', f'*{name_fragment}*']

    # Bring the compose project down, including its volumes and orphans
    subprocess.run(
        compose_down,
        capture_output=True,
        cwd=get_cve_tasks_dir() / cve_id,
    )

    # Look up the IDs of images whose name matches the CVE pattern
    listing = subprocess.run(list_images, capture_output=True, text=True)
    found = listing.stdout.strip()
    if not found:
        return

    # Force-remove each listed image individually
    for image_id in found.split('\n'):
        subprocess.run(['docker', 'rmi', '-f', image_id], capture_output=True)


def clean_phase2(cve_id: str, cve_tasks_dir: Path, clean_docker: bool = True) -> bool:
    """
    Clean Phase 2 files for a single CVE.

    Docker resources are torn down first, while ``docker-compose.yaml`` is
    still on disk: ``docker compose down`` is run from the CVE directory and
    needs that file to identify the project. Only afterwards are the Phase 2
    files, agent output directories, result files and logs deleted.

    Args:
        cve_id: Name of the CVE folder inside ``cve_tasks_dir``.
        cve_tasks_dir: Directory that contains the per-CVE folders.
        clean_docker: When True (default), also remove the CVE's Docker
            containers and images. A failure there is reported as a warning
            and does not stop the file cleanup.

    Returns True if successful, False if CVE directory not found.
    """
    cve_dir = cve_tasks_dir / cve_id

    # is_dir() rather than exists(): a stray regular file with the CVE's name
    # is not a task directory and must be skipped too.
    if not cve_dir.is_dir():
        print(f"  [SKIP] {cve_id}: directory not found")
        return False

    # Clean Docker resources before the compose file is deleted below
    if clean_docker:
        try:
            clean_docker_resources(cve_id)
        except Exception as e:
            # Best effort: Docker may be missing or not running.
            print(f"  [WARN] {cve_id}: Docker cleanup failed: {e}")

    removed_count = 0

    # Remove Phase 2 agent output directories
    for dirname in PHASE2_AGENT_OUTPUTS:
        dir_path = cve_dir / dirname
        if dir_path.exists():
            shutil.rmtree(dir_path)
            removed_count += 1

    # Remove Phase 2 files, result files and log files
    for filename in PHASE2_FILES + PHASE2_RESULT_FILES + PHASE2_LOG_FILES:
        file_path = cve_dir / filename
        if file_path.exists():
            file_path.unlink()
            removed_count += 1

    print(f"  [OK] {cve_id}: removed {removed_count} items")
    return True


def main(argv=None) -> int:
    """Parse the command line and clean Phase 2 output for the selected CVEs.

    CVE IDs come from exactly one source, checked in this order: ``--all``
    (every ``CVE-*`` directory in the tasks directory), ``--file`` (lines
    starting with ``CVE-``), or the positional arguments. Duplicate IDs are
    cleaned once, and IDs are processed in sorted order.

    Args:
        argv: Argument list to parse. ``None`` (the default) parses the
            process's own command line.

    Returns:
        0 when every selected CVE was cleaned or there was nothing to clean;
        1 when no CVE source was given (help is printed), the ``--file`` path
        or the tasks directory does not exist, or at least one CVE was
        skipped.
    """
    parser = argparse.ArgumentParser(
        description='Clean Phase 2 files for specified CVE(s)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python scripts/clean_phase2.py CVE-2025-12345
  python scripts/clean_phase2.py CVE-2025-12345 CVE-2025-12346
  python scripts/clean_phase2.py --all
  python scripts/clean_phase2.py --file cve_list.txt
  python scripts/clean_phase2.py CVE-2025-12345 --no-docker
"""
    )

    parser.add_argument(
        'cves',
        nargs='*',
        help='CVE ID(s) to clean'
    )

    parser.add_argument(
        '--all',
        action='store_true',
        help='Clean all CVEs in cve_tasks directory'
    )

    parser.add_argument(
        '--file', '-f',
        type=Path,
        help='File containing CVE IDs (one per line)'
    )

    parser.add_argument(
        '--no-docker',
        action='store_true',
        help='Skip Docker cleanup (faster but leaves containers/images)'
    )

    args = parser.parse_args(argv)

    # Resolved only when needed, so usage errors are reported without
    # having to load the configuration first.
    cve_tasks_dir = None

    # Collect CVE IDs
    if args.all:
        cve_tasks_dir = get_cve_tasks_dir()
        if not cve_tasks_dir.is_dir():
            # iterdir() would otherwise raise on a missing directory.
            print(f"Error: CVE tasks directory not found: {cve_tasks_dir}")
            return 1
        cve_ids = [d.name for d in cve_tasks_dir.iterdir()
                   if d.is_dir() and d.name.startswith('CVE-')]
    elif args.file:
        if not args.file.exists():
            print(f"Error: File not found: {args.file}")
            return 1
        stripped_lines = (line.strip()
                          for line in args.file.read_text().splitlines())
        cve_ids = [entry for entry in stripped_lines
                   if entry.startswith('CVE-')]
    elif args.cves:
        cve_ids = args.cves
    else:
        parser.print_help()
        return 1

    # Drop duplicates so no CVE is cleaned (and counted) twice.
    cve_ids = sorted(set(cve_ids))

    if not cve_ids:
        print("No CVEs to clean")
        return 0

    if cve_tasks_dir is None:
        cve_tasks_dir = get_cve_tasks_dir()

    print(f"Cleaning Phase 2 files for {len(cve_ids)} CVE(s)...")
    print(f"CVE tasks dir: {cve_tasks_dir}")
    print()

    success_count = 0
    for cve_id in cve_ids:
        if clean_phase2(cve_id, cve_tasks_dir, clean_docker=not args.no_docker):
            success_count += 1

    print()
    print(f"Done. Cleaned {success_count}/{len(cve_ids)} CVEs.")

    return 0 if success_count == len(cve_ids) else 1


if __name__ == '__main__':
    # Use main()'s return value as the process exit status. SystemExit is
    # raised directly because the bare ``exit`` helper is injected by the
    # ``site`` module and is not available when Python runs with ``-S``.
    raise SystemExit(main())
