#!/usr/bin/env python3
"""
Experiment Runner - Meta script for running batch experiments.

Reads config from experiment_configs/ folder and runs multiple experiments
with different models, collecting results for analysis.

Usage:
    python run_experiments.py                              # Use default config
    python run_experiments.py --config experiment_configs/my_sweep.json  # Use custom config
    python run_experiments.py --dry-run                    # Show what would run
    python run_experiments.py --list                       # List available experiments/models
    python run_experiments.py --no-archive                 # Don't archive sessions after sweep
    python run_experiments.py --clean                      # Clean agent_records after archiving
"""

import os
import sys
import json
import time
import shutil
import subprocess
import argparse
import requests
from pathlib import Path
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Any, Optional, Set, Tuple
import threading

# Put this script's own directory at the front of sys.path so that modules
# located next to it can be imported regardless of the current working directory.
sys.path.insert(0, str(Path(__file__).parent))

try:
    # Optional dependency: when python-dotenv is importable, load_dotenv() is
    # called here, before any ExperimentRunner reads os.environ.
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass  # dotenv not required


@dataclass
class RunResult:
    """Result of a single experiment run.

    Instances are created and filled in by ``ExperimentRunner._run_single`` and
    serialised with ``dataclasses.asdict`` when results are saved.
    """
    # Experiment name passed to run_agent.py via --experiment.
    experiment: str
    # Model name passed to run_agent.py via --model.
    model: str
    # 1-based index of this run within its (experiment, model) combination.
    run_number: int
    # 8-character hex session ID parsed from the run log; None if not found.
    session_id: Optional[str] = None
    # True when the agent process exited with return code 0.
    success: bool = False
    # True when the run log contains a victory marker.
    victory: bool = False
    # Percentage parsed from the "Survival Rate: N%" log line; None if absent.
    survival_rate: Optional[float] = None
    # Error description (e.g. "Timeout" or an exception message), if any.
    error: Optional[str] = None
    # Wall-clock duration of the run in seconds.
    duration_seconds: float = 0
    # ISO-8601 timestamps (datetime.isoformat()) for start and end of the run.
    start_time: str = ""
    end_time: str = ""
    # Path of the log file that captured the agent's stdout/stderr.
    log_file: Optional[str] = None


@dataclass
class ExperimentSummary:
    """Summary of all runs for an experiment-model combination.

    NOTE(review): nothing in the visible part of this file instantiates this
    class; ``ExperimentRunner._compute_summary`` builds plain dicts instead.
    Confirm whether it is still used elsewhere.
    """
    # Identify the (experiment, model) combination being summarised.
    experiment: str
    model: str
    # Run counters.
    total_runs: int = 0
    successful_runs: int = 0
    victories: int = 0
    # Survival-rate statistics (presumably percentages, as in RunResult -- TODO confirm).
    avg_survival_rate: float = 0
    min_survival_rate: float = 0
    max_survival_rate: float = 0
    # Mean run duration (presumably seconds, as in RunResult -- TODO confirm).
    avg_duration: float = 0
    # Individual run results; default_factory gives each instance its own list.
    results: List[RunResult] = field(default_factory=list)


class ExperimentRunner:
    """Runs batch experiments based on configuration."""

    def __init__(
        self,
        config_path: str = "experiment_configs/default.json",
        use_docker: bool = False,
        container_name: str = "silent-canyon",
        archive_sessions: bool = True,
        clean_after_archive: bool = False,
        clear_sessions: bool = True
    ):
        self.config_path = Path(config_path)
        self.config = self._load_config()
        self.results: List[RunResult] = []
        self.base_url = os.environ.get("SILENT_CANYON_URL", "http://localhost:8000")
        self.results_dir = Path(self.config["output"]["results_dir"])
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
        self.use_docker = use_docker
        self.container_name = container_name
        self.archive_sessions = archive_sessions
        self.clean_after_archive = clean_after_archive
        self.clear_sessions = clear_sessions

        # Sweep folder will be created at run time
        self.sweep_dir: Optional[Path] = None
        self.sweep_timestamp: Optional[str] = None

        # Track session IDs created during this sweep
        self.sweep_session_ids: Set[str] = set()

        # Agent records location (Docker volume mount or local)
        self.agent_records_dir = Path("agent_records/sessions")

        # Resume support (skip completed runs, delete and re-run failed runs)
        self.resume_sweep_dir: Optional[Path] = None
        self.completed_runs: Set[Tuple[str, str, int]] = set()  # (experiment, model, run_number)

        # Auto-load completed experiments from agent_records
        self._load_completed_from_sessions()

    def _load_config(self) -> Dict[str, Any]:
        """Load experiment configuration."""
        if not self.config_path.exists():
            raise FileNotFoundError(f"Config file not found: {self.config_path}")
        with open(self.config_path) as f:
            return json.load(f)

    def _load_completed_from_sessions(self):
        """Load completed experiments from agent_records/sessions directory.

        Also removes incomplete sessions via API to avoid duplicates on re-run.
        """
        if not self.agent_records_dir.exists():
            return

        completed_count = 0
        incomplete_sessions = []  # (session_id, file_path)

        for session_file in self.agent_records_dir.glob("*.json"):
            try:
                with open(session_file) as f:
                    data = json.load(f)

                session_id = data.get("session_id", session_file.stem)

                # Check if session is completed (has final_result)
                if data.get("game_over") and data.get("final_result"):
                    model = data.get("model_name", "")
                    experiment = data.get("experiment_name", "")
                    if model and experiment:
                        # Add as completed (run_number=1 since we only do 1 run per combo)
                        self.completed_runs.add((experiment, model, 1))
                        completed_count += 1
                else:
                    # Mark incomplete session for removal
                    incomplete_sessions.append(session_id)
            except Exception:
                pass  # Skip invalid files

        # Remove incomplete sessions via API
        removed_count = 0
        for session_id in incomplete_sessions:
            try:
                response = requests.delete(
                    f"{self.base_url}/api/admin/sessions/{session_id}",
                    timeout=10
                )
                if response.status_code == 200:
                    removed_count += 1
            except Exception:
                pass

        if completed_count > 0 or removed_count > 0:
            print(f"[AUTO] Loaded {completed_count} completed, removed {removed_count} incomplete sessions via API")

    def setup_resume(self, sweep_dir: str) -> bool:
        """
        Set up resume from a previous sweep.

        Args:
            sweep_dir: Path to the previous sweep folder

        Returns:
            True if resume setup successful, False otherwise
        """
        sweep_path = Path(sweep_dir)
        if not sweep_path.exists():
            print(f"[ERROR] Sweep folder not found: {sweep_path}")
            return False

        results_file = sweep_path / "results.json"
        if not results_file.exists():
            print(f"[ERROR] Results file not found: {results_file}")
            return False

        try:
            with open(results_file) as f:
                data = json.load(f)

            # Load completed runs; delete failed sessions and re-run them
            failed_sessions = []
            for result in data.get("results", []):
                key = (result["experiment"], result["model"], result["run_number"])
                if result.get("success", False):
                    self.completed_runs.add(key)
                elif result.get("session_id"):
                    # Failed run - delete the session and re-run
                    failed_sessions.append(result["session_id"])

            # Delete failed sessions from backend
            deleted_count = 0
            for session_id in failed_sessions:
                if self._delete_session(session_id):
                    deleted_count += 1

            # Load session IDs (only keep successful ones)
            for sid in data.get("session_ids", []):
                if sid not in failed_sessions:
                    self.sweep_session_ids.add(sid)

            # Use the existing sweep folder
            self.resume_sweep_dir = sweep_path
            self.sweep_dir = sweep_path

            # Extract timestamp from folder name
            folder_name = sweep_path.name
            parts = folder_name.rsplit("_", 2)
            if len(parts) >= 2:
                self.sweep_timestamp = f"{parts[-2]}_{parts[-1]}"

            print(f"[RESUME] Loaded {len(self.completed_runs)} completed, deleted {deleted_count} failed sessions")
            print(f"[RESUME] Will skip completed runs, re-run failed runs from scratch")

            return True
        except Exception as e:
            print(f"[ERROR] Failed to load resume data: {e}")
            return False

    def _is_run_completed(self, experiment: str, model: str, run_number: int) -> bool:
        """Check if a run has already been completed."""
        return (experiment, model, run_number) in self.completed_runs

    def _create_sweep_folder(self) -> Path:
        """Create a timestamped folder for this sweep."""
        self.sweep_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        sweep_name = self.config.get("sweep_name", "default")
        self.sweep_dir = self.results_dir / f"sweep_{sweep_name}_{self.sweep_timestamp}"
        self.sweep_dir.mkdir(parents=True, exist_ok=True)

        # Create subdirectories
        (self.sweep_dir / "logs").mkdir(exist_ok=True)
        (self.sweep_dir / "sessions").mkdir(exist_ok=True)

        print(f"[SWEEP] Created sweep folder: {self.sweep_dir}")
        return self.sweep_dir

    def _backup_and_clear_sessions(self) -> int:
        """
        Backup existing sessions and clear them before starting sweep.

        This ensures a clean state for each sweep run.
        Sessions are backed up to backups/sessions_<timestamp>/
        """
        if not self.clear_sessions:
            return 0

        # Create backup directory in tmp/backup_sessions
        backup_dir = Path("tmp/backup_sessions") / datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_dir.mkdir(parents=True, exist_ok=True)

        # Backup local session files
        backed_up = 0
        if self.agent_records_dir.exists():
            session_files = list(self.agent_records_dir.glob("*.json"))
            if session_files:
                print(f"[BACKUP] Found {len(session_files)} existing sessions")
                for src in session_files:
                    try:
                        dest = backup_dir / src.name
                        shutil.copy2(src, dest)
                        src.unlink()  # Remove after backup
                        backed_up += 1
                    except Exception as e:
                        print(f"  [WARN] Failed to backup {src.name}: {e}")

                if backed_up > 0:
                    print(f"[BACKUP] Backed up {backed_up} sessions to {backup_dir}")

        # Clear sessions from backend
        try:
            response = requests.delete(
                f"{self.base_url}/api/admin/sessions",
                timeout=10
            )
            if response.status_code == 200:
                print(f"[CLEAR] Cleared all sessions from backend")
            else:
                print(f"[WARN] Failed to clear backend sessions: {response.status_code}")
        except Exception as e:
            print(f"[WARN] Failed to clear backend sessions: {e}")

        return backed_up

    def _archive_single_session(self, session_id: str) -> bool:
        """
        Archive a single session file immediately after run completion.

        This ensures sessions are saved even if the sweep is interrupted.
        """
        if not self.sweep_dir or not self.archive_sessions:
            return False

        sessions_dest = self.sweep_dir / "sessions"

        # Try local path first
        src_file = self.agent_records_dir / f"{session_id}.json"
        if not src_file.exists():
            # Try Docker volume path
            src_file = Path(f"agent_records/sessions/{session_id}.json")

        if src_file.exists():
            dest_file = sessions_dest / f"{session_id}.json"
            try:
                shutil.copy2(src_file, dest_file)
                return True
            except Exception as e:
                print(f"  [WARN] Failed to archive {session_id}: {e}")

        return False

    def _archive_sessions(self) -> int:
        """Archive session files from this sweep to the sweep folder."""
        if not self.sweep_dir or not self.archive_sessions:
            return 0

        archived_count = 0
        sessions_dest = self.sweep_dir / "sessions"

        # Copy session files for sessions created during this sweep (skip already archived)
        for session_id in self.sweep_session_ids:
            dest_file = sessions_dest / f"{session_id}.json"

            # Skip if already archived (by _archive_single_session)
            if dest_file.exists():
                archived_count += 1
                continue

            # Try local path first
            src_file = self.agent_records_dir / f"{session_id}.json"
            if not src_file.exists():
                # Try Docker volume path
                src_file = Path(f"agent_records/sessions/{session_id}.json")

            if src_file.exists():
                try:
                    shutil.copy2(src_file, dest_file)
                    archived_count += 1
                except Exception as e:
                    print(f"  [WARN] Failed to archive {session_id}: {e}")

        print(f"[ARCHIVE] Archived {archived_count}/{len(self.sweep_session_ids)} session files to {sessions_dest}")
        return archived_count

    def _clean_agent_records(self) -> int:
        """Remove archived session files from agent_records."""
        if not self.clean_after_archive:
            return 0

        cleaned_count = 0
        for session_id in self.sweep_session_ids:
            src_file = self.agent_records_dir / f"{session_id}.json"
            if src_file.exists():
                try:
                    src_file.unlink()
                    cleaned_count += 1
                except Exception as e:
                    print(f"  [WARN] Failed to delete {session_id}: {e}")

        print(f"[CLEAN] Removed {cleaned_count} session files from agent_records")
        return cleaned_count

    def _delete_sessions_from_backend(self) -> int:
        """Delete archived sessions from the backend to clean up frontend."""
        if not self.clean_after_archive:
            return 0

        deleted_count = 0
        for session_id in self.sweep_session_ids:
            try:
                url = f"{self.base_url}/api/admin/sessions/{session_id}"
                response = requests.delete(url, timeout=10)
                if response.status_code == 200:
                    deleted_count += 1
            except Exception:
                pass

        print(f"[CLEAN] Deleted {deleted_count} sessions from backend")
        return deleted_count

    def _get_enabled_experiments(self) -> List[Dict]:
        """Get list of enabled experiments."""
        return [e for e in self.config["experiments"] if e.get("enabled", True)]

    def _get_enabled_models(self) -> List[Dict]:
        """Get list of enabled models from all providers."""
        models = []
        models_config = self.config.get("models", {})

        # Handle both flat list and nested dict by provider
        if isinstance(models_config, list):
            # Old format: flat list
            models = [m for m in models_config if m.get("enabled", True)]
        else:
            # New format: dict grouped by provider
            for provider, provider_models in models_config.items():
                if provider.startswith("_"):
                    continue  # Skip comments
                if isinstance(provider_models, list):
                    for m in provider_models:
                        if m.get("enabled", True):
                            m["provider"] = provider  # Add provider info
                            models.append(m)

        return models

    def _switch_experiment(self, experiment_name: str) -> bool:
        """Switch the backend to a different experiment."""
        try:
            url = f"{self.base_url}/api/admin/experiment/switch"
            response = requests.post(
                url,
                params={"experiment_name": experiment_name},
                timeout=10
            )
            if response.status_code == 200:
                print(f"  [OK] Switched to experiment: {experiment_name}")
                return True
            else:
                print(f"  [ERROR] Failed to switch experiment: {response.text}")
                return False
        except Exception as e:
            print(f"  [ERROR] Failed to switch experiment: {e}")
            return False

    def _get_session_result(self, session_id: str) -> Optional[Dict]:
        """Get the final result of a session."""
        try:
            url = f"{self.base_url}/api/admin/sessions/{session_id}"
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                return response.json()
            return None
        except Exception:
            return None

    def _delete_session(self, session_id: str) -> bool:
        """Delete a single session from the backend."""
        try:
            url = f"{self.base_url}/api/admin/sessions/{session_id}"
            response = requests.delete(url, timeout=10)
            if response.status_code == 200:
                print(f"  [OK] Deleted failed session {session_id}")
                return True
            else:
                print(f"  [WARN] Failed to delete session {session_id}: {response.text}")
                return False
        except Exception as e:
            print(f"  [WARN] Failed to delete session {session_id}: {e}")
            return False

    def _run_single(
        self,
        experiment: str,
        model: str,
        run_number: int,
        timeout: int,
        mode: str = "legacy"
    ) -> RunResult:
        """Run a single experiment."""
        start_time = datetime.now()
        result = RunResult(
            experiment=experiment,
            model=model,
            run_number=run_number,
            start_time=start_time.isoformat()
        )

        # Create log file in sweep folder
        log_dir = self.sweep_dir / "logs" if self.sweep_dir else self.results_dir / "logs"
        log_dir.mkdir(exist_ok=True)
        timestamp = start_time.strftime("%Y%m%d_%H%M%S")
        log_file = log_dir / f"{experiment}_{model}_{run_number}_{timestamp}.log"
        result.log_file = str(log_file)

        try:
            # Build command
            if self.use_docker:
                cmd = [
                    "docker", "exec", self.container_name,
                    "python", "run_agent.py",
                    "--model", model,
                    "--experiment", experiment,
                    "--mode", mode
                ]
            else:
                cmd = [
                    sys.executable, "run_agent.py",
                    "--model", model,
                    "--experiment", experiment,
                    "--mode", mode
                ]

            print(f"  [RUN] {experiment}/{model} #{run_number} ({mode})")

            with open(log_file, "w") as f:
                process = subprocess.Popen(
                    cmd,
                    stdout=f,
                    stderr=subprocess.STDOUT,
                    cwd=str(Path(__file__).parent) if not self.use_docker else None
                )

                try:
                    process.wait(timeout=timeout)
                    result.success = process.returncode == 0
                except subprocess.TimeoutExpired:
                    process.kill()
                    result.error = "Timeout"
                    result.success = False

            # Parse log file to extract session_id and results
            if log_file.exists():
                log_content = log_file.read_text()

                # Extract session ID (8-char hex, exclude ANSI color codes)
                # Handle both "Session registered:" and "Session resumed:"
                import re
                session_match = re.search(r"Session (?:registered|resumed): ([a-f0-9]{8})", log_content)
                if session_match:
                    result.session_id = session_match.group(1)
                    # Track this session for archiving
                    with self._lock:
                        self.sweep_session_ids.add(result.session_id)
                    # Archive session immediately (don't wait for sweep to complete)
                    self._archive_single_session(result.session_id)

                # Extract survival rate
                rate_match = re.search(r"Survival Rate: (\d+\.?\d*)%", log_content)
                if rate_match:
                    result.survival_rate = float(rate_match.group(1))

                # Check victory
                result.victory = "Victory: YES" in log_content or "YES ✓" in log_content

        except Exception as e:
            result.error = str(e)
            result.success = False

        end_time = datetime.now()
        result.end_time = end_time.isoformat()
        result.duration_seconds = (end_time - start_time).total_seconds()

        status = "✓" if result.success else "✗"
        victory = "WIN" if result.victory else "FAIL"
        rate = f"{result.survival_rate:.1f}%" if result.survival_rate else "N/A"
        print(f"  [{status}] {experiment}/{model} #{run_number}: {victory} ({rate}) [{result.duration_seconds:.0f}s]")

        return result

    def _run_experiment_model(
        self,
        experiment: Dict,
        model: Dict,
        executor: ThreadPoolExecutor
    ) -> List[RunResult]:
        """Run all iterations of an experiment-model combination."""
        exp_name = experiment["name"]
        model_name = model["name"]
        runs = model.get("runs", 1)
        timeout = self.config["execution"]["timeout_per_run_seconds"]
        delay = self.config["execution"]["delay_between_runs_seconds"]

        results = []

        for run_num in range(1, runs + 1):
            # Switch experiment before each run
            if not self._switch_experiment(exp_name):
                results.append(RunResult(
                    experiment=exp_name,
                    model=model_name,
                    run_number=run_num,
                    success=False,
                    error="Failed to switch experiment"
                ))
                continue

            # Small delay to let backend settle
            time.sleep(2)

            mode = model.get("mode", "legacy")
            result = self._run_single(exp_name, model_name, run_num, timeout, mode)
            results.append(result)

            # Delay between runs
            if run_num < runs:
                time.sleep(delay)

        return results

    def _run_single_task(
        self,
        exp_name: str,
        model_name: str,
        run_num: int,
        timeout: int,
        mode: str,
        results_list: List[RunResult]
    ):
        """Run a single task (used by parallel executor)."""
        result = self._run_single(exp_name, model_name, run_num, timeout, mode)
        with self._lock:
            results_list.append(result)
            # Save results incrementally so we can resume if interrupted
            self._save_results(results_list)

    def run_all(self, dry_run: bool = False) -> List[RunResult]:
        """Run all enabled experiments with all enabled models.

        Parallelization strategy:
        - Experiments run sequentially (because switch_experiment is global)
        - Within each experiment, runs are parallel up to parallel_runs limit
        """
        experiments = self._get_enabled_experiments()
        models = self._get_enabled_models()

        if not experiments:
            print("No experiments enabled!")
            return []

        if not models:
            print("No models enabled!")
            return []

        # Get parallelization config
        parallel_runs = self.config["execution"].get("parallel_runs", 5)
        delay = self.config["execution"]["delay_between_runs_seconds"]
        timeout = self.config["execution"]["timeout_per_run_seconds"]

        # Calculate total runs
        total_runs = sum(
            m.get("runs", 1) for m in models
        ) * len(experiments)

        print(f"\n{'='*60}")
        print(f"EXPERIMENT RUNNER")
        print(f"{'='*60}")
        print(f"Experiments: {[e['name'] for e in experiments]}")
        print(f"Models: {[m['name'] for m in models]}")
        print(f"Total runs: {total_runs}")
        print(f"Parallel runs: {parallel_runs}")
        print(f"Results dir: {self.results_dir}")
        print(f"{'='*60}\n")

        if dry_run:
            print("[DRY RUN] Would execute the following:")
            print(f"[DRY RUN] All experiments run in parallel with {parallel_runs} workers\n")
            pending_count = 0
            for exp in experiments:
                exp_pending = 0
                for model in models:
                    runs = model.get("runs", 1)
                    for r in range(1, runs + 1):
                        if not self._is_run_completed(exp["name"], model["name"], r):
                            exp_pending += 1
                            pending_count += 1
                status = f"({exp_pending} pending)" if exp_pending > 0 else "[ALL COMPLETED]"
                print(f"  {exp['name']}: {len(models)} models x {models[0].get('runs', 1)} runs {status}")
            print(f"\n  Total pending: {pending_count} tasks")
            return []

        # Create sweep folder for this run (unless resuming)
        if not self.resume_sweep_dir:
            self._create_sweep_folder()
            # Backup and clear existing sessions before starting (only for new sweeps)
            self._backup_and_clear_sessions()
        else:
            print(f"[RESUME] Using existing sweep folder: {self.sweep_dir}")

        # Check backend connectivity
        try:
            response = requests.get(f"{self.base_url}/api/admin/experiments", timeout=5)
            if response.status_code != 200:
                print(f"[ERROR] Backend not responding correctly: {response.status_code}")
                return []
        except Exception as e:
            print(f"[ERROR] Cannot connect to backend at {self.base_url}: {e}")
            return []

        all_results: List[RunResult] = []
        start_time = datetime.now()

        # Collect ALL tasks across all experiments (true parallelization)
        all_tasks = []
        for exp in experiments:
            exp_name = exp["name"]
            for model in models:
                model_name = model["name"]
                runs = model.get("runs", 1)
                mode = model.get("mode", "legacy")  # Get execution mode from model config
                for run_num in range(1, runs + 1):
                    if self._is_run_completed(exp_name, model_name, run_num):
                        print(f"  [SKIP] {exp_name}/{model_name} #{run_num} (already completed)")
                        continue
                    all_tasks.append((exp_name, model_name, run_num, mode))

        if not all_tasks:
            print("[SKIP] All runs already completed!")
            return []

        print(f"\n{'='*60}")
        print(f"RUNNING {len(all_tasks)} TASKS")
        print(f"Parallel workers: {parallel_runs}")
        print(f"{'='*60}\n")

        # Run ALL tasks in parallel (no more sequential experiments!)
        with ThreadPoolExecutor(max_workers=parallel_runs) as executor:
            futures = []
            for exp_name, model_name, run_num, mode in all_tasks:
                future = executor.submit(
                    self._run_single_task,
                    exp_name,
                    model_name,
                    run_num,
                    timeout,
                    mode,
                    all_results
                )
                futures.append(future)
                # Small delay between submissions to avoid overwhelming the backend
                time.sleep(delay)

            # Wait for all tasks to complete
            completed = 0
            for future in as_completed(futures):
                try:
                    future.result()
                    completed += 1
                    if completed % 10 == 0:
                        print(f"  [PROGRESS] {completed}/{len(all_tasks)} tasks completed")
                except Exception as e:
                    print(f"  [ERROR] Task failed: {e}")

        end_time = datetime.now()
        total_duration = (end_time - start_time).total_seconds()

        print(f"\n{'='*60}")
        print(f"ALL RUNS COMPLETE")
        print(f"Total time: {total_duration:.0f}s ({total_duration/60:.1f}m)")
        print(f"Sessions created: {len(self.sweep_session_ids)}")
        print(f"{'='*60}\n")

        # Save results
        self._save_results(all_results)

        # Print summary
        self._print_summary(all_results)

        # Archive sessions to sweep folder
        if self.archive_sessions:
            print(f"\n[POST-SWEEP] Archiving sessions...")
            self._archive_sessions()

            # Clean up agent_records if requested
            if self.clean_after_archive:
                print(f"[POST-SWEEP] Cleaning up agent_records...")
                self._clean_agent_records()
                self._delete_sessions_from_backend()

            print(f"\n[SWEEP COMPLETE] Results saved to: {self.sweep_dir}")

        return all_results

    def _save_results(self, results: List[RunResult]):
        """Save results to JSON file in sweep folder."""
        # Use sweep folder if available, otherwise results_dir
        save_dir = self.sweep_dir if self.sweep_dir else self.results_dir
        timestamp = self.sweep_timestamp or datetime.now().strftime("%Y%m%d_%H%M%S")

        results_file = save_dir / "results.json"

        # If resuming, merge with previous results
        all_results = []
        if self.resume_sweep_dir and results_file.exists():
            try:
                with open(results_file) as f:
                    prev_data = json.load(f)
                    all_results = prev_data.get("results", [])
                    # Convert back to RunResult for merging
                    print(f"[MERGE] Merging {len(results)} new results with {len(all_results)} previous results")
            except Exception as e:
                print(f"[WARN] Could not load previous results for merging: {e}")

        # Add new results
        all_results.extend([asdict(r) for r in results])

        data = {
            "sweep_id": f"sweep_{timestamp}",
            "timestamp": timestamp,
            "started_at": datetime.now().isoformat(),
            "resumed": self.resume_sweep_dir is not None,
            "config": self.config,
            "session_ids": list(self.sweep_session_ids),
            "results": all_results
        }

        with open(results_file, "w") as f:
            json.dump(data, f, indent=2)

        print(f"[SAVED] Results: {results_file} ({len(all_results)} total runs)")

        # Also save summary
        if self.config["output"]["save_summary"]:
            summary_file = save_dir / "summary.json"
            summary = self._compute_summary(results)
            summary["sweep_id"] = f"sweep_{timestamp}"
            with open(summary_file, "w") as f:
                json.dump(summary, f, indent=2)
            print(f"[SAVED] Summary: {summary_file}")

    def _compute_summary(self, results: List[RunResult]) -> Dict:
        """Compute summary statistics."""
        from collections import defaultdict

        groups = defaultdict(list)
        for r in results:
            key = (r.experiment, r.model)
            groups[key].append(r)

        summaries = []
        for (exp, model), runs in groups.items():
            successful = [r for r in runs if r.success]
            victories = [r for r in runs if r.victory]
            rates = [r.survival_rate for r in runs if r.survival_rate is not None]
            durations = [r.duration_seconds for r in runs]

            summary = {
                "experiment": exp,
                "model": model,
                "total_runs": len(runs),
                "successful_runs": len(successful),
                "victories": len(victories),
                "victory_rate": len(victories) / len(runs) if runs else 0,
                "avg_survival_rate": sum(rates) / len(rates) if rates else None,
                "min_survival_rate": min(rates) if rates else None,
                "max_survival_rate": max(rates) if rates else None,
                "avg_duration_seconds": sum(durations) / len(durations) if durations else 0
            }
            summaries.append(summary)

        return {
            "generated_at": datetime.now().isoformat(),
            "total_runs": len(results),
            "total_successful": len([r for r in results if r.success]),
            "total_victories": len([r for r in results if r.victory]),
            "summaries": summaries
        }

    def _print_summary(self, results: List[RunResult]):
        """Print summary table."""
        summary = self._compute_summary(results)

        print("\n" + "="*70)
        print("SUMMARY")
        print("="*70)
        print(f"{'Experiment':<20} {'Model':<20} {'Runs':>5} {'Wins':>5} {'Avg Rate':>10}")
        print("-"*70)

        for s in summary["summaries"]:
            rate = f"{s['avg_survival_rate']:.1f}%" if s['avg_survival_rate'] else "N/A"
            print(f"{s['experiment']:<20} {s['model']:<20} {s['total_runs']:>5} {s['victories']:>5} {rate:>10}")

        print("-"*70)
        print(f"Total: {summary['total_runs']} runs, {summary['total_victories']} victories")
        print("="*70 + "\n")

    def list_available(self):
        """List available experiments and models."""
        print("\n=== Available Experiments ===")
        for exp in self.config["experiments"]:
            status = "[ON]" if exp.get("enabled", True) else "[OFF]"
            print(f"  {status} {exp['name']}: {exp.get('description', '')}")

        print("\n=== Available Models ===")
        models_config = self.config.get("models", {})

        if isinstance(models_config, list):
            # Old format: flat list
            for model in models_config:
                status = "[ON]" if model.get("enabled", True) else "[OFF]"
                runs = model.get("runs", 1)
                print(f"  {status} {model['name']} ({runs} runs)")
        else:
            # New format: dict grouped by provider
            for provider, provider_models in models_config.items():
                if provider.startswith("_"):
                    continue
                if isinstance(provider_models, list):
                    print(f"\n  [{provider}]")
                    for model in provider_models:
                        status = "[ON]" if model.get("enabled", True) else "[OFF]"
                        runs = model.get("runs", 1)
                        desc = model.get("description", "")
                        print(f"    {status} {model['name']} ({runs} runs) - {desc}")

        print()


def archive_existing_sessions(results_dir: str = "experiment_results", clean: bool = False) -> Optional[Path]:
    """Archive all existing sessions from agent_records to a new sweep folder.

    Every ``*.json`` file in ``agent_records/sessions`` (relative to the
    current working directory) is copied into
    ``<results_dir>/sweep_<timestamp>_archive/sessions``.

    Args:
        results_dir: Directory under which the sweep folder is created.
        clean: When true, each successfully archived session file is removed
            from ``agent_records/sessions`` and a DELETE request for it is
            sent to the backend at ``SILENT_CANYON_URL`` (default
            ``http://localhost:8000``). Sessions whose copy failed are left
            untouched both locally and on the backend.

    Returns:
        The path of the sweep folder, or ``None`` when
        ``agent_records/sessions`` does not exist (nothing is created then).
    """
    agent_records = Path("agent_records/sessions")
    # Check the source first so a missing directory does not leave an empty
    # sweep folder behind.
    if not agent_records.exists():
        print("[ERROR] agent_records/sessions not found")
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    sweep_dir = Path(results_dir) / f"sweep_{timestamp}_archive"
    sessions_dir = sweep_dir / "sessions"
    sessions_dir.mkdir(parents=True, exist_ok=True)

    session_files = list(agent_records.glob("*.json"))
    print(f"[ARCHIVE] Found {len(session_files)} session files")

    # Only files that were actually copied are eligible for cleanup later.
    archived_files = []
    for src in session_files:
        try:
            shutil.copy2(src, sessions_dir / src.name)
        except Exception as e:
            print(f"  [WARN] Failed to archive {src.name}: {e}")
        else:
            archived_files.append(src)

    print(f"[ARCHIVE] Archived {len(archived_files)} sessions to {sweep_dir}")

    if clean:
        removed = 0
        for src in archived_files:
            try:
                src.unlink()
            except OSError as e:
                print(f"  [WARN] Failed to remove {src.name}: {e}")
            else:
                removed += 1
        print(f"[CLEAN] Removed {removed} session files from agent_records")

        # Also delete from backend. Best effort: a failure is reported but
        # never aborts the cleanup of the remaining sessions.
        base_url = os.environ.get("SILENT_CANYON_URL", "http://localhost:8000")
        deleted = 0
        for src in archived_files:
            session_id = src.stem
            try:
                response = requests.delete(f"{base_url}/api/admin/sessions/{session_id}", timeout=10)
            except Exception as e:
                print(f"  [WARN] Failed to delete {session_id} from backend: {e}")
                continue
            if response.status_code == 200:
                deleted += 1
        print(f"[CLEAN] Deleted {deleted} sessions from backend")

    return sweep_dir


def main():
    """Parse the command line and dispatch to archiving, listing or a sweep run."""
    store_true = "store_true"
    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_table = [
        ("--config", dict(type=str, default="experiment_configs/default.json",
                          help="Path to config file")),
        ("--dry-run", dict(action=store_true,
                           help="Show what would run without executing")),
        ("--list", dict(action=store_true,
                        help="List available experiments and models")),
        ("--docker", dict(action=store_true,
                          help="Run agent inside Docker container (requires running container)")),
        ("--container", dict(type=str, default="silent-canyon",
                             help="Docker container name (default: silent-canyon)")),
        ("--no-archive", dict(action=store_true,
                              help="Don't archive sessions to sweep folder")),
        ("--clean", dict(action=store_true,
                         help="Clean agent_records after archiving (removes from frontend)")),
        ("--archive-existing", dict(action=store_true,
                                    help="Archive existing sessions without running new experiments")),
        ("--clear", dict(action=store_true,
                         help="Clear existing sessions before sweep (default: don't clear)")),
        ("--resume-sweep", dict(type=str, default=None,
                                help="Resume an interrupted sweep from a previous sweep folder (e.g., experiment_results/sweep_default_20240101_120000)")),
    ]

    parser = argparse.ArgumentParser(description="Run batch experiments")
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Archive-only mode never builds a runner.
    if args.archive_existing:
        archive_existing_sessions(clean=args.clean)
        return

    runner = ExperimentRunner(
        args.config,
        use_docker=args.docker,
        container_name=args.container,
        archive_sessions=not args.no_archive,
        clean_after_archive=args.clean,
        clear_sessions=args.clear
    )

    if args.list:
        runner.list_available()
        return

    # A requested resume must succeed before anything is run.
    if args.resume_sweep and not runner.setup_resume(args.resume_sweep):
        print("[ERROR] Failed to setup resume. Aborting.")
        return

    runner.run_all(dry_run=args.dry_run)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
