"""
Main controller for OpenEvolve
"""

import asyncio
import logging
import os
import re
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import traceback
import json
from openevolve.config import Config, load_config
from openevolve.database import Program, ProgramDatabase
from openevolve.evaluator import Evaluator
from openevolve.llm.ensemble import LLMEnsemble
from openevolve.prompt.sampler import PromptSampler
from openevolve.utils.code_utils import (
    apply_diff,
    extract_code_language,
    extract_diffs,
    format_diff_summary,
    parse_evolve_blocks,
    parse_full_rewrite,
)
from openevolve.utils.format_utils import (
    format_metrics_safe,
    format_improvement_safe,
)

logger = logging.getLogger(__name__)


def _format_metrics(metrics: Dict[str, Any]) -> str:
    """Safely format metrics, handling both numeric and string values"""
    formatted_parts = []
    for name, value in metrics.items():
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            try:
                formatted_parts.append(f"{name}={value:.4f}")
            except (ValueError, TypeError):
                formatted_parts.append(f"{name}={value}")
        else:
            formatted_parts.append(f"{name}={value}")
    return ", ".join(formatted_parts)


def _format_improvement(improvement: Dict[str, Any]) -> str:
    """Safely format improvement metrics"""
    formatted_parts = []
    for name, diff in improvement.items():
        if isinstance(diff, (int, float)) and not isinstance(diff, bool):
            try:
                formatted_parts.append(f"{name}={diff:+.4f}")
            except (ValueError, TypeError):
                formatted_parts.append(f"{name}={diff}")
        else:
            formatted_parts.append(f"{name}={diff}")
    return ", ".join(formatted_parts)

def clean_generated_code(code_string: str) -> str:
    """Remove a surrounding Markdown code fence from generated code.

    The opening fence may carry any language tag (``python``, ``python3``,
    ``cpp``, ...) on its own line; the tag is discarded together with the
    fence so it never leaks into the code. A closing fence is removed when
    present. Text that does not start with a fence is returned unchanged.

    Args:
        code_string: Raw text produced by the model.

    Returns:
        The code without fences, or ``""`` when the input is not a string.
    """
    if not isinstance(code_string, str):
        return ""  # Non-string input carries no usable code

    stripped = code_string.strip()
    if not stripped.startswith("```"):
        # No fence: hand the text back untouched (including its whitespace)
        return code_string

    body = stripped[3:]
    first_line, newline, remainder = body.partition("\n")
    if newline and re.fullmatch(r"[\w+#.\-]*", first_line.strip()):
        # The rest of the opening fence line is a (possibly empty) language
        # tag, not code -- drop the whole line.
        body = remainder
    else:
        # Single-line form such as "```python print(1)```": only the literal
        # "python" tag can be recognised safely.
        body = body.removeprefix("python")

    return body.rstrip().removesuffix("```").strip()


class OpenEvolve:
    """
    Main controller for OpenEvolve

    Orchestrates the evolution process, coordinating between the prompt sampler,
    LLM ensemble, evaluator, and program database.

    Features:
    - Tracks the absolute best program across evolution steps
    - Ensures the best solution is not lost during the MAP-Elites process
    - Always includes the best program in the selection process for inspiration
    - Maintains detailed logs and metadata about improvements
    """

    def __init__(
        self,
        initial_program_path: str,
        evaluation_file: str,
        config_path: Optional[str] = None,
        config: Optional[Config] = None,
        output_dir: Optional[str] = None,
    ):
        """Build the controller and every component it coordinates.

        Args:
            initial_program_path: Path of the program that seeds evolution.
            evaluation_file: Evaluation file handed to the ``Evaluator``.
            config_path: Configuration file passed to ``load_config`` when no
                ``config`` object is supplied.
            config: Ready-made configuration; takes precedence over
                ``config_path``.
            output_dir: Directory for logs and results. Defaults to
                ``BPP_output/best_fit`` next to the initial program.
        """
        # An explicit Config object wins; otherwise load from file / defaults
        self.config = config if config is not None else load_config(config_path)

        # Resolve and create the output directory
        if output_dir:
            self.output_dir = output_dir
        else:
            self.output_dir = os.path.join(
                os.path.dirname(initial_program_path), "BPP_output/best_fit"
            )
        os.makedirs(self.output_dir, exist_ok=True)

        # Logging needs the output directory, so it is configured right after
        self._setup_logging()

        # Seed every source of randomness when a seed is configured
        seed = self.config.random_seed
        if seed is not None:
            import random
            import numpy as np
            import hashlib

            # Global generators
            random.seed(seed)
            np.random.seed(seed)

            # Derive a distinct, stable seed for the LLM component
            digest = hashlib.md5(str(seed).encode('utf-8') + b'llm').hexdigest()
            llm_seed = int(digest[:8], 16) % (2**31)

            # Push the seed down to every model that has none of its own
            llm_cfg = self.config.llm
            llm_cfg.random_seed = llm_seed
            for model_group in (llm_cfg.models, llm_cfg.evaluator_models):
                for model_cfg in model_group:
                    if getattr(model_cfg, 'random_seed', None) is None:
                        model_cfg.random_seed = llm_seed

            logger.info(f"Set random seed to {seed} for reproducibility")
            logger.debug(f"Generated LLM seed: {llm_seed}")

        # Placeholders for best-program bookkeeping
        self.best_algorithm_code: Optional[str] = None
        self.best_algorithm_description: Optional[str] = None
        self.improvement_summary: str = ""

        # Read the seed program and detect its language
        self.initial_program_path = initial_program_path
        self.initial_program_code = self._load_initial_program()
        self.language = extract_code_language(self.initial_program_code)

        # Saved programs reuse the seed program's extension (".py" if none)
        extension = os.path.splitext(initial_program_path)[1]
        if not extension:
            extension = ".py"
        elif not extension.startswith("."):
            extension = f".{extension}"
        self.file_extension = extension

        # LLM ensembles: one for generation, one for evaluation
        self.llm_ensemble = LLMEnsemble(self.config.llm.models)
        self.llm_evaluator_ensemble = LLMEnsemble(self.config.llm.evaluator_models)

        # Prompt samplers; the evaluator sampler uses its own system template
        self.prompt_sampler = PromptSampler(self.config.prompt)
        self.evaluator_prompt_sampler = PromptSampler(self.config.prompt)
        self.evaluator_prompt_sampler.set_templates("evaluator_system_message")

        # The database receives the raw configured seed, not the derived one
        if seed is not None:
            self.config.database.random_seed = seed

        self.database = ProgramDatabase(self.config.database)

        self.evaluator = Evaluator(
            self.config.evaluator,
            evaluation_file,
            self.llm_evaluator_ensemble,
            self.evaluator_prompt_sampler,
            database=self.database,
        )

        # One entry per completed iteration, filled in by run()
        self.dialogue_history = []

        logger.info(f"Initialized OpenEvolve with {initial_program_path} and {evaluation_file}")

    def _save_dialogue_history(self) -> None:
        """Write the accumulated dialogue history to ``history.json``.

        The file is placed in ``self.output_dir``. Saving is best effort: any
        failure is logged and swallowed so it cannot abort the caller. Nothing
        is written when the history is empty.
        """
        if not self.dialogue_history:
            logger.info("No dialogue history to save.")
            return

        history_path = os.path.join(self.output_dir, "history.json")
        try:
            # Serialize completely in memory first: if an entry is not JSON
            # serializable, no truncated/corrupt file is left on disk.
            payload = json.dumps(self.dialogue_history, indent=2)
            with open(history_path, "w", encoding="utf-8") as f:
                f.write(payload)
        except Exception as e:
            logger.exception(f"Failed to save dialogue history: {e}")
            return

        logger.info(f"Saved complete dialogue history to {history_path}")

    def _setup_logging(self) -> None:
        """Configure the root logger with a timestamped log file and a console stream.

        The log directory is ``self.config.log_dir`` when set, otherwise
        ``<output_dir>/logs``; it is created if missing. The root logger level
        is taken from ``self.config.log_level``.

        Raises:
            AttributeError: If ``log_level`` is a string that does not name a
                ``logging`` level.
        """
        log_dir = self.config.log_dir or os.path.join(self.output_dir, "logs")
        os.makedirs(log_dir, exist_ok=True)

        # Accept level names in any letter case ("info" as well as "INFO");
        # a non-string value such as logging.DEBUG is passed through as is.
        level = self.config.log_level
        if isinstance(level, str):
            level = getattr(logging, level.upper())

        # Set up root logger
        root_logger = logging.getLogger()
        root_logger.setLevel(level)

        # Add file handler. Messages emitted by this module contain non-ASCII
        # characters, so the file must not depend on the locale encoding.
        log_file = os.path.join(log_dir, f"openevolve_{time.strftime('%Y%m%d_%H%M%S')}.log")
        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        )
        root_logger.addHandler(file_handler)

        # Add console handler
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
        root_logger.addHandler(console_handler)

        logger.info(f"Logging to {log_file}")

    def _load_initial_program(self) -> str:
        """Load the initial program from file"""
        with open(self.initial_program_path, "r") as f:
            return f.read()

    async def run(
        self,
        iterations: Optional[int] = None,
        target_score: Optional[float] = None,
    ) -> Optional[Program]:
        """
        Run the evolution process

        Each iteration samples a parent from the current island, asks the LLM
        ensemble for a modification, evaluates the child, then asks the LLM to
        evolve the system prompt, summarise the experience and pick the next
        mutation strategy before the child is stored in the database.
        Iterations that fail or yield no usable code are logged and skipped.
        The dialogue history and the best program are saved at the end.

        Args:
            iterations: Maximum number of iterations (uses config if None)
            target_score: Target score; the loop stops once the mean of a
                child's numeric metrics reaches it

        Returns:
            Best program found, or None when the database holds no program
        """
        max_iterations = iterations or self.config.max_iterations

        # Define start_iteration before creating the initial program
        start_iteration = self.database.last_iteration

        # Only add initial program if starting fresh (not resuming from checkpoint)
        # Check if we're resuming AND no program matches initial code to avoid pollution
        should_add_initial = (
            start_iteration == 0
            and len(self.database.programs) == 0
            and not any(
                p.code == self.initial_program_code for p in self.database.programs.values()
            )
        )

        if should_add_initial:
            logger.info("Adding initial program to database")
            initial_program_id = str(uuid.uuid4())

            # Evaluate the initial program
            initial_metrics = await self.evaluator.evaluate_program(
                self.initial_program_code, initial_program_id
            )

            initial_program = Program(
                id=initial_program_id,
                code=self.initial_program_code,
                language=self.language,
                metrics=initial_metrics,
                iteration_found=start_iteration,
            )

            self.database.add(initial_program)
        else:
            logger.info(
                f"Skipping initial program addition (resuming from iteration {start_iteration} with {len(self.database.programs)} existing programs)"
            )

        # Main evolution loop
        total_iterations = start_iteration + max_iterations

        logger.info(
            f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})"
        )

        # Island-based evolution variables
        programs_per_island = max(
            1, max_iterations // (self.config.database.num_islands * 10)
        )  # Dynamic allocation
        current_island_counter = 0

        logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands")
        self.database.log_island_status()

        # Mutation strategy for the first iteration; later ones are chosen by the LLM
        strategy = "random"
        for i in range(start_iteration, total_iterations):
            iteration_start = time.time()

            # Manage island evolution - switch islands periodically
            if i > start_iteration and current_island_counter >= programs_per_island:
                self.database.next_island()
                current_island_counter = 0
                logger.debug(f"Switched to island {self.database.current_island}")

            current_island_counter += 1

            # Sample parent and inspirations from current island
            parent, inspirations = self.database.sample()

            # Get artifacts for the parent program if available
            parent_artifacts = self.database.get_artifacts(parent.id)

            # Get actual top programs for prompt context (separate from inspirations)
            # This ensures the LLM sees only high-performing programs as examples
            actual_top_programs = self.database.get_top_programs(5)

            # Build prompt; the sampler also reports which template type it used
            prompt, evolution_type = self.prompt_sampler.build_prompt(
                current_program=parent.code,
                parent_program=parent.code,  # We don't have the parent's code, use the same
                program_metrics=parent.metrics,
                #system_prompt=parent.system_prompt,
                previous_programs=[p.to_dict() for p in self.database.get_top_programs(3)],
                top_programs=[p.to_dict() for p in actual_top_programs],  # Use actual top programs
                inspirations=[p.to_dict() for p in inspirations],  # Pass inspirations separately
                language=self.language,
                evolution_round=i,
                diff_based_evolution=self.config.diff_based_evolution,
                program_artifacts=parent_artifacts if parent_artifacts else None,
                experience=parent.experience if parent.experience else None,
                strategy=strategy,
                # Disabled: pass information about the current best algorithm
                #best_algorithm_code=self.best_algorithm_code,
                #best_algorithm_description=self.best_algorithm_description,
                #best_algorithm_metrics=self.database.get(self.database.best_program_id).metrics if self.database.best_program_id else None,
                #improvement_summary=self.improvement_summary,
            )

            # Generate code modification
            try:
                llm_response = await self.llm_ensemble.generate_with_context(
                    system_message=prompt["system"],
                    messages=[{"role": "user", "content": prompt["user"]}],
                )
                # Parse the response
                if evolution_type in ["diff_user_parameter","diff_user_struct","diff_user_remove"]:
                #if evolution_type=="diff_user":
                    diff_blocks = extract_diffs(llm_response)

                    if not diff_blocks:
                        logger.warning(f"Iteration {i+1}: No valid diffs found in response")
                        continue

                    # Apply the diffs
                    child_code = apply_diff(parent.code, llm_response)
                    changes_summary = format_diff_summary(diff_blocks)
                else:
                    # Parse full rewrite
                    new_code = parse_full_rewrite(llm_response, self.language)

                    if not new_code:
                        logger.warning(f"Iteration {i+1}: No valid code found in response")
                        continue

                    child_code = new_code
                    changes_summary = "Full rewrite"

                # Strip any Markdown fence that survived parsing
                child_code = clean_generated_code(child_code)
                # Check code length
                if len(child_code) > self.config.max_code_length:
                    logger.warning(
                        f"Iteration {i+1}: Generated code exceeds maximum length "
                        f"({len(child_code)} > {self.config.max_code_length})"
                    )
                    continue

                # Evaluate the child program
                child_id = str(uuid.uuid4())
                child_metrics = await self.evaluator.evaluate_program(child_code, child_id)

                # Handle artifacts if they exist
                artifacts = self.evaluator.get_pending_artifacts(child_id)

                # Create a child program
                child_program = Program(
                    id=child_id,
                    code=child_code,
                    language=self.language,
                    parent_id=parent.id,
                    generation=parent.generation + 1,
                    metrics=child_metrics,
                    metadata={
                        "changes": changes_summary,
                        "parent_metrics": parent.metrics,
                    },
                )
                # Add this block to record the dialogue history for the iteration
                history_entry = {
                    "iteration": i + 1,
                    "timestamp": time.time(),
                    "parent_id": parent.id,
                    "child_id": child_program.id,
                    "prompt": {
                        "system": prompt["system"],
                        "user": prompt["user"],
                    },
                    "response": llm_response,
                    "child_metrics": child_program.metrics,
                }
                self.dialogue_history.append(history_entry)

                # Evolve the system prompt based on this iteration's outcome
                evolved_system_prompt = await self.evolve_prompt_with_llm(
                    original_prompt=prompt["system"],
                    parent_metrics=parent.metrics,
                    child_metrics=child_metrics,
                    history_entry=history_entry
                )
                #child_program.update_system_prompt(evolved_system_prompt)
                self.prompt_sampler.update_system_prompt(evolved_system_prompt)
                print("✅ 新一轮提示词进化完成")

                experience = await self.generate_evolutionary_experience(
                    parent_program=parent,
                    child_program=child_program
                )
                child_program.experience = experience
                print(f"✅ 新一轮进化经验总结生成完成")

                strategy = await self.select_mutation_strategy_with_llm(
                    current_prompt=prompt["user"],
                    parent_metrics=parent.metrics,
                    child_metrics=child_metrics,
                    history=self.dialogue_history[-10:]  # Use last 10 entries for context
                )
                print(f"✅ 新一轮变异策略选择完成，下一轮策略为：{strategy}")

                # Add to database (will be added to current island)
                self.database.add(child_program, iteration=i + 1)

                # Log prompts (once per child)
                self.database.log_prompt(
                    template_key=(
                        "full_rewrite_user" if not self.config.diff_based_evolution else "diff_user"
                    ),
                    program_id=child_id,
                    prompt=prompt,
                    responses=[llm_response],
                )

                # Store artifacts if they exist
                if artifacts:
                    self.database.store_artifacts(child_id, artifacts)

                # Increment generation for current island
                self.database.increment_island_generation()

                # Check if migration should occur
                if self.database.should_migrate():
                    logger.info(f"Performing migration at iteration {i+1}")
                    self.database.migrate_programs()
                    self.database.log_island_status()

                # Log progress
                iteration_time = time.time() - iteration_start
                self._log_iteration(i, parent, child_program, iteration_time)

                # Specifically check if this is the new best program
                if self.database.best_program_id == child_program.id:
                    logger.info(f"🌟 New best solution found at iteration {i+1}: {child_program.id}")
                    logger.info(f"Metrics: {format_metrics_safe(child_program.metrics)}")
                    #asyncio.create_task(self._update_best_algorithm_description(child_program))
                    #asyncio.create_task(self._generate_improvement_summary(parent_code=parent.code,parent_metrics=parent.metrics,best_code=child_program.code,best_metrics=child_program.metrics))

                # Save checkpoint
                if (i + 1) % self.config.checkpoint_interval == 0:
                    self._save_checkpoint(i + 1)
                    # Also log island status at checkpoints
                    logger.info(f"Island status at checkpoint {i+1}:")
                    self.database.log_island_status()

                # Check if target score reached
                if target_score is not None:
                    # Only consider numeric metrics for target score calculation
                    numeric_metrics = [
                        v
                        for v in child_metrics.values()
                        if isinstance(v, (int, float)) and not isinstance(v, bool)
                    ]
                    if numeric_metrics:
                        avg_score = sum(numeric_metrics) / len(numeric_metrics)
                        if avg_score >= target_score:
                            logger.info(
                                f"Target score {target_score} reached after {i+1} iterations"
                            )
                            break

            except Exception as e:
                # A failed iteration must not stop the run: log it and move on
                logger.exception(f"Error in iteration {i+1}: {str(e)}")
                continue
        self._save_dialogue_history()

        # Get the best program using our tracking mechanism
        best_program = None
        if self.database.best_program_id:
            best_program = self.database.get(self.database.best_program_id)
            logger.info(f"Using tracked best program: {self.database.best_program_id}")

        # Fallback to calculating best program if tracked program not found
        if best_program is None:
            best_program = self.database.get_best_program()
            logger.info("Using calculated best program (tracked program not found)")

        # Check if there's a better program by combined_score that wasn't tracked.
        # best_program may still be None here (empty database), so guard the access.
        if best_program is not None and "combined_score" in best_program.metrics:
            best_by_combined = self.database.get_best_program(metric="combined_score")
            if (
                best_by_combined
                and best_by_combined.id != best_program.id
                and "combined_score" in best_by_combined.metrics
            ):
                # If the combined_score of this program is significantly better, use it instead
                if (
                    best_by_combined.metrics["combined_score"]
                    > best_program.metrics["combined_score"] + 0.02
                ):
                    logger.warning(
                        f"Found program with better combined_score: {best_by_combined.id}"
                    )
                    logger.warning(
                        f"Score difference: {best_program.metrics['combined_score']:.4f} vs {best_by_combined.metrics['combined_score']:.4f}"
                    )
                    best_program = best_by_combined

        if best_program:
            logger.info(
                f"Evolution complete. Best program has metrics: "
                f"{format_metrics_safe(best_program.metrics)}"
            )

            # Save the best program (using our tracked best program)
            self._save_best_program(best_program)

            return best_program
        else:
            logger.warning("No valid programs found during evolution")
            # Return None if no programs found instead of undefined initial_program
            return None

    def _log_iteration(
        self,
        iteration: int,
        parent: Program,
        child: Program,
        elapsed_time: float,
    ) -> None:
        """
        Emit one INFO line summarising a finished iteration

        Args:
            iteration: Zero-based iteration index (logged as ``iteration + 1``)
            parent: Program the child was derived from
            child: Newly evaluated program
            elapsed_time: Duration of the iteration in seconds
        """
        # Metric deltas between parent and child, then the child's own metrics
        delta_text = format_improvement_safe(parent.metrics, child.metrics)
        metrics_text = format_metrics_safe(child.metrics)

        summary = (
            f"Iteration {iteration+1}: Child {child.id} from parent {parent.id} "
            f"in {elapsed_time:.2f}s. Metrics: "
            f"{metrics_text} "
            f"(Δ: {delta_text})"
        )
        logger.info(summary)

    def _save_checkpoint(self, iteration: int) -> None:
        """
        Save a checkpoint

        Creates ``<output_dir>/checkpoints/checkpoint_<iteration>``, saves the
        database into it and, when a best program is available, writes its
        code and a ``best_program_info.json`` summary alongside.

        Args:
            iteration: Current iteration number
        """
        checkpoint_dir = os.path.join(self.output_dir, "checkpoints")
        os.makedirs(checkpoint_dir, exist_ok=True)

        # Create specific checkpoint directory
        checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_{iteration}")
        os.makedirs(checkpoint_path, exist_ok=True)

        # Save the database
        self.database.save(checkpoint_path, iteration)

        # Save the best program found so far. Prefer the tracked one, but fall
        # back to a computed best if the tracked id is unset or no longer
        # resolves, as run() does.
        best_program = None
        if self.database.best_program_id:
            best_program = self.database.get(self.database.best_program_id)
        if best_program is None:
            best_program = self.database.get_best_program()

        if best_program:
            # Save the best program at this checkpoint. Generated code may
            # contain non-ASCII text, so the encoding is fixed to UTF-8.
            best_program_path = os.path.join(checkpoint_path, f"best_program{self.file_extension}")
            with open(best_program_path, "w", encoding="utf-8") as f:
                f.write(best_program.code)

            # Save metrics
            best_program_info_path = os.path.join(checkpoint_path, "best_program_info.json")
            with open(best_program_info_path, "w", encoding="utf-8") as f:
                json.dump(
                    {
                        "id": best_program.id,
                        "generation": best_program.generation,
                        "iteration": best_program.iteration_found,
                        "current_iteration": iteration,
                        "metrics": best_program.metrics,
                        "language": best_program.language,
                        "timestamp": best_program.timestamp,
                        "saved_at": time.time(),
                    },
                    f,
                    indent=2,
                )

            logger.info(
                f"Saved best program at checkpoint {iteration} with metrics: "
                f"{format_metrics_safe(best_program.metrics)}"
            )

        logger.info(f"Saved checkpoint at iteration {iteration} to {checkpoint_path}")


    async def evolve_prompt_with_llm(
        self,
        original_prompt: str,
        parent_metrics: dict,
        child_metrics: dict,
        history_entry: dict
    ) -> str:
    
        reflection_prompt = f"""
        You are an expert in optimizing prompt words. Please analyze the following information and generate a better system prompt:

        ### Original prompt words:
        {original_prompt}

        ### Parent program metrics:
        {json.dumps(parent_metrics, indent=2)}

        ### Child program indicators:
        {json.dumps(child_metrics, indent=2)}

        ### Complete dialogue history:
        {json.dumps(history_entry, indent=2)}

        
        ###Requirement:
        1. If the subroutine performs worse, point out the defect of the original prompt word
        2. Generate an improved system prompt (output directly without explanation)
        3. Keep prompt words concise and effective, focus on code evolution tasks
        4. The format of the generated prompt should be as similar as possible to the initial prompt
        5. If the code execution is effective, generate prompt words to prompt improvement. If the code execution fails (i.e. returns a large negative value), generate prompt words to prompt correction of errors in the code
        6. Generate system prompt words that are more detailed than the initial system prompt words as much as possible, with more information that can be included in the prompt words
        7. Keep the role description section(first part) unchanged
        """

        response = await self.llm_ensemble.generate_with_context(
            system_message="You are an expert in optimizing prompt words, only return the optimized system prompt without adding explanations.",
            messages=[{"role": "user", "content": reflection_prompt}]
        )
    
        return response.strip()


    async def generate_evolutionary_experience(
        self,
        parent_program: 'Program',
        child_program: 'Program',
    ) -> str:
        """
        Analyzes the parent and child programs to generate a structured 
        "Evolutionary Experience" report.
        This function's format mimics evolve_prompt_with_llm.
        """
        
        # Consolidate all instructions and data into a single f-string for the user prompt.
        analysis_prompt = f"""
        # Role and Goal
        You are a top-tier software engineering analyst specializing in code evolution and performance optimization. Your core task is to analyze two versions of a program: a Parent and a Child. By comparing their source code and performance metrics, you must produce a clear, insightful "Evolutionary Experience Summary" that explains the child's improvements or regressions relative to the parent, and distills this into a learnable lesson.

        # Input Data
        Here is the complete information for the parent and child programs:

        ### Parent Program (Parent)
        ```json
        {{
            "metrics": {json.dumps(parent_program.metrics, indent=4, ensure_ascii=False)},
            "code": {json.dumps(parent_program.code)}
        }}
        ```

        ### Child Program (Child)
        ```json
        {{
            "metrics": {json.dumps(child_program.metrics, indent=4, ensure_ascii=False)},
            "code": {json.dumps(child_program.code)}
        }}
        ```

        # Task Requirements
        1.  Based on the input data above, perform a deep causal analysis.
        2.  Generate an "Evolutionary Experience Analysis Report".
        3.  Your output must strictly follow the Markdown format provided below. Do not add any extra explanations, dialogue, or headers.
        4.  Directly output the report content.

        # Output Format Template
        ---
        ### **Evolutionary Experience Analysis Report**

        **1. Overall Assessment**
        > Provide a one-sentence summary here, e.g., "Significant improvement," "Minor regression," or "No significant change."

        **2. Metrics Comparison**
        | Metric Name     | Parent        | Child         | Change                       |
        |:----------------|:--------------|:--------------|:-----------------------------|
        | `metric_name_1` | `<value>`     | `<value>`     | `↑ Improvement / ↓ Regression / - No Change` |
        
        **3. Code Change Analysis**
        > Describe the specific code changes from the parent to the child here, using concise language or a code diff format.

        **4. Evolutionary Experience Summary**
        > This is the core of the report. Deeply analyze the causal relationship between the code changes and the metric changes, and distill it into a "Success Story" or a "Lesson Learned."
        ---
        """

        # Use the same calling convention as evolve_prompt_with_llm
        response = await self.llm_ensemble.generate_with_context(
            system_message="You are a professional software engineering analyst. Your task is to output an analysis report strictly in the requested Markdown format, without any additional explanations or conversational text.",
            messages=[{"role": "user", "content": analysis_prompt}]
        )
    
        # Return the cleaned, ready-to-use report string
        return response.strip()

    def _save_best_program(self, program: Optional[Program] = None) -> None:
        """
        Save the best program

        Writes the program's code to ``<output_dir>/best/best_program<ext>``
        and a ``best_program_info.json`` summary next to it. When no program
        can be determined, a warning is logged and nothing is written.

        Args:
            program: Best program (if None, uses the tracked best program)
        """
        # If no program is provided, use the tracked best program from the database
        if program is None:
            if self.database.best_program_id:
                program = self.database.get(self.database.best_program_id)
            if program is None:
                # Fallback to calculating the best program when nothing is
                # tracked or the tracked id no longer resolves
                program = self.database.get_best_program()

        if not program:
            logger.warning("No best program found to save")
            return

        best_dir = os.path.join(self.output_dir, "best")
        os.makedirs(best_dir, exist_ok=True)

        # Use the extension from the initial program file
        filename = f"best_program{self.file_extension}"
        code_path = os.path.join(best_dir, filename)

        # Generated code may contain non-ASCII text, so write it as UTF-8
        # instead of relying on the platform default encoding.
        with open(code_path, "w", encoding="utf-8") as f:
            f.write(program.code)

        # Save complete program info including metrics
        info_path = os.path.join(best_dir, "best_program_info.json")
        with open(info_path, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "id": program.id,
                    "generation": program.generation,
                    "iteration": program.iteration_found,
                    "timestamp": program.timestamp,
                    "parent_id": program.parent_id,
                    "metrics": program.metrics,
                    "language": program.language,
                    "saved_at": time.time(),
                },
                f,
                indent=2,
            )

        logger.info(f"Saved best program to {code_path} with program info to {info_path}")


    async def select_mutation_strategy_with_llm(
        self,
        current_prompt: str,
        parent_metrics: float,
        child_metrics: float,
        history: List[Dict[str, Any]]
    ) -> str:
        """
        Asks an LLM to select a mutation strategy based on performance evolution.

        The LLM's choice is strictly constrained to a predefined list of strategies.
        The reply is normalized before validation (quotes, surrounding whitespace,
        backticks, asterisks, periods and hyphens are removed, and the text is
        lower-cased) so that lightly decorated answers such as ``"`random`."``
        are still recognized.

        Args:
            current_prompt: The prompt text the next mutation will be applied to.
            parent_metrics: Score of the parent prompt (interpolated into the request).
            child_metrics: Score of the child prompt produced by the last mutation.
            history: Recent evolution history; must be JSON-serializable because it
                is embedded in the request via ``json.dumps``.

        Returns:
            One of the predefined strategy names. ``"random"`` is returned when the
            reply does not match any of them.
        """

        mutation_strategy_descriptions = {
            "diff_user_struct": "Slightly modify the structure, formatting, or ordering of the prompt. Good for small, exploratory changes.",
            "diff_user_remove": "Remove a sentence, constraint, or specific instruction that might be causing issues or is unnecessary.",
            "rewrite_user_template_and_motivated": "Perform a targeted rewrite of a specific part of the prompt, guided by a hypothesis for why the change is needed.",
            "diff_user_parameter": "Adjust a specific parameter, keyword, or value within the prompt (e.g., changing 'concise' to 'detailed', or tweaking a temperature setting).",
            "full_rewrite_user": "Completely rewrite the entire prompt from scratch while keeping the original goal. Use this for major changes when stuck in a local optimum.",
            "random": "Introduce a random, less-guided change to the prompt to increase diversity and explore novel directions."
        }

        # The valid choices are derived from the descriptions so the two can
        # never drift apart.
        mutation_options = list(mutation_strategy_descriptions)

        # Format the descriptions for inclusion in the prompt.
        formatted_strategies = "\n".join(
            f'- **{name}**: {desc}' for name, desc in mutation_strategy_descriptions.items()
        )

        selection_prompt = f"""
        You are an expert in evolutionary algorithms and prompt engineering. Your task is to analyze the performance change between a parent and child prompt and select the best mutation strategy for the next evolution step.

        ### Current Prompt:
        {current_prompt}

        ### Parent Prompt Score:
        {parent_metrics}

        ### Child Prompt Score (Result of the last mutation):
        {child_metrics}

        ### Recent Evolution History:
        {json.dumps(history, indent=2)}

        ### Analysis and Task:
        Analyze the performance improvement or degradation from the parent score to the child score. Based on this analysis and the history, select the most appropriate mutation strategy to apply to the "Current Prompt".
        - If the child score is significantly better, the last mutation was successful. Consider a similar or more focused mutation.
        - If the child score is worse, the last mutation failed. Consider a different approach, reverting a change, or a more exploratory strategy.
        - If the scores are stagnant, it might be time for a more drastic change or a random exploration to escape a local optimum.

        ### Available Mutation Strategies:
        {formatted_strategies}

        ### INSTRUCTIONS:
        Your response MUST be EXACTLY ONE of the strings from the "Available Mutation Strategies" list.
        Do NOT provide any explanation, commentary, numbering, or any characters other than the chosen strategy string.
        For example, if you choose "full_rewrite_user", your output should be just: full_rewrite_user
        """

        response = await self.llm_ensemble.generate_with_context(
            system_message="You are an AI assistant that strictly follows output format instructions. You will select one option from a provided list and output only that option as a plain string.",
            messages=[{"role": "user", "content": selection_prompt}]
        )

        # Normalize the reply. Strategy names start and end with a letter, so
        # stripping decoration characters from both ends cannot damage a valid
        # answer. An empty/None reply is treated as an invalid choice.
        cleaned_response = (
            (response or "")
            .replace('"', "")
            .replace("'", "")
            .strip(" \t\r\n`*.-")
            .lower()
        )

        if cleaned_response in mutation_options:
            return cleaned_response

        # Fallback strategy if the LLM fails to follow instructions. Reported
        # through the module logger, like the rest of this file, instead of print.
        logger.warning(
            "LLM returned an invalid strategy %r. Falling back to 'random'.",
            response,
        )
        return "random"


