from __future__ import annotations

import numpy as np
from typing import List, Optional

from ..shared.batch_eval import batch_evaluate_tasks, evaluate_single_solver_instance, prepare_tasks


class BPOnlineGeneratorProblem:
    """Fitness evaluator for candidate instance-generator codes (BP Online).

    Every generator code is run against the current solver heuristics through
    the shared batch-evaluation pipeline. The per-solver mean gaps are combined
    with the solver mixture weights ``sigma_h`` and negated, so that a
    minimizing search favours generators that yield larger gaps.
    """

    # Fitness assigned to a generator whose evaluation produced no result for a
    # solver that carries weight. The objective is minimized, so this is a penalty.
    _FAILURE_FITNESS = 1e9

    def __init__(
        self,
        config,
        heuristics: Optional[List[str]] = None,
        sigma_h: Optional[np.ndarray] = None,
        n_inst_eva: Optional[int] = None,
    ) -> None:
        """Initialize BP Online Generator Problem.

        Args:
            config: HeuPSROConfig object.
            heuristics: List of solver heuristic codes (set dynamically at
                runtime).
            sigma_h: Mixture weights over the solvers (set dynamically at
                runtime). Normalized to sum to 1 when the sum is positive;
                defaults to ``[1.0]``.
            n_inst_eva: Number of instances (optional override of
                ``config.eoh_eval_n_instances``).
        """
        self.config = config
        self.heuristics = heuristics or []

        weights = np.asarray(sigma_h, dtype=float) if sigma_h is not None else np.array([1.0])
        total = weights.sum()
        # Leave the weights untouched when they cannot be normalized.
        self.sigma_h = weights / (total if total > 0 else 1)

        self.capacity = config.capacity
        self.num_items = config.num_items

        self.n_inst_eva = n_inst_eva or config.eoh_eval_n_instances
        self.time_limit = getattr(config, 'instance_solver_time_limit', 5)
        self.use_gap = config.generator_use_gap
        self.gap_oracle = getattr(config, 'oracle_type', 'lb')
        self.gap_oracle_timeout = getattr(config, 'oracle_timeout', 0)
        self.debug_mode = getattr(config, 'debug_mode', False)
        self.parallel_backend = getattr(config, 'parallel_backend', 'loky')
        self.parallel_prefer = getattr(config, 'parallel_prefer', 'processes')
        self.parallel_n_jobs = getattr(config, 'eval_n_jobs', -1)

        # Note: We don't cache instances for generator evaluation because each generator
        # code generates different instances. Solver evaluation can cache because generator mix is fixed.

        # Add prompts attribute that EoH might expect.
        # Imported here rather than at module level, as in the original code.
        from .prompts import GetPrompts
        self.prompts = GetPrompts()

    def set_evolution_context(self, context: Optional[str] = None, enabled: bool = True) -> None:
        """Set evolution context for PSRO-level task description.

        Does nothing when the instance has no ``prompts`` object.

        Args:
            context: Context string describing the mixed solver strategy.
            enabled: Whether to use context in prompts.
        """
        prompts = getattr(self, 'prompts', None)
        if prompts is not None:
            prompts.set_context(context, enabled)

    def evaluate(self, code_string: str, n_instances: Optional[int] = None) -> float:
        """Evaluate a single generator (kept for backward compatibility).

        Args:
            code_string: Generator code to evaluate.
            n_instances: Number of instances to evaluate on; falls back to
                ``self.n_inst_eva`` when ``None``.

        Returns:
            The fitness of the generator (see ``evaluate_batch``).
        """
        return self.evaluate_batch([code_string], n_instances=n_instances)[0]

    def evaluate_batch(self, code_strings: List[str], n_instances: Optional[int] = None) -> List[float]:
        """Evaluate a batch of generator codes against all current solvers.

        Args:
            code_strings: List of generator code strings [code_0, code_1, ..., code_n-1]
            n_instances: Number of instances per generator; falls back to
                ``self.n_inst_eva`` when ``None``.

        Returns:
            List of fitness values [fitness_0, fitness_1, ..., fitness_n-1].
            Each value is the negated ``sigma_h``-weighted mean gap, or
            ``_FAILURE_FITNESS`` when a weighted solver has no result for
            that generator.
        """
        n_generators = len(code_strings)
        if n_generators == 0:
            # Nothing to evaluate; skip the shared pipeline entirely.
            return []
        if n_instances is None:
            n_instances = self.n_inst_eva

        solver_codes = self.heuristics
        solver_ids = list(range(len(solver_codes)))
        generator_ids = list(range(n_generators))
        generator_weights = np.ones(n_generators)

        # Prefer 'optimal_parallel_n_jobs'; fall back to the
        # 'oracle_parallel_n_jobs' config name, then to -1.
        oracle_n_jobs = getattr(
            self.config,
            'optimal_parallel_n_jobs',
            getattr(self.config, 'oracle_parallel_n_jobs', -1),
        )

        # Step 1: call shared prepare_tasks to generate all tasks
        # Note: For generator evaluation, we cannot cache instances because each generator code
        # generates different instances. Each call must generate new instances from the new generators.
        all_tasks, _, _ = prepare_tasks(
            solver_codes=solver_codes,
            solver_ids=solver_ids,
            generator_codes=code_strings,
            generator_ids=generator_ids,
            generator_weights=generator_weights,
            n_instances=n_instances,
            capacity=self.capacity,
            num_items=self.num_items,
            time_limit=self.time_limit,
            use_gap=self.use_gap,
            gap_oracle=self.gap_oracle,
            oracle_timeout=self.gap_oracle_timeout,
            optimal_parallel_n_jobs=oracle_n_jobs,
            debug_mode=self.debug_mode,
            cached_instances=None,
            cached_oracle_costs=None,
            config=self.config,
        )

        # Step 2: call shared batch evaluate (shared will average over instances for each (solver_id, generator_id))
        timeout_per_task = self.time_limit + self.gap_oracle_timeout + 10
        batch_timeout = len(all_tasks) * timeout_per_task * 1.5 if all_tasks else None
        results_dict = batch_evaluate_tasks(
            tasks=all_tasks,
            evaluate_fn=evaluate_single_solver_instance,
            n_jobs=self.parallel_n_jobs,
            backend=self.parallel_backend,
            prefer=self.parallel_prefer,
            timeout=batch_timeout,
            debug_mode=self.debug_mode,
            track_time=True,
            time_key="generator",
            task_batch_size=getattr(self.config, 'batch_eval_task_batch_size', None),
        )

        # Step 3: group by generator, weight by sigma_h (solver's weight)
        return self._aggregate_fitnesses(results_dict, n_generators)

    def _aggregate_fitnesses(self, results_dict, n_generators: int) -> List[float]:
        """Turn per-(solver, generator) mean gaps into one fitness per generator.

        Args:
            results_dict: Mapping ``{(solver_id, generator_id): mean_gap}``.
            n_generators: Number of generators that were evaluated.

        Returns:
            One fitness per generator id in ``range(n_generators)``.
        """
        n_solvers = len(self.heuristics)
        n_weights = len(self.sigma_h)

        fitnesses: List[float] = []
        for gen_id in range(n_generators):
            weighted_sum = 0.0
            missing = False
            for solver_id in range(n_solvers):
                # Solvers beyond the end of sigma_h carry no weight.
                weight = self.sigma_h[solver_id] if solver_id < n_weights else 0.0
                if weight == 0:
                    continue
                mean_gap = results_dict.get((solver_id, gen_id))
                if mean_gap is None:
                    missing = True
                    break
                weighted_sum += weight * mean_gap

            if missing:
                # A missing result must not be negated into a large negative
                # (i.e. best) fitness; penalize the generator instead.
                fitnesses.append(self._FAILURE_FITNESS)
            else:
                # For generator: we want to maximize gap (harder instances for solvers)
                # But EOH minimizes objective, so we return negative values
                fitnesses.append(-float(weighted_sum))

        return fitnesses

