import os
import math
import yaml
import time
import numpy as np
from typing import Dict, Any, List

# Import necessary MOCO components
from MOCO.problems import (
    BiObjectiveTSP, 
    MultiObjectiveKnapsack,
    TriObjectiveTSP,
    BiObjectiveCVRP
)

from MOCO.evaluation import MOCOEvaluator
from benchmarking_helper import generate_ws_config, print_timing_details, run_algorithm_with_timing, analyze_benchmark_results
import multiprocessing
import torch
import logging

try:
    multiprocessing.set_start_method('spawn')
except RuntimeError:
    pass
from our_method_dl_Thompson_variant import CachedAdvancedBiKPWrapper  as FixedDecomposedThompsonWrapperAcc

from our_method_dl_UCB_variant import CachedAdvancedBiKPWrapper  as TestFixedDecomposedWrapperAcc

def run_flexible_benchmark(
    algorithm_classes,
    problem_types=['BiObjectiveTSP', 'MultiObjectiveKnapsack', 'TriObjectiveTSP', 'BiObjectiveCVRP'],
    problem_sizes={'BiObjectiveTSP': {'medium': {'n_cities': 50}}, 
                   'MultiObjectiveKnapsack': {'medium': {'n_items': 50, 'n_objectives': 2, 'capacity': 12.5}},
                   'TriObjectiveTSP': {'medium': {'n_cities': 50}},
                   'BiObjectiveCVRP': {'small': {'n_customers': 20}}},
    num_runs=1,
    cuda_device=0,
    base_params=None
    ):
    """
    Flexible benchmark function that supports multiple algorithms and problems including BOPR
    
    Parameters:
    -----------
    algorithm_classes : dict
        Dictionary mapping algorithm names to algorithm classes
    problem_types : list
        List of problem types to benchmark
    problem_sizes : dict
        Dictionary of problem configurations
    num_runs : int
        Number of runs per configuration
    cuda_device : int
        CUDA device to use
    base_params : dict
        Base parameters for algorithms (will be overridden by algorithm-specific params)
    """
    # Use specified CUDA device
    device = torch.device("cpu") #orch.device(f"cuda:{cuda_device}" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # Create timestamp for files
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    
    # Define problem mapping
    problem_map = {
        'BiObjectiveTSP': {'class': BiObjectiveTSP, 'ref_type': 'BiTSP'},
        'MultiObjectiveKnapsack': {'class': MultiObjectiveKnapsack, 'ref_type': 'BiKP'},
        'TriObjectiveTSP': {'class': TriObjectiveTSP, 'ref_type': 'TriTSP'},
        'BiObjectiveCVRP': {'class': BiObjectiveCVRP, 'ref_type': 'BiCVRP', 'n_objectives': 2}  # Added
    }
    
    # Set default base parameters if none provided
    if base_params is None:
        base_params = {
            'population_size': 100,
            'n_generations': 30,
            'verbose': True
        }
    
    # Store results
    results = {}
    config = {
        'metadata': {
            'timestamp': timestamp,
            'num_runs': num_runs,
            'device': device.type,
        },
        'results': {}
    }
    
    # Overall benchmark start time
    overall_start = time.time()
    
    # For each algorithm
    for algorithm_name, algorithm_class in algorithm_classes.items():
        print(f"\n{'-'*60}")
        print(f"Benchmarking algorithm: {algorithm_name}")
        print(f"{'-'*60}")
        
        # Algorithm-specific results
        results[algorithm_name] = {}
        config['results'][algorithm_name] = {}
        
        # For each problem type
        for problem_type in problem_types:
            if problem_type not in problem_map:
                print(f"Problem {problem_type} not defined. Skipping.")
                continue
                
            if problem_type not in problem_sizes:
                print(f"No sizes defined for {problem_type}. Skipping.")
                continue
                
            # Problem-specific results
            results[algorithm_name][problem_type] = {}
            config['results'][algorithm_name][problem_type] = {}
            
            # Get problem class
            problem_class = problem_map[problem_type]['class']
            ref_type = problem_map[problem_type]['ref_type']
            
            # Initialize temp evaluator for reference points
            temp_evaluator = MOCOEvaluator(reference_point=(1.0, 1.0))
            
            # For each problem size
            for size_name, problem_params in problem_sizes[problem_type].items():
                print(f"\nBenchmarking {problem_type} ({size_name})...")
                
                # Determine reference point size
                if problem_type == 'BiObjectiveTSP' or problem_type == 'TriObjectiveTSP':
                    ref_size = problem_params['n_cities']
                elif problem_type == 'MultiObjectiveKnapsack':  # MultiObjectiveKnapsack
                    ref_size = problem_params['n_items']
                else:                                               # BiObjectiveCVRP
                    ref_size = problem_params['n_customers']
                
                # Get standard reference points
                try:
                    standard_points = temp_evaluator.get_standard_points(
                        problem_type=ref_type,
                        problem_size=ref_size
                    )
                    
                    if standard_points and 'reference' in standard_points:
                        reference_point = standard_points['reference']
                        ideal_point = standard_points.get('ideal', (0, 0))
                    else:
                        # Default reference points
                        reference_point = (35, 35) if problem_type == 'BiObjectiveTSP' else (20, 20)
                        ideal_point = (0, 0) if problem_type == 'BiObjectiveTSP' else (50, 50)
                except Exception as e:
                    print(f"Error getting standard points: {e}")
                    # Default reference points
                    reference_point = (35, 35) if problem_type == 'BiObjectiveTSP' else (20, 20)
                    ideal_point = (0, 0) if problem_type == 'BiObjectiveTSP' else (50, 50)
                
                print(f"Reference Point: {reference_point}")
                print(f"Ideal Point: {ideal_point}")
                
                # Create evaluator
                evaluator = MOCOEvaluator(
                    reference_point=reference_point, 
                    results_dir=f"benchmark_results_OL_{timestamp}"
                )
                
                # Disable parallel evaluation to avoid CUDA issues
                evaluator.parallel = False
                
                # Set algorithm parameters
                algorithm_params = base_params.copy()
                algorithm_params['reference_point'] = reference_point
                        
                # Adjust parameters based on algorithm type and size
                if algorithm_name in "NSGA2qNEHVI":
                    # Add qNEHVI specific parameters
                    algorithm_params.update({
                        'batch_size': 5,
                        'max_iterations': 30,
                        'use_sparse_gp': True,
                        'n_inducing_points': 50,
                        'population_size': 100,
                        'n_generations': 100,
                        'verbose': True,
                        'matern_nu': 5 if ref_size==100 else 4,
                        'early_stopping_patience': 5 
                    })

                if algorithm_name == "BOPR" or "BOPR" in algorithm_name:
                    # Add BOPR specific parameters
                    algorithm_params.update({
                        'n_initial': 10,
                        'n_iterations': 15, #if ref_size < 50 else 25,
                        'mc_samples': 64,
                        'lr': 0.5,
                        'temperature': 0.8,
                        'sparse_gp': True if ref_size > 30 else False,
                        'inducing_points': min(60, max(30, ref_size))#min(100, max(30, ref_size))
                    })
                    
                if algorithm_name == "UCBHedgeHybrid" or "UCBHedgeHybrid" in algorithm_name:
                    # Add BOPR specific parameters
                    algorithm_params.update({
                        'learning_rate': 0.5,
                        'ucb_coefficient': 2.0,
                        'initial_temperature': 1.0,
                        'temp_decay': 0.98,
                        'hybrid_ratio': 0.5,
                        'adaptive_hybrid': True,
                        'decomposition_size': 10,
                        'overlap': 3,
                        'max_iterations': 150,
                        'nb_rounds': 5,
                        'patience': 30
                    })
                    
                if  "UCBHedgeHybridOCOAcc" in algorithm_name:
                
                    #     algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'ucb_coefficient': 2.0,  # Less exploration
                    #     'initial_temperature': 1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 10,  # Or 15 for size 100 problems
                    #     'overlap': 3,  # Larger initial overlap
                    #     'max_iterations': 150,
                    #     'nb_rounds': 5,
                    #     'patience': 30,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 2.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1  # Slower decay
                    # })
                    # algorithm_params.update({
                    #     'learning_rate': 0.1,  # Reduced for stability
                    #     'ucb_coefficient': 2.0,  # Less exploration
                    #     'initial_temperature': 1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.6,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 14,  # Or 15 for size 100 problems
                    #     'overlap': 7,  # Larger initial overlap
                    #     'max_iterations': 100,
                    #     'nb_rounds': 5,
                    #     'patience': 50,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 2.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1  # Slower decay
                    # })
                    # ----- final params for table results ------
                    # algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'ucb_coefficient': 3.0,  # Less exploration
                    #     'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.7,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': ref_size//2,  # Or 15 for size 100 problems
                    #     'overlap': ref_size//2 -7 ,  # Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 100,
                    #     'nb_rounds': 10, #5
                    #     'patience': 20, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 20, #ref_size + 10,  # For wrapper
                    #     # 'max_weights': 20
                    # })
                    # algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'ucb_coefficient': 3.0,  # Less exploration
                    #     'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 35, #40, 25(best 100 scale), #ref_size//3,  # 15 Or 15 for size 100 problems
                    #     'overlap': 15, # 10,# 12(best), #16, 10, 8, #ref_size//3 -7 ,  # 6 Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 150,
                    #     'nb_rounds': 6, #5 | 20 gave best till now for laarge scale
                    #     'patience': 10, #20, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.5,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 20, #ref_size + 10,  # For wrapper
                    #     # 'max_weights': 20
                    # })

                    algorithm_params.update({
                        'learning_rate': 0.5,  # Reduced for stability
                        'ucb_coefficient': 3.0,  # Less exploration
                        'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                        'temp_decay': 0.98, #0.98,  # Much slower decay
                        'hybrid_ratio': 0.5,
                        'adaptive_hybrid': True,
                        'decomposition_size': 25, #40, 25(best 100 scale), #ref_size//3,  # 15 Or 15 for size 100 problems
                        'overlap': 18, # 10,# 12(best), #16, 10, 8, #ref_size//3 -7 ,  # 6 Larger initial overlap
                        'use_adaptive_weights': True,
                        # 'adaptive_strategy': 'geometric',  # Use simple approach
                        'max_iterations': 150,
                        'nb_rounds': 5, #5 | 20 gave best till now for laarge scale
                        'patience': 20, #20, # 20,
                        'use_lagrangian': True,
                        'use_ftrl': True,
                        'dual_step_size': 1.0,  # Much larger!
                        'use_accelerated_dual': True,
                        'use_diminishing_overlap': True,
                        'overlap_decay_rate': 0.1,  # Slower decay
                        # 'use_selective_sensitivity': True
                        'use_learned_operators': True,
                        'n_weight_vectors': 25, #ref_size + 10,  # For wrapper
                        # 'max_weights': 20
                    })

                    # algorithm_params.update({
                    #     'learning_rate': 0.1,  # Reduced for stability
                    #     'ucb_coefficient': 3.0,  # Less exploration
                    #     'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 25, #40, 25(best 100 scale), #ref_size//3,  # 15 Or 15 for size 100 problems
                    #     'overlap': 11, # 10,# 12(best), #16, 10, 8, #ref_size//3 -7 ,  # 6 Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 150,
                    #     'nb_rounds': 20, #5 | 20 gave best till now for laarge scale
                    #     'patience': 50, #20, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 25, #ref_size + 10,  # For wrapper
                    #     # 'max_weights': 20
                    # })
                    # TriTSP100
                    # algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'ucb_coefficient': 3.0,  # Less exploration
                    #     'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 25, #40, 25(best 100 scale), #ref_size//3,  # 15 Or 15 for size 100 problems
                    #     'overlap': 11, #10,# 12(best), #16, 10, 8, #ref_size//3 -7 ,  # 6 Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 100,
                    #     'nb_rounds': 25, #5 | 20 gave best till now for laarge scale
                    #     'patience': 100, #20, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 35, #ref_size + 10,  # For wrapper
                    #     # 'max_weights': 20
                    # })
                    # algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'ucb_coefficient': 3.0,  # Less exploration
                    #     'initial_temperature': 0.5,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 25, #40, 25(best 100 scale), #ref_size//3,  # 15 Or 15 for size 100 problems
                    #     'overlap': 10, #10,# 12(best), #16, 10, 8, #ref_size//3 -7 ,  # 6 Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 100,
                    #     'nb_rounds': 25, #5 | 20 gave best till now for laarge scale
                    #     'patience': 50, #20, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.3,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 25, #ref_size + 10,  # For wrapper
                    #     # 'max_weights': 20
                    # })
                    # CVRP
                    # algorithm_params.update({
                    #     'learning_rate': 0.1,  # Reduced for stability
                    #     'ucb_coefficient': 3.0,  # Less exploration
                    #     'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.98, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 35, #40, 25(best 100 scale), #ref_size//3,  # 15 Or 15 for size 100 problems
                    #     'overlap': 16, # 10,# 12(best), #16, 10, 8, #ref_size//3 -7 ,  # 6 Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 100,
                    #     'nb_rounds': 20, #5 | 20 gave best till now for laarge scale
                    #     'patience': 30, #20, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': True,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 30, #ref_size + 10,  # For wrapper
                    #     # 'max_weights': 20
                    # })


                    print(f"DEBUG: Set params for {algorithm_name}: decomposition_size={algorithm_params['decomposition_size']}, overlap={algorithm_params['overlap']}")
                
                if "ThompsonHedgeHybridOCOAcc" in algorithm_name: # add new thompson sampling version.
                    # BiTSP
                    # algorithm_params.update({
                    #     'learning_rate': 0.1,  # Reduced for stability
                    #     'initial_temperature': 1.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.995, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 35, #25, #ref_size//2,  # Or 15 for size 100 problems
                    #     'overlap': 18, #8, #ref_size//2-7 ,  # Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 180, #150
                    #     'nb_rounds': 15, #10, #5, 20
                    #     'patience': 60, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': False,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 25, #25, #ref_size + 10,  # For wrapper
                    #     # 'ts_prior_variance': 2.0,   # ← Thompson Sampling exploration
                    #     # 'ts_noise_variance': 0.01,   # ← Thompson Sampling exploration
                    #     # 'max_weights': 20
                    # })

                    # BiKP100
                    # algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'initial_temperature': 3.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.995, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 35, #25, #ref_size//2,  # Or 15 for size 100 problems
                    #     'overlap': 15, #8, #ref_size//2-7 ,  # Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 150, #150
                    #     'nb_rounds': 10, #10, #5, 20
                    #     'patience': 35, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': False,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 20, #25, #ref_size + 10,  # For wrapper
                    #     'ts_prior_variance': 3.0,   # ← Thompson Sampling exploration
                    #     'ts_noise_variance': 0.01,   # ← Thompson Sampling exploration
                    #     # 'max_weights': 20
                    # })
                    # algorithm_params.update({
                    #     'learning_rate': 0.5,  # Reduced for stability
                    #     'initial_temperature': 3.0,#1.0,  # Ensure it starts at 1
                    #     'temp_decay': 0.995, #0.98,  # Much slower decay
                    #     'hybrid_ratio': 0.5,
                    #     'adaptive_hybrid': True,
                    #     'decomposition_size': 25, #35, #25, #ref_size//2,  # Or 15 for size 100 problems
                    #     'overlap': 10, #16, #8, #ref_size//2-7 ,  # Larger initial overlap
                    #     'use_adaptive_weights': True,
                    #     # 'adaptive_strategy': 'geometric',  # Use simple approach
                    #     'max_iterations': 150, #150
                    #     'nb_rounds': 25, #10, #5, 20
                    #     'patience': 50, # 20,
                    #     'use_lagrangian': True,
                    #     'use_ftrl': False,
                    #     'dual_step_size': 1.0,  # Much larger!
                    #     'use_accelerated_dual': True,
                    #     'use_diminishing_overlap': True,
                    #     'overlap_decay_rate': 0.1,  # Slower decay
                    #     # 'use_selective_sensitivity': True
                    #     'use_learned_operators': True,
                    #     'n_weight_vectors': 30, #25, #ref_size + 10,  # For wrapper
                    #     'ts_prior_variance': 3.0, #3.0,   # ← Thompson Sampling exploration
                    #     'ts_noise_variance': 0.01, #0.01,   # ← Thompson Sampling exploration
                    #     # 'max_weights': 20
                    # })
                    # BICVRP
                    algorithm_params.update({
                        'learning_rate': 0.5,  # Reduced for stability
                        'initial_temperature': 3.0,#1.0,  # Ensure it starts at 1
                        'temp_decay': 0.995, #0.98,  # Much slower decay
                        'hybrid_ratio': 0.5,
                        'adaptive_hybrid': True,
                        'decomposition_size': 25, #35, #25, #ref_size//2,  # Or 15 for size 100 problems
                        'overlap': 11, #16, #8, #ref_size//2-7 ,  # Larger initial overlap
                        'use_adaptive_weights': True,
                        # 'adaptive_strategy': 'geometric',  # Use simple approach
                        'max_iterations': 140, #150
                        'nb_rounds': 15, #10, #5, 20
                        'patience': 60, # 20,
                        'use_lagrangian': True,
                        'use_ftrl': False,
                        'dual_step_size': 1.0,  # Much larger!
                        'use_accelerated_dual': True,
                        'use_diminishing_overlap': True,
                        'overlap_decay_rate': 0.1,  # Slower decay
                        # 'use_selective_sensitivity': True
                        'use_learned_operators': True,
                        'n_weight_vectors': 25, #25, #ref_size + 10,  # For wrapper
                        'ts_prior_variance': 2.0, #3.0,   # ← Thompson Sampling exploration
                        'ts_noise_variance': 0.09, #0.01,   # ← Thompson Sampling exploration
                        # 'max_weights': 20
                    })
                print(f"DEBUG: Set params for {algorithm_name}: decomposition_size={algorithm_params['decomposition_size']}, overlap={algorithm_params['overlap']}", algorithm_params)
                # Start problem timer
                problem_start_time = time.time()
                
                try:
                    print(f"Running {algorithm_name} on {problem_type} ({size_name}) with {num_runs} runs...")
                    # Run algorithm evaluation
                    result = evaluator.evaluate_algorithm(
                        algorithm_class=algorithm_class,
                        problem_class=problem_class,
                        algorithm_name=algorithm_name,
                        parameters=algorithm_params,
                        problem_params=problem_params,
                        num_runs=num_runs
                    )
                    
                    # Calculate runtime
                    problem_runtime = time.time() - problem_start_time
                    
                    # Store results
                    benchmark_result = {
                        'status': 'success',
                        'runtime': float(result.runtime),
                        'local_runtime': float(problem_runtime),
                        'hypervolume': float(result.hypervolume),
                        'num_nondominated': int(result.num_nondominated)
                    }
                    
                    results[algorithm_name][problem_type][size_name] = benchmark_result
                    config['results'][algorithm_name][problem_type][size_name] = benchmark_result
                    
                    # Display results
                    print(f"\nResults for {algorithm_name} on {problem_type} ({size_name}):")
                    print(f"  Runtime: {result.runtime:.2f} seconds")
                    print(f"  Local Runtime: {problem_runtime:.2f} seconds")
                    print(f"  Hypervolume: {result.hypervolume:.4f}")
                    print(f"  Non-dominated Solutions: {result.num_nondominated}")
                    
                except Exception as e:
                    print(f"Error benchmarking {algorithm_name} on {problem_type} ({size_name}): {e}")
                    import traceback
                    traceback.print_exc()
                    
                    error_result = {'status': 'error', 'error': str(e)}
                    results[algorithm_name][problem_type][size_name] = error_result
                    config['results'][algorithm_name][problem_type][size_name] = error_result
    
    # Calculate overall time
    overall_time = time.time() - overall_start
    config['metadata']['overall_time'] = overall_time
    
    # Save configuration and results
    os.makedirs("configs", exist_ok=True)
    config_path = f"configs/benchmark_results_drl_{timestamp}.yaml"
    
    try:
        with open(config_path, 'w') as f:
            yaml.dump(config, f, default_flow_style=False)
        print(f"\nResults saved to: {config_path}")
    except Exception as e:
        print(f"Error saving config: {e}")
    
    # Print summary
    print("\n" + "="*50)
    print("BENCHMARK SUMMARY")
    print("="*50)
    print(f"Overall time: {overall_time:.1f} seconds")
    
    for alg_name, alg_results in results.items():
        print(f"\n{alg_name}:")
        for prob_name, prob_results in alg_results.items():
            print(f"  {prob_name}:")
            for size_name, result in prob_results.items():
                if result.get('status') == 'success':
                    print(f"    {size_name.upper()}: Runtime: {result['runtime']:.2f}s, "
                         f"HV: {result['hypervolume']:.4f} "
                         f"({result['num_nondominated']} solutions)")
                else:
                    print(f"    {size_name.upper()}: Error: {result.get('error', 'Unknown error')}")
    
    return results, config


# Example usage
if __name__ == "__main__":
    # Define algorithms to benchmark including BOPR
    algorithms = {
        # "NSGA2": NSGA2,
        # "NSGA2qNEHVI": NSGA2qNEHVI,
        # "MOBOqParEGO": MOBOqParEGO,
        # "BOPR": GPUAcceleratedBOPRWrapper,
        # "UCBHedgeHybrid": FixedDecomposedWrapper
        # "UCBHedgeHybridOCOAcc": FixedDecomposedWrapperAcc, # working version, final version
        "UCBHedgeHybridOCOAcc": TestFixedDecomposedWrapperAcc, #FixedDecomposedWrapperAcc,
        # "ThompsonHedgeHybridOCOAcc": FixedDecomposedThompsonWrapperAcc
    }
    
    # Define problem sizes
    problem_sizes = {
        'BiObjectiveTSP': {
        #     # 'small': {'n_cities': 20},
            'medium': {'n_cities': 50},
            # 'large': {'n_cities': 100}
        },
        # 'MultiObjectiveKnapsack': {
        # # #     # 'small': {'n_items': 50, 'n_objectives': 2, 'capacity': 12.5},
        # # #     # 'medium': {'n_items': 100, 'n_objectives': 2, 'capacity': 25.0},
        #     'large': {'n_items': 200, 'n_objectives': 2, 'capacity': 25.0}
        # },
        # 'TriObjectiveTSP': {  # Add this
        #     # 'small': {'n_cities': 20},
        #     # 'medium': {'n_cities': 50},
        #     'large': {'n_cities': 100},
        # },
        # 'BiObjectiveCVRP': {  # Add this section
        # #         # 'small': {'n_customers': 20},
        #         'medium': {'n_customers': 50, 'n_vehicles': 15}, # 25
        #         # 'large': {'n_customers': 100, 'n_vehicles': 25}
        # }
    }
    
    # Base parameters for all algorithms
    base_params = {}
    
    # Run the benchmark
    results, config = run_flexible_benchmark(
        algorithm_classes=algorithms,
        problem_sizes=problem_sizes,
        num_runs=10,
        cuda_device=0,
        base_params=base_params
    )