"""
装箱问题 (BPP) 评估器 (可反馈错误模式).

此评估器在“全有或全无”的基础上进行了增强. 当评估失败时,
它不仅返回一个极低的分数, 还会返回一个包含具体失败原因的字符串.
这个错误信息可以被上层框架(如 AlphaEvolve)捕获, 并用于指导大语言模型进行有针对性的代码修复.

评分逻辑:
- 适应度 = -误差. 误差 = (算法结果 - 最优结果) / 最优结果.
- 严格评估: 必须成功解决所有问题实例.
- 错误反馈: 任何失败都会中止评估, 并以结构化的方式返回失败的具体原因.
- 性能阈值: 如果算法结果严格小于最优解, 视为评估失败 (防止不可能的解).
"""

import os
import re
import sys
import pickle
import subprocess
import tempfile
import traceback
import numpy as np

# --- Problem and evaluator configuration ---
# Folder that holds the BPP problem files.
# NOTE(review): absolute path into one user's home directory — confirm it
# exists on the machine that runs this evaluator.
PROBLEM_FOLDER = '/home/liuyihong/openevolve/examples/BPP/input' 
# Folder that receives the detailed per-instance result files.
OUTPUT_FOLDER = 'bpp_results'
# Fitness score returned when evaluation is aborted by any error.
FAILURE_SCORE = -1e9
# Fraction of the optimal bin count: evaluate() fails the run when a result is
# at or below (optimal * this value), treating it as an impossible solution.
IMPOSSIBLE_PERFORMANCE_THRESHOLD = 0.5


# --- 核心功能函数 ---

def validate_bpp_solution(bins_used):
    """
    Run a basic format check on the bin count reported by a BPP solver.

    A result is accepted only when it is an ``int`` that is not negative.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` names the check that
        failed, or confirms that the format is valid.
    """
    if bins_used is None:
        verdict = (False, "验证错误: 返回的箱子数为 None.")
    elif not isinstance(bins_used, int):
        verdict = (False, f"验证错误: 返回的箱子数不是整数 (实际类型: {type(bins_used)}).")
    elif bins_used < 0:
        verdict = (False, f"验证错误: 返回的箱子数 ({bins_used}) 不能为负数.")
    else:
        verdict = (True, "结果格式有效")
    return verdict


def run_solver(program_path, capacity, items, timeout=30):
    """
    Run the candidate's ``solve(capacity, items)`` in a separate Python process.

    A throw-away driver script is generated, executed with the current
    interpreter, and its outcome is read back from a pickle file written next
    to the script. Both temporary files are removed before returning.

    Args:
        program_path: Path of the Python file that defines ``solve``.
        capacity: Bin capacity; embedded into the driver script as a literal.
        items: Item list; embedded into the driver script as a literal.
        timeout: Seconds to wait for the child process (default 30).

    Returns:
        A dict with ``bins_used`` (``len`` of the returned list, or ``None``
        when ``solve`` did not return a list) and ``solution_details`` (the
        returned list, or ``None``).

    Raises:
        RuntimeError: the child exited with a non-zero code, the solver raised
            an exception, or no results file was produced.
        subprocess.TimeoutExpired: the child did not finish within ``timeout``.
    """
    # repr() produces a correctly escaped Python literal, so a path containing
    # quotes or backslashes cannot break the generated script.
    program_literal = repr(os.fspath(program_path))
    script_file_path = None
    results_path = None

    try:
        with tempfile.NamedTemporaryFile(suffix=".py", delete=False, mode='w', encoding='utf-8') as script_file:
            script_file_path = script_file.name
            results_path = f"{script_file_path}.results"
            script_file.write(f"""
import sys, os, pickle, traceback
import importlib.util

program_path = {program_literal}
results_path = {results_path!r}
sys.path.insert(0, os.path.dirname(program_path))
results = {{}}
try:
    spec = importlib.util.spec_from_file_location("program", program_path)
    program = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(program)

    capacity_data = {capacity}
    items_data = {items}

    solution_details = program.solve(capacity_data, items_data)

    if isinstance(solution_details, list):
        results['bins_used'] = len(solution_details)
        results['solution_details'] = solution_details
    else:
        results['bins_used'] = None
        results['solution_details'] = None

except Exception as e:
    results['error'] = str(e)
    results['trace'] = traceback.format_exc()
finally:
    with open(results_path, 'wb') as f:
        pickle.dump(results, f)
""")

        # subprocess.run kills and reaps the child when the timeout expires,
        # so a hanging solver does not leave a stray process behind.
        completed = subprocess.run(
            [sys.executable, script_file_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
        stdout_text = completed.stdout.decode('utf-8', 'ignore')
        stderr_text = completed.stderr.decode('utf-8', 'ignore')

        if completed.returncode != 0:
            raise RuntimeError(f"子进程执行错误 (代码: {completed.returncode}):\n{stderr_text}")

        if not os.path.exists(results_path):
            raise RuntimeError(f"未找到结果文件. stdout: {stdout_text}, stderr: {stderr_text}")

        # NOTE(security): this file is written by the process that executed the
        # candidate program, so the candidate can influence its contents.
        # pickle.load on it is only acceptable if the candidate is trusted or
        # the evaluator itself runs sandboxed.
        with open(results_path, "rb") as f:
            results = pickle.load(f)
        if "error" in results:
            raise RuntimeError(f"求解器内部执行失败: {results['error']}\n{results.get('trace', '')}")
        return results
    finally:
        # Remove both temporary files no matter how the run ended.
        for p in (results_path, script_file_path):
            if p is not None and os.path.exists(p):
                os.unlink(p)

# 【新增】辅助函数, 用于从旧结果文件中解析分数
def parse_bins_from_result_file(filepath):
    """
    Read the algorithm's bin count back out of an existing result file.

    The count is the first integer following "使用的总箱数:" after the
    "--- 算法求解方案 ---" heading. Infinity is returned when the file does
    not exist, cannot be read or parsed, or holds no such entry, so that any
    finite new result compares as smaller than it.
    """
    no_record = float('inf')
    if os.path.exists(filepath):
        try:
            with open(filepath, 'r', encoding='utf-8') as handle:
                text = handle.read()
            # DOTALL lets ".*?" span the lines between heading and count.
            found = re.search(r"---\s算法求解方案\s---.*?使用的总箱数:\s*(\d+)", text, re.DOTALL)
            if found is not None:
                return int(found.group(1))
        except (IOError, ValueError):
            # Unreadable or undecodable file: treat as "no previous record".
            return no_record
    return no_record



def _failure_result(reason):
    """Build the metrics dict returned whenever the evaluation is aborted."""
    return {
        "average_fitness": FAILURE_SCORE,
        "validity_ratio": 0.0,
        "error_reason": reason,
    }


def _load_problem(file_path):
    """Parse one problem file into ``(optimal_bins, capacity, items)``."""
    with open(file_path, 'r', encoding='utf-8') as f:
        full_content = f.read()

    optimal_match = re.search(r"Total bins used:\s*(\d+)", full_content)
    if not optimal_match:
        raise ValueError("在文件中未找到 'Total bins used: ...'.")
    optimal_bins = int(optimal_match.group(1))

    # Only lines made up purely of digits count as data: the first is skipped,
    # the second is used as the capacity, and the rest are the item sizes.
    numeric_lines = [int(line.strip()) for line in full_content.splitlines() if line.strip().isdigit()]
    if len(numeric_lines) < 3:
        raise ValueError("文件中必须至少包含三个数字(物品数量、容量和一个物品).")

    return optimal_bins, numeric_lines[1], numeric_lines[2:]


def _build_result_report(filename, program_path, capacity, items,
                         actual_bins, optimal_bins, solution_details, error):
    """Render the per-instance report text that is stored in the result file."""
    parts = [
        f"--- 对比文件: {filename} ---\n",
        f"求解器: {program_path}\n\n",
        "--- 问题设定 ---\n",
        f"箱子容量: {capacity}\n",
        f"物品数量: {len(items)}\n",
        f"物品列表: {items}\n\n",
        "--- 算法求解方案 ---\n",
        f"使用的总箱数: {actual_bins}\n",
    ]
    for i, bin_content in enumerate(solution_details, 1):
        bin_sum = sum(bin_content)
        parts.append(f"  箱子 {i:02d} (总和={bin_sum:<4}): {bin_content}\n")
    parts.extend([
        "\n--- 官方最优解 ---\n",
        f"使用的总箱数: {optimal_bins}\n\n",
        "--- 性能分析 ---\n",
        f"箱数差异 (算法 - 最优): {actual_bins - optimal_bins}\n",
        f"相对误差: {error:+.4%}\n",
    ])
    return "".join(parts)


def evaluate(program_path):
    """
    Evaluate a BPP solver program in strict "all-or-nothing" mode.

    Every ``.txt`` file in ``PROBLEM_FOLDER`` is parsed, solved in a
    subprocess, and scored as ``-(actual - optimal) / optimal``. The first
    failure of any kind aborts the run. After an instance is solved, its
    detailed report is written to ``OUTPUT_FOLDER`` only when it uses fewer
    bins than the report already stored there.

    Args:
        program_path: Path of the solver file to evaluate.

    Returns:
        A dict with ``average_fitness``, ``validity_ratio`` and
        ``error_reason``. On any failure, including a missing problem folder
        or one without problem files, the values are ``FAILURE_SCORE``,
        ``0.0`` and a message describing the cause. On success they are the
        mean fitness, ``1.0`` and ``None``.
    """
    problem_files_path = os.path.abspath(PROBLEM_FOLDER)
    if not os.path.isdir(problem_files_path):
        return _failure_result(f"评估失败: 问题文件夹 '{problem_files_path}' 不存在.")

    output_dir = os.path.abspath(OUTPUT_FOLDER)
    os.makedirs(output_dir, exist_ok=True)
    print(f"详细结果将保存到: {output_dir}")

    problem_files = sorted(f for f in os.listdir(problem_files_path) if f.endswith(".txt"))
    total_problems = len(problem_files)
    if not problem_files:
        # Without this guard an empty folder would be reported as a fully
        # valid run with the best possible fitness (0.0).
        return _failure_result(f"评估失败: 问题文件夹 '{problem_files_path}' 中没有 .txt 问题文件.")

    all_fitness_scores = []
    failure_reason = None

    for filename in problem_files:
        try:
            optimal_bins, capacity, items = _load_problem(os.path.join(problem_files_path, filename))

            solver_results = run_solver(program_path, capacity, items)
            actual_bins = solver_results.get("bins_used")
            solution_details = solver_results.get("solution_details")

            is_valid, reason = validate_bpp_solution(actual_bins)
            if not is_valid:
                failure_reason = f"对于文件 {filename} 的解无效: {reason}"
                break

            # NOTE(review): only the number of returned bins is checked from
            # here on; nothing in this function verifies that the bins respect
            # the capacity or contain exactly the input items.
            if optimal_bins > 0 and actual_bins <= (optimal_bins * IMPOSSIBLE_PERFORMANCE_THRESHOLD):
                failure_reason = (f"对于文件 {filename} 的解质量异常: "
                                  f"算法结果 {actual_bins} 远小于最优解的{IMPOSSIBLE_PERFORMANCE_THRESHOLD:.0%}或更少 (最优解: {optimal_bins}), 这是不可能的.")
                break
            # A stricter check (reject any result below the listed optimum)
            # used to sit here but was disabled, so such results are scored
            # normally and get a positive fitness.

            if optimal_bins > 0:
                error = (actual_bins - optimal_bins) / optimal_bins
            else:
                error = float('inf') if actual_bins > 0 else 0.0

            fitness_score = -error
            all_fitness_scores.append(fitness_score)

            print(f"✅ 已处理 {filename}: 算法结果={actual_bins}, 最优解={optimal_bins}, 误差={error:+.4%}, 适应度={fitness_score:.4f}")

            # Persist the detailed report only when it beats the stored one.
            if solution_details:
                output_filename = os.path.splitext(filename)[0] + "_result.txt"
                output_path = os.path.join(output_dir, output_filename)

                previous_best_bins = parse_bins_from_result_file(output_path)

                if actual_bins < previous_best_bins:
                    # Build the full text before opening the file, so an error
                    # while formatting a bin cannot leave a truncated report
                    # that would later be read back as the previous best.
                    report = _build_result_report(
                        filename, program_path, capacity, items,
                        actual_bins, optimal_bins, solution_details, error,
                    )
                    with open(output_path, 'w', encoding='utf-8') as out_f:
                        out_f.write(report)
                    print(f"  -> 发现新最优解! 详细结果已更新至: {output_path}")

            print("-" * 30)

        except Exception:
            # Deliberately broad: any error while handling an instance becomes
            # structured feedback for the caller instead of a crash.
            failure_reason = f"处理 {filename} 时评估失败: {traceback.format_exc()}"
            break

    if failure_reason is not None:
        print("\n评估总结: 未能成功解决所有问题. 评测中止.")
        return _failure_result(failure_reason)

    average_fitness = np.mean(all_fitness_scores)
    print(f"\n评估总结: 成功解决所有 {total_problems} 个问题.")
    return {
        "average_fitness": float(average_fitness),
        "validity_ratio": 1.0,
        "error_reason": None
    }


if __name__ == '__main__':
    # Stand-alone entry point: evaluate the solver file in the current
    # directory and print every returned metric.
    solution_file_path = 'first_fit.py'

    if os.path.exists(solution_file_path):
        print(f"--- 正在评估 {solution_file_path} (结果将保存至 {OUTPUT_FOLDER}) ---")
        metrics = evaluate(solution_file_path)
        print("\n--- 评估指标 ---")
        for key, value in metrics.items():
            # A present error reason may span several lines, so it is printed
            # on its own indented line below the key.
            is_error_text = key == "error_reason" and value is not None
            print(f"  - {key}:\n      {value}" if is_error_text else f"  - {key}: {value}")
        print("--------------------")
    else:
        print(f"错误: 未在 '{solution_file_path}' 找到解决方案文件.")
        print("请确保您已创建或修改了一个求解器 (如 first_fit.py),使其 solve 函数返回详细的装箱方案.")

