import json
import re


_ANSWER_MARKER = "The correct answer is "
# Compiled once: captures the text between the marker phrase and the first
# following "," or " because" on the same line.
_ANSWER_PATTERN = re.compile(r"The correct answer is (.+?)(,| because)")


def _extract_answer(answer_str):
    """Return the answer text that follows "The correct answer is ".

    :param answer_str: raw answer sentence taken from the input JSON
    :return: the extracted answer with surrounding whitespace and trailing
        commas removed
    :raises TypeError: if ``answer_str`` is not a string (e.g. JSON ``null``)
    :raises ValueError: if the marker phrase does not occur in ``answer_str``
    """
    if not isinstance(answer_str, str):
        raise TypeError(
            f"expected a string answer, got {type(answer_str).__name__}"
        )

    # Primary strategy: regex match up to the first "," or " because".
    match = _ANSWER_PATTERN.search(answer_str)
    if match:
        answer = match.group(1)
    else:
        # Fallback strategy: plain splitting, used when the marker is not
        # followed by "," or " because" on the same line.
        parts = answer_str.split(_ANSWER_MARKER)
        if len(parts) < 2:
            raise ValueError(f"marker {_ANSWER_MARKER!r} not found")
        answer = parts[1].split(',because')[0].split(', because')[0]

    # Drop surrounding whitespace and any trailing commas left behind.
    return answer.strip().rstrip(',').strip()


def convert_answers(input_file, output_file):
    """
    Convert raw answer sentences into the standard evaluation format.

    The input JSON must be an object mapping question ids to answer strings.
    The output JSON maps every question id to ``{"multiple_choice": answer}``.
    Entries whose answer cannot be extracted (marker phrase missing, or the
    value is not a string) are reported on stdout and kept in the output with
    an empty answer, so the output always has the same keys as the input.

    Both files are read and written as UTF-8, independent of the locale,
    because the output is dumped with ``ensure_ascii=False``.

    :param input_file: path of the raw answer JSON file
    :param output_file: path of the converted JSON file to write
    """
    # Load the raw answers.
    with open(input_file, 'r', encoding='utf-8') as f:
        original_answers = json.load(f)

    processed = {}
    error_count = 0

    for qid, answer_str in original_answers.items():
        try:
            answer = _extract_answer(answer_str)
        except (TypeError, ValueError) as e:
            error_count += 1
            print(f"Error processing {qid}: {e}")
            answer = ""  # keep an empty placeholder for this question

        processed[qid] = {
            'multiple_choice': answer
        }

    # Save the converted answers.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(processed, f, indent=2, ensure_ascii=False)

    print(f"转换完成！成功处理 {len(processed) - error_count} 条，失败 {error_count} 条")


# Example usage. NOTE: this call sits at module level, so it runs whenever the
# file is executed or imported, using the hard-coded paths below.
convert_answers(
    input_file='/home/test/yxl/MCoT/aokvqa/results/qwen/AP_1.json',
    output_file='/home/test/yxl/MCoT/aokvqa/results/qwen/AP_1_val.json',
)