"""
Game session manager for puzzle benchmark system
"""
import asyncio
import json
import logging
from datetime import datetime
from typing import List, Dict, Any, Optional, Callable
from dataclasses import dataclass, asdict
from enum import Enum

from data_manager import PuzzleData
from api_client import UnifiedAPIClient, APIResponse
from response_parser import ResponseParser, ParsedResponse
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import QA_MODEL, EVAL_MODEL, MAX_ROUNDS
from prompts.prompts_v2 import (
    EN_MEDICAL_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_MEDICAL_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_PHILOSOPHY_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_PHILOSOPHY_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_BIOLOGY_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_BIOLOGY_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_EDAICC_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_EDAICC_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_EDAICC2_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_EDAICC2_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_EDAINNOVUSLEGACY_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_EDAINNOVUSLEGACY_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_PHYSICAL_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_PHYSICAL_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_EDAICC2_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_EDAICC2_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_EDAINNOVUSLEGACY_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_EDAINNOVUSCOMMON_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_EDAINNOVUSCOMMON_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_NAME_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_NAME_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_ECONOMICS_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_ECONOMICS_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_CHEMISTRY_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_CHEMISTRY_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_COMPUTER_SCIENCE_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_COMPUTER_SCIENCE_PUZZLE_BENCHMARK_EVAL_PROMPT,
    EN_MATHEMATICS_PUZZLE_BENCHMARK_QA_PROMPT,
    EN_MATHEMATICS_PUZZLE_BENCHMARK_EVAL_PROMPT
)

logger = logging.getLogger(__name__)


class GameResult(Enum):
    """Reason a game session ended."""
    # The EVAL side reported a real answer (not "None" / the placeholder text).
    CORRECT_ANSWER = "correct_answer"
    # The round limit was hit without a correct answer.
    MAX_ROUNDS_REACHED = "max_rounds_reached"
    # The QA or EVAL API kept failing, or the session raised an exception.
    API_ERROR = "api_error"
    # Not assigned anywhere in the visible part of this module.
    PARSE_ERROR = "parse_error"


@dataclass
class RoundData:
    """Record of a single QA -> EVAL exchange (one round of the game)."""
    round: int  # 1-based round number
    qa_prompt: str  # stringified message list sent to the QA model
    qa_response: str  # QA model reply, or a "QA API Failed After ..." message
    eval_prompt: str  # EVAL prompt with puzzle name/reference filled in
    eval_response: str  # EVAL model reply, or an "EVAL API Failed After ..." message
    pure_parse: Optional[str]  # parsed EVAL feedback that is fed back to the QA model
    pure_answer: Optional[str]  # parsed final answer; the text "None" means "not found yet"
    timestamp: str  # ISO-8601 time at which the round started
    qa_api_retries: int = 0  # retries reported by the QA API client
    eval_api_retries: int = 0  # retries reported by the EVAL API client
    qa_reasoning_content: Optional[str] = None  # reasoning content returned by the QA model
    eval_reasoning_content: Optional[str] = None  # reasoning content returned by the EVAL model
    qa_prompt_tokens: int = 0  # prompt tokens of the QA request
    qa_completion_tokens: int = 0  # completion tokens of the QA response
    qa_total_tokens: int = 0  # total tokens of the QA request
    eval_prompt_tokens: int = 0  # prompt tokens of the EVAL request
    eval_completion_tokens: int = 0  # completion tokens of the EVAL response
    eval_total_tokens: int = 0  # total tokens of the EVAL request


@dataclass
class SessionResult:
    """Complete result of one game session."""
    session_id: str  # identifier of the session
    domain: str  # puzzle domain (taken from the puzzle data)
    puzzle_name: str  # puzzle name (taken from the puzzle data)
    qa_model: str  # model used for the QA side
    eval_model: str  # model used for the EVAL side
    start_time: str  # ISO-8601 session start time
    end_time: str  # ISO-8601 session end time
    rounds: List[RoundData]  # recorded rounds, in play order
    final_result: Dict[str, Any]  # summary dict: success, final_answer, total_rounds, reason
    success: bool  # True when the session ended with a correct answer
    total_rounds: int  # number of recorded rounds


class GameSession:
    """Game session manager"""
    
    def __init__(self, puzzle_data: PuzzleData, qa_model: Optional[str] = None, eval_model: Optional[str] = None):
        """Initialize a game session for one puzzle.

        Args:
            puzzle_data: The puzzle to play; its ``domain`` selects the prompt
                templates and its ``name`` is used in the session id.
            qa_model: Model name for the QA side; falls back to ``QA_MODEL``.
            eval_model: Model name for the EVAL side; falls back to ``EVAL_MODEL``.
        """
        self.puzzle_data = puzzle_data
        self.qa_model = qa_model or QA_MODEL
        self.eval_model = eval_model or EVAL_MODEL
        self.max_rounds = MAX_ROUNDS

        # One API client per role.
        self.qa_client = UnifiedAPIClient(self.qa_model)
        self.eval_client = UnifiedAPIClient(self.eval_model)

        # Session state.
        self.conversation_history = []
        self.rounds = []
        self.current_round = 0
        # `result` is the attribute the rest of the class reads and writes;
        # it must exist before any round is played so that checks such as
        # `if not self.result` cannot raise AttributeError.
        self.result = None
        # Kept for backward compatibility with code that reads `game_result`.
        self.game_result = None
        self.start_time = datetime.now()
        self.is_finished = False

        # Optional status callback (see set_status_callback).
        self.status_callback = None

        # Prompt templates are chosen from the puzzle domain.
        self.qa_prompt_template = self._get_qa_prompt_template()
        self.eval_prompt_template = self._get_eval_prompt_template()

        # Session id depends on qa_model and puzzle_data, so it is built last.
        self.session_id = self._generate_session_id()
    
    @classmethod
    def from_existing_session(cls, puzzle_data: PuzzleData, qa_model: str, eval_model: str, 
                             existing_session_data: Dict[str, Any]):
        """Build a GameSession from previously saved session data so it can be continued.

        Args:
            puzzle_data: Puzzle the saved session belongs to.
            qa_model: Model name for the QA side.
            eval_model: Model name for the EVAL side.
            existing_session_data: Saved session dict. The keys ``session_id``,
                ``start_time`` (ISO-8601 string) and ``rounds`` (list of dicts
                with ``RoundData`` field names) are restored when present.

        Returns:
            A new session whose rounds, conversation history and round counter
            reflect the saved data.
        """
        saved = existing_session_data
        session = cls(puzzle_data, qa_model, eval_model)

        # Keep the freshly generated id unless the saved data provides one.
        session.session_id = saved.get("session_id", session.session_id)

        if "start_time" in saved:
            session.start_time = datetime.fromisoformat(saved["start_time"])

        if "rounds" in saved:
            # Fields that are copied only when the saved round contains them;
            # otherwise the RoundData defaults stay in place.
            optional_fields = (
                "qa_reasoning_content", "eval_reasoning_content",
                "qa_prompt_tokens", "qa_completion_tokens", "qa_total_tokens",
                "eval_prompt_tokens", "eval_completion_tokens", "eval_total_tokens",
            )
            restored = []
            for stored in saved["rounds"]:
                record = RoundData(
                    round=stored.get("round", 0),
                    qa_prompt=stored.get("qa_prompt", ""),
                    qa_response=stored.get("qa_response", ""),
                    eval_prompt=stored.get("eval_prompt", ""),
                    eval_response=stored.get("eval_response", ""),
                    pure_parse=stored.get("pure_parse", ""),
                    pure_answer=stored.get("pure_answer", ""),
                    timestamp=stored.get("timestamp", ""),
                    qa_api_retries=stored.get("qa_api_retries", 0),
                    eval_api_retries=stored.get("eval_api_retries", 0)
                )
                for field_name in optional_fields:
                    if field_name in stored:
                        setattr(record, field_name, stored[field_name])
                restored.append(record)
            session.rounds = restored

        # Derive the remaining state from the restored rounds.
        session._rebuild_conversation_history()
        session.current_round = len(session.rounds)

        logger.info(f"Restored session {session.session_id} with {len(session.rounds)} rounds")

        return session
    
    def _rebuild_conversation_history(self):
        """从rounds重建conversation_history"""
        self.conversation_history = []
        
        # 如果没有任何rounds，将使用初始prompt
        if not self.rounds:
            return
            
        # 添加初始系统prompt
        self.conversation_history.append({"role": "user", "content": self.qa_prompt_template})
        
        # 逐轮重建历史
        for round_data in self.rounds:
            # 添加QA响应
            if round_data.qa_response:
                self.conversation_history.append({"role": "assistant", "content": round_data.qa_response})
            
            # 添加EVAL的pure_parse作为用户反馈
            if round_data.pure_parse:
                self.conversation_history.append({"role": "user", "content": round_data.pure_parse})
    
    def needs_eval_completion(self) -> bool:
        """检查最后一轮是否需要完成EVAL"""
        if not self.rounds:
            return False
        
        last_round = self.rounds[-1]
        # 如果最后一轮有QA响应但没有EVAL响应，需要完成EVAL
        return bool(last_round.qa_response and not last_round.eval_response)
    
    def _generate_session_id(self) -> str:
        """生成新格式的会话ID: single_月日_小时分钟_QA模型名称(前五个字母)_puzzle名称（前五个字母）"""
        from datetime import datetime
        import re
        
        now = datetime.now()
        date_str = now.strftime('%m%d_%H%M')
        
        # QA模型名称前5个字母
        qa_model_short = re.sub(r'[^a-zA-Z]', '', self.qa_model)[:6]
        
        # puzzle名称前5个字母（中文字符和英文字符都算）
        puzzle_name = self.puzzle_data.name
        # 移除特殊字符，保留中英文字符
        clean_puzzle = re.sub(r'[^\w\u4e00-\u9fff]', '', puzzle_name)
        puzzle_short = clean_puzzle[:5]
        
        return f"single_{date_str}_{qa_model_short}_{puzzle_short}"
    
    def set_status_callback(self, callback: Callable[[Dict[str, Any]], None]):
        """Register the status callback and refresh the prompt templates.

        Args:
            callback: Callable that receives a status dict.
        """
        self.status_callback = callback

        # NOTE(review): re-selecting the templates and logging "Started game
        # session" from a setter looks like leftover initialisation code;
        # confirm whether callers rely on it before removing.
        self.qa_prompt_template, self.eval_prompt_template = (
            self._get_qa_prompt_template(),
            self._get_eval_prompt_template(),
        )

        logger.info(f"Started game session: {self.session_id}")
    
    def _get_qa_prompt_template(self) -> str:
        """Pick the QA prompt template for the puzzle's domain.

        Subject domains match both their plain name and the ``_part1`` ..
        ``_part4`` variants; the EDA domains match by exact name only. Any
        other domain logs a warning and falls back to the generic name
        template.
        """
        # Domains that also exist as "<name>_part1" .. "<name>_part4".
        sharded = {
            "human_disease": EN_MEDICAL_PUZZLE_BENCHMARK_QA_PROMPT,
            "biology": EN_BIOLOGY_PUZZLE_BENCHMARK_QA_PROMPT,
            "physics": EN_PHYSICAL_PUZZLE_BENCHMARK_QA_PROMPT,
            "mathematics": EN_MATHEMATICS_PUZZLE_BENCHMARK_QA_PROMPT,
            "economics": EN_ECONOMICS_PUZZLE_BENCHMARK_QA_PROMPT,
            "philosophy": EN_PHILOSOPHY_PUZZLE_BENCHMARK_QA_PROMPT,
            "chemistry": EN_CHEMISTRY_PUZZLE_BENCHMARK_QA_PROMPT,
            "computer_science": EN_COMPUTER_SCIENCE_PUZZLE_BENCHMARK_QA_PROMPT,
        }
        # Domains matched by exact name only.
        templates = {
            "eda_icc": EN_EDAICC_PUZZLE_BENCHMARK_QA_PROMPT,
            "eda_icc2": EN_EDAICC2_PUZZLE_BENCHMARK_QA_PROMPT,
            "eda_innovus_legacy": EN_EDAINNOVUSLEGACY_PUZZLE_BENCHMARK_QA_PROMPT,
            "eda_innovus_common": EN_EDAINNOVUSCOMMON_PUZZLE_BENCHMARK_QA_PROMPT,
        }
        for base_name, prompt in sharded.items():
            templates[base_name] = prompt
            for part in range(1, 5):
                templates[f"{base_name}_part{part}"] = prompt

        domain = self.puzzle_data.domain
        # Compare with == (rather than hashing) to mirror plain equality checks.
        for known_domain, prompt in templates.items():
            if domain == known_domain:
                return prompt

        logger.warning(f"No specific QA prompt for domain {self.puzzle_data.domain}, using name template")
        return EN_NAME_PUZZLE_BENCHMARK_QA_PROMPT
    
    def _get_eval_prompt_template(self) -> str:
        """Pick the EVAL prompt template for the puzzle's domain.

        Subject domains match both their plain name and the ``_part1`` ..
        ``_part4`` variants; the EDA domains match by exact name only. Any
        other domain logs a warning and falls back to the generic name
        template.
        """
        # Domains that also exist as "<name>_part1" .. "<name>_part4".
        sharded = {
            "human_disease": EN_MEDICAL_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "biology": EN_BIOLOGY_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "mathematics": EN_MATHEMATICS_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "physics": EN_PHYSICAL_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "economics": EN_ECONOMICS_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "philosophy": EN_PHILOSOPHY_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "chemistry": EN_CHEMISTRY_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "computer_science": EN_COMPUTER_SCIENCE_PUZZLE_BENCHMARK_EVAL_PROMPT,
        }
        # Domains matched by exact name only.
        templates = {
            "eda_icc": EN_EDAICC_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "eda_icc2": EN_EDAICC2_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "eda_innovus_legacy": EN_EDAINNOVUSLEGACY_PUZZLE_BENCHMARK_EVAL_PROMPT,
            "eda_innovus_common": EN_EDAINNOVUSCOMMON_PUZZLE_BENCHMARK_EVAL_PROMPT,
        }
        for base_name, prompt in sharded.items():
            templates[base_name] = prompt
            for part in range(1, 5):
                templates[f"{base_name}_part{part}"] = prompt

        domain = self.puzzle_data.domain
        # Compare with == (rather than hashing) to mirror plain equality checks.
        for known_domain, prompt in templates.items():
            if domain == known_domain:
                return prompt

        logger.warning(f"No specific EVAL prompt for domain {self.puzzle_data.domain}, using name template")
        return EN_NAME_PUZZLE_BENCHMARK_EVAL_PROMPT
    
    async def play_round(self) -> RoundData:
        """Run one QA -> EVAL exchange and record the outcome.

        The QA model is queried with the conversation so far (up to 18
        attempts, 10 s apart). Its reply is wrapped in ``<qa_question>`` tags
        and sent to the EVAL model together with the domain EVAL prompt (up to
        8 attempts, 10 s apart). An EVAL reply ends the retry loop when it
        passes ``ResponseParser.validate_eval_response`` and either reports a
        ``pure_answer`` of ``"None"`` or gets a definite verdict from
        ``validate_eval_success``; a rejected answer is rewritten to ``None``.

        Returns:
            The ``RoundData`` for this round. When the QA or EVAL API never
            returns usable content, the record carries a
            ``"... API Failed After N Attempts: ..."`` message and is NOT
            appended to ``self.rounds`` or to the conversation history.

        Side effects:
            On success appends to ``self.rounds`` and
            ``self.conversation_history``; when a real answer was found, sets
            ``self.is_finished`` and ``self.result``.
        """
        import re  # local import: `re` is not imported at module level

        round_num = len(self.rounds) + 1
        timestamp = datetime.now().isoformat()

        logger.info(f"Starting round {round_num}")

        # 1. QA side asks its question (with retries).
        qa_messages = self._build_qa_messages()
        qa_response = None
        qa_retries = 0
        max_qa_retries = 18

        for qa_attempt in range(max_qa_retries):
            qa_response = await self.qa_client.chat_completion(qa_messages)
            qa_retries = qa_response.retries_used
            qa_has_content = bool(qa_response.content and qa_response.content.strip())

            if qa_response.success and qa_has_content:
                logger.info(f"QA API successful in round {round_num}, attempt {qa_attempt + 1}")
                break
            if qa_response.success:
                logger.warning(f"QA API returned empty response in round {round_num}, attempt {qa_attempt + 1}")
            else:
                logger.warning(f"QA API failed in round {round_num}, attempt {qa_attempt + 1}: {qa_response.error}")

            # Wait before the next attempt, but not after the last one.
            if qa_attempt < max_qa_retries - 1:
                logger.info("Waiting 10s before QA retry...")
                await asyncio.sleep(10)

        # Every QA attempt failed or came back empty.
        if not qa_response or not qa_response.success or not qa_response.content or not qa_response.content.strip():
            logger.error(f"QA API failed after {max_qa_retries} attempts in round {round_num}")
            return RoundData(
                round=round_num,
                qa_prompt=str(qa_messages),
                qa_response=f"QA API Failed After {max_qa_retries} Attempts: {qa_response.error if qa_response else 'No response'}",
                eval_prompt="",
                eval_response="",
                pure_parse=None,
                pure_answer=None,
                timestamp=timestamp,
                qa_api_retries=qa_retries,
                qa_reasoning_content=None,
                eval_reasoning_content=None,
                qa_prompt_tokens=0,
                qa_completion_tokens=0,
                qa_total_tokens=0,
                eval_prompt_tokens=0,
                eval_completion_tokens=0,
                eval_total_tokens=0
            )

        # 2. EVAL side answers the question (with retries).
        eval_prompt = self.eval_prompt_template.format(
            puzzle_name=self.puzzle_data.name,
            puzzle_reference=self.puzzle_data.description
        )
        eval_messages = [{"role": "user", "content": "<qa_question>" + qa_response.content + "</qa_question>"}]
        eval_response = None
        eval_retries = 0
        max_eval_retries = 8

        for eval_attempt in range(max_eval_retries):
            eval_response = await self.eval_client.chat_completion(eval_messages, eval_prompt)
            eval_retries = eval_response.retries_used

            if not eval_response.success:
                logger.warning(f"EVAL API failed in round {round_num}, attempt {eval_attempt + 1}: {eval_response.error}")
            elif not eval_response.content or not eval_response.content.strip():
                logger.warning(f"EVAL API returned empty response in round {round_num}, attempt {eval_attempt + 1}")
            elif not ResponseParser.validate_eval_response(eval_response.content):
                logger.warning(f"Invalid EVAL response format in round {round_num}, attempt {eval_attempt + 1}")
            else:
                temp_parsed = ResponseParser.parse_eval_response(eval_response.content)
                candidate = (temp_parsed.pure_answer or "").strip()
                if candidate == "Your preset name":
                    # The model echoed the template placeholder; retry.
                    logger.warning(f"EVAL returned invalid pure_answer 'Your preset name' in round {round_num}, attempt {eval_attempt + 1}")
                elif candidate == "None":
                    # "No answer yet" needs no further validation.
                    break
                else:
                    # EVAL claims success: double-check that claim. Any error
                    # here (including a failed import) leads to a retry.
                    try:
                        from code.core.eval_validator import validate_eval_success, get_puzzle_reference

                        puzzle_reference = get_puzzle_reference(self.puzzle_data)

                        validation_result = await validate_eval_success(
                            source_text=qa_response.content,
                            specified_content=temp_parsed.pure_answer,
                            reference=puzzle_reference
                        )

                        if validation_result is False:
                            logger.warning(f"EVAL success validation failed in round {round_num}, attempt {eval_attempt + 1}: QA response may not truly contain the specified content")
                            # Downgrade the claimed answer to "None" and stop retrying.
                            eval_response.content = re.sub(
                                r'<pure_answer>.*?</pure_answer>',
                                '<pure_answer>None</pure_answer>',
                                eval_response.content,
                                flags=re.DOTALL
                            )
                            logger.info("Modified eval_response pure_answer to None due to validation failure")
                            break
                        elif validation_result is True:
                            logger.info(f"EVAL success validation passed in round {round_num}, attempt {eval_attempt + 1}")
                            break
                        else:
                            # Neither True nor False: the validation call itself failed; retry.
                            logger.warning(f"EVAL success validation API failed in round {round_num}, attempt {eval_attempt + 1}, continuing with retry")
                    except Exception as e:
                        logger.warning(f"EVAL success validation error in round {round_num}, attempt {eval_attempt + 1}: {e}")

            # Wait before the next attempt, but not after the last one.
            if eval_attempt < max_eval_retries - 1:
                logger.info("Waiting 10s before EVAL retry...")
                await asyncio.sleep(10)

        # EVAL never produced any usable content.
        if not eval_response or not eval_response.success or not eval_response.content or not eval_response.content.strip():
            logger.error(f"EVAL API failed after {max_eval_retries} attempts in round {round_num}")
            return RoundData(
                round=round_num,
                qa_prompt=str(qa_messages),
                qa_response=qa_response.content,
                eval_prompt=eval_prompt,
                eval_response=f"EVAL API Failed After {max_eval_retries} Attempts: {eval_response.error if eval_response else 'No response'}",
                pure_parse=None,
                pure_answer=None,
                timestamp=timestamp,
                qa_api_retries=qa_retries,
                eval_api_retries=eval_retries,
                qa_reasoning_content=qa_response.reasoning_content,
                eval_reasoning_content=None,
                qa_prompt_tokens=qa_response.prompt_tokens,
                qa_completion_tokens=qa_response.completion_tokens,
                qa_total_tokens=qa_response.total_tokens,
                eval_prompt_tokens=0,
                eval_completion_tokens=0,
                eval_total_tokens=0
            )

        # NOTE: if the retry loop ran out without a `break`, the last non-empty
        # EVAL response is still parsed and recorded here.
        parsed_eval = ResponseParser.parse_eval_response(eval_response.content)

        round_data = RoundData(
            round=round_num,
            qa_prompt=str(qa_messages),
            qa_response=qa_response.content,
            eval_prompt=eval_prompt,
            eval_response=eval_response.content,
            pure_parse=parsed_eval.pure_parse,
            pure_answer=parsed_eval.pure_answer,
            timestamp=timestamp,
            qa_api_retries=qa_retries,
            eval_api_retries=eval_retries,
            qa_reasoning_content=qa_response.reasoning_content,
            eval_reasoning_content=eval_response.reasoning_content,
            qa_prompt_tokens=qa_response.prompt_tokens,
            qa_completion_tokens=qa_response.completion_tokens,
            qa_total_tokens=qa_response.total_tokens,
            eval_prompt_tokens=eval_response.prompt_tokens,
            eval_completion_tokens=eval_response.completion_tokens,
            eval_total_tokens=eval_response.total_tokens
        )

        # Update the QA-side conversation: the first round starts a fresh
        # history with the QA prompt, later rounds extend the existing one.
        if not self.rounds:
            self.conversation_history = [{"role": "user", "content": self.qa_prompt_template}]
        self.conversation_history.append({"role": "assistant", "content": qa_response.content})
        if parsed_eval.pure_parse:
            self.conversation_history.append({"role": "user", "content": parsed_eval.pure_parse})

        self.rounds.append(round_data)

        # A real answer is anything other than the "None" sentinel or the
        # template placeholder. Compare the stripped value, exactly as the
        # retry loop does, so " None " or a blank answer is not scored as a win.
        final_answer = (parsed_eval.pure_answer or "").strip()
        if final_answer and final_answer not in ("None", "Your preset name"):
            self.is_finished = True
            self.result = GameResult.CORRECT_ANSWER
            logger.info(f"Game finished with correct answer in round {round_num}: {parsed_eval.pure_answer}")

        return round_data
    
    def _build_qa_messages(self) -> List[Dict[str, str]]:
        """构建QA系统的消息历史"""
        # 如果是第一轮，使用系统prompt
        if not self.conversation_history:
            return [{"role": "user", "content": self.qa_prompt_template}]
        
        # 对于后续轮次，直接使用完整的对话历史
        # conversation_history已经包含了完整的历史：[user: system_prompt, assistant: qa1, user: parse1, ...]
        return self.conversation_history.copy()
    
    async def run_full_game(self) -> SessionResult:
        """Play rounds until the puzzle is solved, the round limit is hit, or the API gives up.

        Returns:
            A ``SessionResult`` summarising the session. ``final_result``
            holds ``success``, ``final_answer``, ``total_rounds`` and
            ``reason`` (the ``GameResult`` value).

        Any exception raised while playing is logged with its traceback and
        reported as ``GameResult.API_ERROR`` instead of propagating.
        """
        logger.info(f"Starting full game for puzzle: {self.puzzle_data.name}")

        # Make sure `result` exists even if no round ever assigns it; reading
        # it below would otherwise raise AttributeError, which the handler
        # further down would misreport as an API error.
        self.result = getattr(self, "result", None)

        try:
            while not self.is_finished and len(self.rounds) < MAX_ROUNDS:
                round_data = await self.play_round()

                # play_round signals a persistent API failure through these
                # exact message prefixes. Matching the prefix (rather than
                # loose substrings) keeps ordinary model text that happens to
                # contain "Failed After"/"Attempts" from ending the game.
                qa_failed = round_data.qa_response.startswith("QA API Failed After ")
                eval_failed = round_data.eval_response.startswith("EVAL API Failed After ")
                if qa_failed or eval_failed:
                    # Failed rounds are not appended to self.rounds, so take
                    # the round number from the returned record.
                    logger.error(f"Game session terminated due to persistent API failures in round {round_data.round}")
                    self.result = GameResult.API_ERROR
                    self.is_finished = True
                    break

                # Round limit reached: a correct answer found in this very
                # round takes precedence over MAX_ROUNDS_REACHED.
                if len(self.rounds) >= MAX_ROUNDS:
                    if not self.is_finished or self.result != GameResult.CORRECT_ANSWER:
                        self.result = GameResult.MAX_ROUNDS_REACHED
                        self.is_finished = True
                    break

            # No round decided the outcome (e.g. the loop never ran): derive
            # it from the last recorded answer.
            if not self.result:
                final_answer = self._get_final_answer()
                cleaned_answer = final_answer.strip() if final_answer else ""
                if cleaned_answer and cleaned_answer not in ("None", "Your preset name"):
                    self.result = GameResult.CORRECT_ANSWER
                    logger.info(f"Found correct answer in final round: {final_answer}")
                else:
                    self.result = GameResult.MAX_ROUNDS_REACHED

        except Exception as e:
            # Top-level boundary: keep the traceback in the log, then report
            # the session as failed rather than crashing the caller.
            logger.exception(f"Game session error: {e}")
            self.result = GameResult.API_ERROR
            self.is_finished = True

        # Assemble the final result.
        end_time = datetime.now()
        final_result = {
            "success": self.result == GameResult.CORRECT_ANSWER,
            "final_answer": self._get_final_answer(),
            "total_rounds": len(self.rounds),
            "reason": self.result.value
        }

        session_result = SessionResult(
            session_id=self.session_id,
            domain=self.puzzle_data.domain,
            puzzle_name=self.puzzle_data.name,
            qa_model=self.qa_model,
            eval_model=self.eval_model,
            start_time=self.start_time.isoformat(),
            end_time=end_time.isoformat(),
            rounds=self.rounds,
            final_result=final_result,
            success=final_result["success"],
            total_rounds=len(self.rounds)
        )

        logger.info(f"Game completed: {self.session_id}, Success: {final_result['success']}, Rounds: {len(self.rounds)}")

        return session_result
    
    async def run_continuation_game(self) -> SessionResult:
        """继续运行未完成的游戏会话"""
        from code.config import MAX_ROUNDS
        
        logger.info(f"Continuing game for puzzle: {self.puzzle_data.name} from round {len(self.rounds) + 1}")
        
        try:
            # 检查是否需要先完成最后一轮的EVAL
            if self.needs_eval_completion():
                logger.info("Completing EVAL for the last incomplete round")
                
                last_round = self.rounds[-1]
                
                # 构建EVAL消息
                eval_messages = [
                    {"role": "user", "content": f"<qa_response>{last_round.qa_response}</qa_response>"},
                    {"role": "user", "content": self.eval_prompt_template}
                ]
                
                eval_retries = 0
                max_eval_retries = 8
                
                for eval_attempt in range(max_eval_retries):
                    eval_response = await self.eval_client.chat_completion(eval_messages, self.eval_prompt_template)
                    eval_retries = eval_response.retries_used
                    
                    # 检查API是否成功
                    if not eval_response.success:
                        logger.warning(f"EVAL API failed in continuation, attempt {eval_attempt + 1}: {eval_response.error}")
                    elif not eval_response.content or not eval_response.content.strip():
                        logger.warning(f"EVAL API returned empty response in continuation, attempt {eval_attempt + 1}")
                    else:
                        # 验证响应格式
                        if ResponseParser.validate_eval_response(eval_response.content):
                            # 进一步检查pure_answer是否为无效内容
                            temp_parsed = ResponseParser.parse_eval_response(eval_response.content)
                            if temp_parsed.pure_answer and temp_parsed.pure_answer.strip() == "Your preset name":
                                logger.warning(f"EVAL returned invalid pure_answer 'Your preset name' in continuation, attempt {eval_attempt + 1}")
                            elif temp_parsed.pure_answer and temp_parsed.pure_answer.strip() == "None":
                                break
                            else:
                                # 添加EVAL成功验证检查
                                try:
                                    from code.core.eval_validator import validate_eval_success, get_puzzle_reference
                                    
                                    # 获取puzzle reference
                                    puzzle_reference = get_puzzle_reference(self.puzzle_data)
                                    
                                    # 验证EVAL成功的真实性
                                    validation_result = await validate_eval_success(
                                        source_text=last_round.qa_response,
                                        specified_content=temp_parsed.pure_answer,
                                        reference=puzzle_reference
                                    )
                                    
                                    if validation_result is False:
                                        logger.warning(f"EVAL success validation failed in continuation, attempt {eval_attempt + 1}: QA response may not truly contain the specified content")
                                        # 修改eval_response.content中的<pure_answer>为None并退出循环
                                        import re
                                        eval_response.content = re.sub(
                                            r'<pure_answer>.*?</pure_answer>', 
                                            '<pure_answer>None</pure_answer>', 
                                            eval_response.content, 
                                            flags=re.DOTALL
                                        )
                                        logger.info(f"Modified eval_response pure_answer to None due to validation failure in continuation")
                                        break
                                    elif validation_result is True:
                                        logger.info(f"EVAL success validation passed in continuation, attempt {eval_attempt + 1}")
                                        break
                                    else:
                                        logger.warning(f"EVAL success validation API failed in continuation, attempt {eval_attempt + 1}, continuing with retry")
                                        # API调用失败，继续重试
                                except Exception as e:
                                    logger.warning(f"EVAL success validation error in continuation, attempt {eval_attempt + 1}: {e}")
                                    # 验证过程出错，继续重试
                        else:
                            logger.warning(f"Invalid EVAL response format in continuation, attempt {eval_attempt + 1}")
                    
                    # 如果不是最后一次尝试，等待后重试
                    if eval_attempt < max_eval_retries - 1:
                        logger.info(f"Waiting 10s before EVAL retry...")
                        await asyncio.sleep(10)
                
                # 更新最后一轮的EVAL信息
                if eval_response.success and eval_response.content:
                    parsed_eval = ResponseParser.parse_eval_response(eval_response.content)
                    
                    last_round.eval_response = eval_response.content
                    last_round.pure_parse = parsed_eval.pure_parse or ""
                    last_round.pure_answer = parsed_eval.pure_answer or ""
                    last_round.eval_api_retries = eval_retries
                    
                    # 添加推理内容和token统计
                    if hasattr(eval_response, 'reasoning_content') and eval_response.reasoning_content:
                        last_round.eval_reasoning_content = eval_response.reasoning_content
                    if hasattr(eval_response, 'prompt_tokens'):
                        last_round.eval_prompt_tokens = eval_response.prompt_tokens
                    if hasattr(eval_response, 'completion_tokens'):
                        last_round.eval_completion_tokens = eval_response.completion_tokens
                    if hasattr(eval_response, 'total_tokens'):
                        last_round.eval_total_tokens = eval_response.total_tokens
                    
                    # 添加EVAL结果到conversation_history
                    if parsed_eval.pure_parse:
                        self.conversation_history.append({"role": "user", "content": parsed_eval.pure_parse})
                    
                    # 检查是否在补充EVAL后就找到了答案
                    if parsed_eval.pure_answer and parsed_eval.pure_answer != "None" and parsed_eval.pure_answer.strip() != "Your preset name":
                        self.is_finished = True
                        self.result = GameResult.CORRECT_ANSWER
                        logger.info(f"Game finished with correct answer after completing EVAL: {parsed_eval.pure_answer}")
                else:
                    # EVAL完全失败
                    last_round.eval_response = f"Failed After {max_eval_retries} Attempts"
                    last_round.eval_api_retries = max_eval_retries
                    logger.error("EVAL completion failed after all retries")
            
            # 如果还没有结束，继续进行新的rounds
            while not self.is_finished and len(self.rounds) < MAX_ROUNDS:
                round_data = await self.play_round()
                
                # 检查是否因为API错误而需要停止
                if ("Failed After" in round_data.qa_response and "Attempts" in round_data.qa_response) or \
                   ("Failed After" in round_data.eval_response and "Attempts" in round_data.eval_response):
                    logger.error(f"Game session terminated due to persistent API failures in round {len(self.rounds)}")
                    self.result = GameResult.API_ERROR
                    self.is_finished = True
                    break
                
                # 检查轮次限制
                if len(self.rounds) >= MAX_ROUNDS:
                    if not self.is_finished or self.result != GameResult.CORRECT_ANSWER:
                        self.result = GameResult.MAX_ROUNDS_REACHED
                        self.is_finished = True
                    break
            
            # 如果没有设置结果，检查是否找到了答案
            if not self.result:
                final_answer = self._get_final_answer()
                if final_answer and final_answer != "None" and final_answer.strip() != "Your preset name":
                    self.result = GameResult.CORRECT_ANSWER
                    logger.info(f"Found correct answer in continuation: {final_answer}")
                else:
                    self.result = GameResult.MAX_ROUNDS_REACHED
        
        except Exception as e:
            logger.error(f"Game continuation error: {e}")
            self.result = GameResult.API_ERROR
            self.is_finished = True
        
        # 生成最终结果
        end_time = datetime.now()
        final_result = {
            "success": self.result == GameResult.CORRECT_ANSWER,
            "final_answer": self._get_final_answer(),
            "total_rounds": len(self.rounds),
            "reason": self.result.value
        }
        
        session_result = SessionResult(
            session_id=self.session_id,
            domain=self.puzzle_data.domain,
            puzzle_name=self.puzzle_data.name,
            qa_model=self.qa_model,
            eval_model=self.eval_model,
            start_time=self.start_time.isoformat(),
            end_time=end_time.isoformat(),
            rounds=self.rounds,
            final_result=final_result,
            success=final_result["success"],
            total_rounds=len(self.rounds)
        )
        
        logger.info(f"Game continuation completed: {self.session_id}, Success: {final_result['success']}, Rounds: {len(self.rounds)}")
        
        return session_result
    
    def _get_final_answer(self) -> Optional[str]:
        """Return the most recent usable answer recorded in the rounds.

        Rounds are scanned from newest to oldest and the first round whose
        ``pure_answer`` is a real answer wins. A value is not a real answer
        when it is empty/``None``, consists only of whitespace, or, after
        trimming surrounding whitespace, equals one of the sentinel strings
        ``"None"`` or ``"Your preset name"``.

        Returns:
            The stored ``pure_answer`` exactly as recorded (not stripped),
            or ``None`` when no round holds a usable answer.
        """
        for round_data in reversed(self.rounds):
            answer = round_data.pure_answer
            if not answer:
                continue
            # Strip before comparing so padded sentinels such as " None\n"
            # are rejected the same way the bare sentinel is.
            if answer.strip() in ("", "None", "Your preset name"):
                continue
            return answer
        return None