"""
Poster Adaptive QA
Simplified implementation for Flexible and Merge (MPC) modes
"""

import os
import sys
from pathlib import Path
from typing import List, Dict, Optional, Tuple, Union

# Add original src directory to path
_original_src_path = Path(__file__).parent.parent.parent / "src"
if str(_original_src_path) not in sys.path:
    sys.path.insert(0, str(_original_src_path))

from html_ad_workflow_qa_with_answer import QnAWithAnswerHTMLAdGenerator, TokenCounter, TokenTrackingCallback
try:
    from multiagent.gemini_token_callback import GeminiTokenTrackingCallback
except ImportError:
    GeminiTokenTrackingCallback = None
from langchain_core.messages import HumanMessage, SystemMessage

# Add current directory to path
_current_dir = Path(__file__).parent
if str(_current_dir) not in sys.path:
    sys.path.insert(0, str(_current_dir))


class PosterAdaptiveQA:
    """Poster Adaptive QA - Flexible and Adaptive format support
    
    - is_adaptive_format=True: Adaptive Mode (guided format selection)
    - is_adaptive_format=False: Flexible Mode (free format selection, no guidance)
    - mpc_enabled=True: MPQC Mode (with trajectory and accept/reject)
    - mpc_enabled=False: Naive Mode (direct answer)
    """
    
    def __init__(self, config, model_version="gpt41", mpc_enabled=False, is_adaptive_format=False, rag_agent=None, question_format=None,
                 question_agent_model_version=None, answer_agent_model_version=None, global_model_version=None):
        """
        Set up the QA driver: resolve model versions, build the base generator,
        zero the token counters and load the prompt files.

        Args:
            config: Configuration object (stored and forwarded to the base generator)
            model_version: Legacy single model version; used whenever a role-specific version is not given
            mpc_enabled: Whether MPC (Merge Mode with trajectory) is enabled
            is_adaptive_format: True for Adaptive (guided) format, False for Flexible (free) format
            rag_agent: Optional RAG agent for question retrieval
            question_format: Question format (for Fixed formats: FIXED_BINARY, FIXED_MULTICHOICE, FIXED_OPENTEXT, FREE_ASK)
            question_agent_model_version: Model version for the question agent (falls back to model_version)
            answer_agent_model_version: Model version for the answer agent (falls back to model_version)
            global_model_version: Global model version (falls back to model_version)
        """
        # Each role-specific version falls back to the legacy ``model_version``.
        question_model = question_agent_model_version or model_version
        answer_model = answer_agent_model_version or model_version
        global_model = global_model_version or model_version

        # Plain settings copied from the arguments.
        self.config = config
        self.is_adaptive_format = is_adaptive_format
        self.mpc_enabled = mpc_enabled
        self.question_format = question_format
        self.rag_agent = rag_agent
        self.output_folder = None
        self.logo_info = None

        # ``model_version`` mirrors the question-agent version (kept for backward compatibility).
        self.model_version = question_model
        self.question_agent_model_version = question_model
        self.answer_agent_model_version = answer_model
        self.global_model_version = global_model

        # Base generator: this class uses its LLM instances, image_to_base64 and logo handling.
        # Poster prompts are read from prompt files by _load_prompts().
        self.generator = QnAWithAnswerHTMLAdGenerator(
            config=config,
            question_format="open_text",
            model_version=model_version,
            question_agent_model_version=question_model,
            answer_agent_model_version=answer_model,
            global_model_version=global_model,
        )

        # Token tracking: one running counter dict per agent, all starting at zero.
        self.token_counter = TokenCounter()
        stat_keys = ("input_tokens", "output_tokens", "reasoning_tokens", "total_tokens")
        self.questioner_token_stats = dict.fromkeys(stat_keys, 0)
        self.answerer_token_stats = dict.fromkeys(stat_keys, 0)

        # Prompt templates depend on question_format and mpc_enabled set above.
        self._load_prompts()
    
    def set_logo_info(self, logo_path: str):
        """Set logo information"""
        self.generator.set_logo_info(logo_path)
        self.logo_info = self.generator.logo_info

    @staticmethod
    def _strip_next_plan(text: str) -> str:
        """Remove trailing planning hints like '| Next: ...' from question text."""
        if not text:
            return text
        import re
        return re.sub(r'(\s*\|\s*Next:.*$|\s*Next:\s*.*$)', '', text, flags=re.IGNORECASE).strip()
    
    def _check_satisfaction(self, plan: Optional[Dict], conversation_history: List[Tuple[str, str]]) -> bool:
        """Check if question agent is satisfied with current information."""
        prompts_dir = Path(__file__).parent.parent / "prompts"
        satisfaction_prompt_path = prompts_dir / "poster_qa_satisfaction_check.txt"
        
        if not satisfaction_prompt_path.exists():
            return False
        
        plan_str = ""
        if plan:
            import json
            plan_str = json.dumps(plan, indent=2, ensure_ascii=False)
        else:
            plan_str = "No plan yet."
        
        qa_history_text = ""
        if conversation_history:
            qa_history_text = "\n".join([
                f"Q: {q}\nA: {a}"
                for q, a in conversation_history[-10:]
            ])
        
        with open(satisfaction_prompt_path, 'r', encoding='utf-8') as f:
            prompt_template = f.read()
        
        prompt = prompt_template.replace("{current_plan}", plan_str)
        prompt = prompt.replace("{qa_history}", qa_history_text)
        
        system_instruction = ""
        if "[SYSTEM]" in prompt and "[SATISFACTION_CHECK]" in prompt:
            parts = prompt.split("[SATISFACTION_CHECK]")
            system_instruction = parts[0].replace("[SYSTEM]", "").strip()
            prompt = parts[1] if len(parts) > 1 else prompt
        
        try:
            messages = [SystemMessage(content=system_instruction)] if system_instruction else []
            messages.append(HumanMessage(content=prompt))
            
            # Use lower temperature for more conservative/strict evaluation
            # Handle both LLM wrapper (has .llm attribute) and direct LangChain wrapper (no .llm attribute)
            # Use questioner_llm for question format evaluation (part of questioner agent)
            llm_instance = self.generator.questioner_llm.llm if hasattr(self.generator.questioner_llm, 'llm') else self.generator.questioner_llm
            try:
                config = {"temperature": 0.0}
                response = llm_instance.invoke(messages, config=config)
            except:
                response = llm_instance.invoke(messages)
            response_text = response.content.strip() if hasattr(response, 'content') else str(response)
            response_upper = response_text.upper().strip()
            first_line = response_upper.splitlines()[0].strip() if response_upper else ""
            # Only return True if explicitly SATISFIED on the first line (avoid "NOT SATISFIED")
            return first_line.startswith("SATISFIED")
        except Exception as e:
            print(f"⚠️ Error checking satisfaction: {e}")
            return False
    
    def _load_prompts(self):
        """Load all prompts from files"""
        prompts_dir = Path(__file__).parent.parent / "prompts"
        
        # Load shared questioning principles
        shared_principles_path = prompts_dir / "poster_shared_principles.txt"
        shared_principles = ""
        if shared_principles_path.exists():
            with open(shared_principles_path, 'r', encoding='utf-8') as f:
                shared_principles = f.read()
        
        # Load base prompt and inject shared principles
        base_prompt_path = prompts_dir / "poster_qa_base.txt"
        self.qa_system_prompt = ""
        if base_prompt_path.exists():
            with open(base_prompt_path, 'r', encoding='utf-8') as f:
                base_content = f.read()
                self.qa_system_prompt = base_content.replace("{shared_principles}", shared_principles)

        # Load Fixed format prompts (for Naive_Agent fixed formats)
        self.fixed_format_instruction = ""
        if self.question_format:
            from poster_config import QuestionFormat
            if self.question_format == QuestionFormat.FIXED_BINARY:
                fixed_format_path = prompts_dir / "poster_qa_format_binary.txt"
            elif self.question_format == QuestionFormat.FIXED_MULTICHOICE:
                fixed_format_path = prompts_dir / "poster_qa_format_multi_choice.txt"
            elif self.question_format == QuestionFormat.FIXED_OPENTEXT:
                fixed_format_path = prompts_dir / "poster_qa_format_open_text.txt"
            elif self.question_format == QuestionFormat.FREE_ASK:
                fixed_format_path = prompts_dir / "poster_qa_free_ask.txt"
            else:
                fixed_format_path = None
            
            if fixed_format_path and fixed_format_path.exists():
                with open(fixed_format_path, 'r', encoding='utf-8') as f:
                    self.fixed_format_instruction = f.read()
        
        # Load flexible mode format instruction (inject shared principles)
        flexible_prompt_path = prompts_dir / "poster_qa_flexible_mode.txt"
        with open(flexible_prompt_path, 'r', encoding='utf-8') as f:
            flexible_content = f.read()
            flexible_content = flexible_content.replace("{base_prompt}", "").strip()
            self.flexible_format_instruction = flexible_content.replace("{shared_principles}", shared_principles)

        # Load adaptive mode format instruction (inject shared principles)
        adaptive_prompt_path = prompts_dir / "poster_qa_adaptive_mode.txt"
        with open(adaptive_prompt_path, 'r', encoding='utf-8') as f:
            adaptive_content = f.read()
            adaptive_content = adaptive_content.replace("{base_prompt}", "").strip()
            self.adaptive_format_instruction = adaptive_content.replace("{shared_principles}", shared_principles)
        
        # Load User Agent prompt based on mode
        # MPQC uses respond/reject format, non-MPQC uses answer-only format
        if self.mpc_enabled:
            user_prompt_path = prompts_dir / "poster_user_respond_reject.txt"
        else:
            user_prompt_path = prompts_dir / "poster_user_answer_only.txt"
        
        with open(user_prompt_path, 'r', encoding='utf-8') as f:
            self.eval_answer_prompt_template = f.read()

    def _format_history(self, conversation_history: List[Tuple[str, str]]) -> str:
        """Format conversation history for paper user message."""
        if not conversation_history:
            return ""
        lines = []
        for i, (q, a) in enumerate(conversation_history, 1):
            lines.append(f"Q{i}: {q}\nA{i}: {a}")
        return "\n\n".join(lines)

    def _build_user_message_paper(
        self,
        item_description: str,
        conversation_history: List[Tuple[str, str]],
        plan: Optional[Dict],
        format_instruction: str,
        rag_context: Optional[str] = None,
    ) -> str:
        """Build Question Agent user message per paper: Initial prompt, Current plan, History, format instruction."""
        import json
        plan_str = json.dumps(plan, indent=2, ensure_ascii=False) if plan else "No plan yet."
        history_str = self._format_history(conversation_history) if conversation_history else "None yet."
        user_msg = f"""Initial prompt: {item_description}
Current plan: {plan_str}
History: {history_str}

{format_instruction}"""
        if rag_context:
            user_msg += f"\n\nReference questions (use as inspiration only):\n{rag_context}"
        return user_msg

    def _get_format_instruction(self) -> str:
        """Return format instruction for current mode (fixed, adaptive, or flexible)."""
        if self.fixed_format_instruction:
            return self.fixed_format_instruction
        if self.is_adaptive_format:
            return self.adaptive_format_instruction
        return self.flexible_format_instruction

    def _generate_single_question(
        self,
        item_description: str,
        conversation_history: List[Tuple[str, str]],
        plan: Optional[Dict] = None,
        answer_image_path: Optional[str] = None,
    ) -> Tuple[str, Dict]:
        """Generate one question using paper prompt structure: system (poster_qa_base) + user (Initial plan, History, format)."""
        import re
        rag_context_text = None
        rag_result = None
        if self.rag_agent:
            plan_text = str(plan) if plan else ""
            rag_result = self.rag_agent.retrieve_questions(
                current_plan=plan_text,
                qa_history=conversation_history,
                target_count=3,
            )
            if rag_result:
                rag_context_text = rag_result.get("context_text")

        format_instruction = self._get_format_instruction()
        user_text = self._build_user_message_paper(
            item_description,
            conversation_history,
            plan,
            format_instruction,
            rag_context=rag_context_text,
        )

        message_content = [{"type": "text", "text": user_text}]
        if self.logo_info and "path" in self.logo_info:
            logo_path = self.logo_info["path"]
            if os.path.exists(logo_path):
                logo_base64 = self.generator.image_to_base64(logo_path)
                if logo_base64:
                    if not logo_base64.startswith("data:image"):
                        logo_base64 = f"data:image/png;base64,{logo_base64}"
                    message_content.append({
                        "type": "image_url",
                        "image_url": {"url": logo_base64, "detail": "high"},
                    })

        messages = [
            SystemMessage(content=self.qa_system_prompt),
            HumanMessage(content=message_content),
        ]

        initial_tokens = {
            "input_tokens": self.questioner_token_stats["input_tokens"],
            "output_tokens": self.questioner_token_stats["output_tokens"],
            "reasoning_tokens": self.questioner_token_stats["reasoning_tokens"],
            "total_tokens": self.questioner_token_stats["total_tokens"],
        }
        input_tokens = self.token_counter.count_tokens(messages)
        if len(message_content) > 1:
            input_tokens += (len(message_content) - 1) * 85

        if self.question_agent_model_version == "gemini25" and GeminiTokenTrackingCallback:
            callback = GeminiTokenTrackingCallback()
        else:
            callback = TokenTrackingCallback()
        response = self.generator.questioner_llm.invoke(messages, config={"callbacks": [callback]})

        callback_stats = callback.get_stats()
        output_tokens = callback_stats.get("completion_tokens", 0)
        reasoning_tokens = callback_stats.get("reasoning_tokens", 0)
        if output_tokens == 0:
            output_tokens = self.token_counter.count_tokens(response.content)

        self.questioner_token_stats["input_tokens"] += input_tokens
        self.questioner_token_stats["output_tokens"] += output_tokens
        self.questioner_token_stats["reasoning_tokens"] += reasoning_tokens
        self.questioner_token_stats["total_tokens"] += input_tokens + output_tokens + reasoning_tokens

        per_question_tokens = {
            "input_tokens": self.questioner_token_stats["input_tokens"] - initial_tokens["input_tokens"],
            "output_tokens": self.questioner_token_stats["output_tokens"] - initial_tokens["output_tokens"],
            "reasoning_tokens": self.questioner_token_stats["reasoning_tokens"] - initial_tokens["reasoning_tokens"],
            "total_tokens": self.questioner_token_stats["total_tokens"] - initial_tokens["total_tokens"],
        }

        question_text = response.content.strip() if response.content else ""
        question_text = question_text.strip()
        if question_text.upper().startswith("QUESTION:"):
            question_text = question_text.split(":", 1)[1].strip()
        if question_text.startswith("Q") and ":" in question_text:
            question_text = question_text.split(":", 1)[1].strip()
        question_text = question_text.strip('"').strip("'")
        question_text = re.sub(r"\[FORMAT_[^\]]+\]\s*", "", question_text, flags=re.IGNORECASE)
        question_text = question_text.strip()
        question_text = self._strip_next_plan(question_text)
        return question_text, per_question_tokens

    def _generate_single_flexible_question(
        self,
        item_description: str,
        conversation_history: List[Tuple[str, str]],
        plan: Optional[Dict] = None,
        answer_image_path: Optional[str] = None,
    ) -> Tuple[str, Dict]:
        """Generate single flexible/adaptive question (delegates to paper-aligned _generate_single_question)."""
        return self._generate_single_question(
            item_description, conversation_history, plan, answer_image_path
        )

    def _generate_single_merge_question(
        self,
        item_description: str,
        conversation_history: List[Tuple[str, str]],
        plan: Optional[Dict] = None,
        answer_image_path: Optional[str] = None,
    ) -> Tuple[str, Dict]:
        """Generate single merge (MPQC) question (delegates to paper-aligned _generate_single_question)."""
        return self._generate_single_question(
            item_description, conversation_history, plan, answer_image_path
        )

    def evaluate_and_answer_questions(
        self,
        questions_text: str,
        answer_image_path: str,
        item_description: str,
        plan: Optional[Dict] = None,
        conversation_history: Optional[List[Tuple[str, str]]] = None
    ) -> Tuple[str, Dict]:
        """Evaluate and answer questions (Merge Mode) - Paper aligned: SystemMessage + HumanMessage"""
        import json
        plan_text = json.dumps(plan, indent=2, ensure_ascii=False) if plan else "No plan yet."

        # Build system message from template (paper: User Agent system prompt)
        try:
            system_prompt = self.eval_answer_prompt_template.format(
                current_plan=plan_text,
                questions_text=questions_text
            )
        except KeyError:
            system_prompt = self.eval_answer_prompt_template

        # Build user message: images + question (paper: User Agent user message)
        image_base64 = None
        if os.path.exists(answer_image_path):
            image_base64 = self.generator.image_to_base64(answer_image_path)
            if image_base64 and not image_base64.startswith("data:image"):
                image_base64 = f"data:image/png;base64,{image_base64}"

        user_content = []
        # Add target image
        if image_base64:
            user_content.append({
                "type": "image_url",
                "image_url": {"url": image_base64, "detail": "high"}
            })

        # Add logo image if available
        if self.logo_info and 'path' in self.logo_info:
            logo_path = self.logo_info['path']
            if os.path.exists(logo_path):
                logo_base64 = self.generator.image_to_base64(logo_path)
                if logo_base64:
                    if not logo_base64.startswith("data:image"):
                        logo_base64 = f"data:image/png;base64,{logo_base64}"
                    user_content.append({
                        "type": "image_url",
                        "image_url": {"url": logo_base64, "detail": "high"}
                    })

        # Add question text
        user_content.append({"type": "text", "text": f"Question:\n{questions_text}"})

        # Paper-aligned: SystemMessage (instructions) + HumanMessage (images + question)
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_content)
        ]
        
        input_tokens = self.token_counter.count_tokens(messages)
        # Add approximate image tokens
        image_count = sum(1 for item in user_content if item.get("type") == "image_url")
        input_tokens += image_count * 85
        
        # Use answerer_llm for evaluate_and_answer (this is answerer agent's work)
        if self.answer_agent_model_version == "gemini25" and GeminiTokenTrackingCallback:
            callback = GeminiTokenTrackingCallback()
        else:
            callback = TokenTrackingCallback()
        response = self.generator.answerer_llm.invoke(messages, config={"callbacks": [callback]})
        
        callback_stats = callback.get_stats()
        callback_input_tokens = callback_stats.get("input_tokens", 0) or callback_stats.get("prompt_tokens", 0)
        output_tokens = callback_stats.get("completion_tokens", 0) or callback_stats.get("output_tokens", 0)
        reasoning_tokens = callback_stats.get("reasoning_tokens", 0)
        
        if self.answer_agent_model_version == "gemini25" and callback_input_tokens > 0:
            input_tokens = callback_input_tokens
        elif output_tokens == 0:
            output_tokens = self.token_counter.count_tokens(response.content)
        
        self.answerer_token_stats["input_tokens"] += input_tokens
        self.answerer_token_stats["output_tokens"] += output_tokens
        self.answerer_token_stats["reasoning_tokens"] += reasoning_tokens
        self.answerer_token_stats["total_tokens"] += input_tokens + output_tokens + reasoning_tokens
        
        response_text = response.content.strip()
        token_stats = {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "reasoning_tokens": reasoning_tokens,
            "total_tokens": input_tokens + output_tokens + reasoning_tokens
        }
        
        return response_text, token_stats
    
    def conduct_adaptive_qa_with_plan(
        self,
        item_description: str,
        answer_image_path: str,
        plan: Optional[Dict] = None,
        num_questions: int = 10,
        mpc_enabled: bool = False
    ) -> Dict:
        """Execute adaptive QA (Flexible or Merge/MPQC Mode)"""
        mode_str = "Merge (MPQC)" if mpc_enabled else "Flexible"
        print(f"\n{'='*80}")
        print(f"🎯 ADAPTIVE QA SESSION (Max {num_questions} questions, Mode: {mode_str})")
        print(f"{'='*80}")
        
        if not hasattr(self, 'conversation_history') or self.conversation_history is None:
            self.conversation_history = []
        
        cycle_token_stats = {
            "questioner_agent": {"input_tokens": 0, "output_tokens": 0, "reasoning_tokens": 0, "total_tokens": 0},
            "answerer_agent": {"input_tokens": 0, "output_tokens": 0, "reasoning_tokens": 0, "total_tokens": 0}
        }
        
        initial_q_tokens = {
            "input_tokens": self.questioner_token_stats.get("input_tokens", 0),
            "output_tokens": self.questioner_token_stats.get("output_tokens", 0),
            "reasoning_tokens": self.questioner_token_stats.get("reasoning_tokens", 0),
            "total_tokens": self.questioner_token_stats.get("total_tokens", 0)
        }
        initial_a_tokens = {
            "input_tokens": self.answerer_token_stats.get("input_tokens", 0),
            "output_tokens": self.answerer_token_stats.get("output_tokens", 0),
            "reasoning_tokens": self.answerer_token_stats.get("reasoning_tokens", 0),
            "total_tokens": self.answerer_token_stats.get("total_tokens", 0)
        }
        
        if mpc_enabled:
            # Merge Mode (MPQC)
            print(f"\n🔄 Merge Mode: One question at a time with trajectory planning...")
            
            accepted_questions_with_answers = []
            rejected_questions = []
            cycle_conversation_history = []
            
            for i in range(1, num_questions + 1):
                # Generate question (returns question_text and questioner_tokens)
                question_text, questioner_tokens = self._generate_single_merge_question(
                    item_description,
                    self.conversation_history,
                    plan,
                    answer_image_path
                )
                
                if not question_text:
                    break
                
                import re
                # Detect format (check the original question text, ignoring "| Next:" part)
                question_text_clean = self._strip_next_plan(question_text)
                question_text_for_format = re.sub(r'\s*\|\s*Next:.*$', '', question_text_clean, flags=re.IGNORECASE).strip()
                format_type = 'open_text'
                if re.search(r'\b(Yes|No)\b', question_text_for_format, re.IGNORECASE) or '?' in question_text_for_format and re.search(r'\(Yes/No\)', question_text_for_format, re.IGNORECASE):
                    format_type = 'binary'
                elif re.search(r'\b([A-D]\)|Option [A-D]|A\)|B\)|C\)|D\))', question_text_for_format, re.IGNORECASE):
                    format_type = 'multi_choice'
                
                print(f"\n{'='*60}")
                print(f"❓ Question {i}/{num_questions} (Format: {format_type.upper()})")
                print(f"{'='*60}")
                print(f"❓ Question: {question_text_clean}")
                
                # Record answerer token stats before answering (for per-question tracking)
                initial_answerer_tokens = {
                    "input_tokens": self.answerer_token_stats.get("input_tokens", 0),
                    "output_tokens": self.answerer_token_stats.get("output_tokens", 0),
                    "reasoning_tokens": self.answerer_token_stats.get("reasoning_tokens", 0),
                    "total_tokens": self.answerer_token_stats.get("total_tokens", 0)
                }
                
                eval_and_answer_response_text, _ = self.evaluate_and_answer_questions(
                    question_text_clean,
                    answer_image_path,
                    item_description,
                    plan,
                    self.conversation_history
                )
                
                # Calculate per-question answerer tokens (difference from initial)
                per_question_answerer_tokens = {
                    "input_tokens": self.answerer_token_stats.get("input_tokens", 0) - initial_answerer_tokens["input_tokens"],
                    "output_tokens": self.answerer_token_stats.get("output_tokens", 0) - initial_answerer_tokens["output_tokens"],
                    "reasoning_tokens": self.answerer_token_stats.get("reasoning_tokens", 0) - initial_answerer_tokens["reasoning_tokens"],
                    "total_tokens": self.answerer_token_stats.get("total_tokens", 0) - initial_answerer_tokens["total_tokens"]
                }
                
                response_text = eval_and_answer_response_text.strip() if eval_and_answer_response_text else ""
                response_line = response_text.split('\n')[0].strip() if response_text else ""
                # Paper A-MPQC: "respond: <answer>" or "reject: No"; legacy: "Yes: <answer>" or "No"
                is_accept_prefix = response_line.upper().startswith('YES:') or response_line.upper().startswith('RESPOND:')
                answer_content = response_text.split(":", 1)[1].strip() if is_accept_prefix and ":" in response_text else ""
                is_not_visible = answer_content.lower().strip() == "not visible" or "not visible" in answer_content.lower()
                is_reject = response_line.upper().startswith('REJECT:') or (response_line.upper() == 'NO' and not is_accept_prefix)
                accepted = is_accept_prefix and not is_not_visible and not is_reject
                if accepted:
                    answer = answer_content
                    accepted_questions_with_answers.append({
                        "question_text": question_text_clean,
                        "raw_question_text": question_text,
                        "answer": answer,
                        "questioner_tokens": questioner_tokens,
                        "answerer_tokens": per_question_answerer_tokens,
                        "format": format_type
                    })
                    print(f"✅ Accepted - Answer: {answer[:100]}..." if len(answer) > 100 else f"✅ Accepted - Answer: {answer}")
                    # Store as dict with token stats for per-question analysis
                    cycle_conversation_history.append({
                        "question": question_text_clean,
                        "answer": answer,
                        "questioner_tokens": questioner_tokens,
                        "answerer_tokens": per_question_answerer_tokens,
                        "format": format_type
                    })
                    # Keep tuple format for backward compatibility
                    self.conversation_history.append((question_text_clean, answer))
                    
                    # Check satisfaction after each accepted Q&A
                    is_satisfied = self._check_satisfaction(plan, self.conversation_history)
                    if is_satisfied:
                        print(f"\n✅ Question agent satisfied after {i} question(s), stopping questions in this cycle")
                        break
                else:
                    rejection_reason = "Answer: Not visible" if is_not_visible else "Rejected by answerer"
                    rejected_questions.append({
                        "question_text": question_text_clean,
                        "reason": rejection_reason
                    })
                    print(f"❌ Rejected - Reason: {rejection_reason} (evaluation cost still incurred)")
                    # NOTE: per_question_answerer_tokens still includes the cost of evaluate_and_answer_questions
                    # even though the question was rejected. This cost is tracked in cycle_token_stats.
            
            # Calculate cycle token stats
            cycle_token_stats["questioner_agent"] = {
                "input_tokens": self.questioner_token_stats["input_tokens"] - initial_q_tokens["input_tokens"],
                "output_tokens": self.questioner_token_stats["output_tokens"] - initial_q_tokens["output_tokens"],
                "reasoning_tokens": self.questioner_token_stats["reasoning_tokens"] - initial_q_tokens["reasoning_tokens"],
                "total_tokens": self.questioner_token_stats["total_tokens"] - initial_q_tokens["total_tokens"]
            }
            cycle_token_stats["answerer_agent"] = {
                "input_tokens": self.answerer_token_stats["input_tokens"] - initial_a_tokens["input_tokens"],
                "output_tokens": self.answerer_token_stats["output_tokens"] - initial_a_tokens["output_tokens"],
                "reasoning_tokens": self.answerer_token_stats["reasoning_tokens"] - initial_a_tokens["reasoning_tokens"],
                "total_tokens": self.answerer_token_stats["total_tokens"] - initial_a_tokens["total_tokens"]
            }
            
            return {
                "conversation_history": cycle_conversation_history,
                "token_stats": cycle_token_stats,
                "accepted_questions": accepted_questions_with_answers,
                "rejected_questions": rejected_questions
            }
        else:
            # Flexible Mode
            print(f"\n🔄 Flexible Mode: One question at a time...")
            
            cycle_conversation_history = []
            
            for i in range(1, num_questions + 1):
                # Generate question (returns question_text and questioner_tokens)
                question, questioner_tokens = self._generate_single_flexible_question(
                    item_description,
                    self.conversation_history,
                    plan,
                    answer_image_path
                )
                
                print(f"\n{'='*60}")
                print(f"❓ Question {i}/{num_questions}")
                print(f"{'='*60}")
                print(f"❓ Question: {question}")
                
                # Use evaluate_and_answer_questions with poster_user_answer_only.txt prompt
                answer_response, per_question_answerer_tokens = self.evaluate_and_answer_questions(
                    question,
                    answer_image_path,
                    item_description,
                    plan,
                    self.conversation_history
                )
                answer = answer_response.strip()
                
                # Accumulate answerer token stats
                self.answerer_token_stats["input_tokens"] += per_question_answerer_tokens.get("input_tokens", 0)
                self.answerer_token_stats["output_tokens"] += per_question_answerer_tokens.get("output_tokens", 0)
                self.answerer_token_stats["reasoning_tokens"] += per_question_answerer_tokens.get("reasoning_tokens", 0)
                self.answerer_token_stats["total_tokens"] += per_question_answerer_tokens.get("total_tokens", 0)
                
                print(f"💭 Answer: {answer[:100]}..." if len(answer) > 100 else f"💭 Answer: {answer}")
                
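                # Detect format heuristically from the question text, after stripping any
                # trailing "| Next: ..." suffix. Defaults to 'open_text'.
                # NOTE(review): `and` binds tighter than `or`, so the binary test below reads as
                # `<contains the word Yes/No> or ('?' present and "(Yes/No)" present)`. The second
                # clause is already covered by the first, so any question containing the standalone
                # word "yes" or "no" (case-insensitive) is labelled 'binary' - confirm this is intended.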
                import re
                question_text_for_format = re.sub(r'\s*\|\s*Next:.*$', '', question, flags=re.IGNORECASE).strip()
                format_type = 'open_text'
                if re.search(r'\b(Yes|No)\b', question_text_for_format, re.IGNORECASE) or '?' in question_text_for_format and re.search(r'\(Yes/No\)', question_text_for_format, re.IGNORECASE):
                    format_type = 'binary'
                elif re.search(r'\b([A-D]\)|Option [A-D]|A\)|B\)|C\)|D\))', question_text_for_format, re.IGNORECASE):
                    format_type = 'multi_choice'
                
                # Store as dict with token stats for per-question analysis
                cycle_conversation_history.append({
                    "question": question,
                    "answer": answer,
                    "questioner_tokens": questioner_tokens,
                    "answerer_tokens": per_question_answerer_tokens,
                    "format": format_type
                })
                # Keep tuple format for backward compatibility
                self.conversation_history.append((question, answer))
            
                # Check satisfaction after each Q&A
                is_satisfied = self._check_satisfaction(plan, self.conversation_history)
                if is_satisfied:
                    print(f"\n✅ Question agent satisfied after {i} question(s), stopping questions in this cycle")
                    break
            
            # Calculate cycle token stats
            cycle_token_stats["questioner_agent"] = {
                "input_tokens": self.questioner_token_stats["input_tokens"] - initial_q_tokens["input_tokens"],
                "output_tokens": self.questioner_token_stats["output_tokens"] - initial_q_tokens["output_tokens"],
                "reasoning_tokens": self.questioner_token_stats["reasoning_tokens"] - initial_q_tokens["reasoning_tokens"],
                "total_tokens": self.questioner_token_stats["total_tokens"] - initial_q_tokens["total_tokens"]
            }
            cycle_token_stats["answerer_agent"] = {
                "input_tokens": self.answerer_token_stats["input_tokens"] - initial_a_tokens["input_tokens"],
                "output_tokens": self.answerer_token_stats["output_tokens"] - initial_a_tokens["output_tokens"],
                "reasoning_tokens": self.answerer_token_stats["reasoning_tokens"] - initial_a_tokens["reasoning_tokens"],
                "total_tokens": self.answerer_token_stats["total_tokens"] - initial_a_tokens["total_tokens"]
            }
            
            return {
                "conversation_history": cycle_conversation_history,
                "token_stats": cycle_token_stats
            }
