import json
import random
from typing import List, Dict, Any, Tuple, Optional
from openai import OpenAI
from prompt import system_prompt, user_prompt
from task_requirements import task_description, task_list
from utils.utils import clean_response_text, get_question_type_description
from utils.time_extractor import format_qa_time_field
from .role_matcher import RoleMatcher

                           
# System prompt that QAGenerator.generate_qa_pair sends instead of the imported
# `system_prompt` when it is called with use_ablation_mode=True.
# Compared with what the text itself states: the model is told NOT to run
# document quality checks or reject documents, to always emit at least one QA
# pair, and to return a JSON list wrapped in <json></json> whose items carry
# "question", "answer" and "references".
ablation_system_prompt = """You are a professional gaming Q&A assistant capable of generating specific questions and reasonable hypothetical questions and answers based on question templates.

## Key Requirements for Ablation Study:
- Generate QA pairs based on the provided document content and question template
- Do NOT perform document quality checks or reject documents
- Always generate at least one QA pair, even if the document quality is not perfect
- Focus on creating diverse questions that reflect different aspects of the gaming content
- Ensure questions are natural and from a player's perspective

## Generated Data Format Requirements:
Output results in JSON data list format, surrounded by <json></json>, format requirements as follows:
<json>
[
    {
        "question": "Question raised from player perspective (natural expression, avoid directly mentioning version numbers)",
        "answer": "Direct answer to the question, concise and clear, without referential expressions",
        "references": ["Specific content segment 1 quoted from materials", "Quote segment 2"]
    }
]
</json>"""


class QAGenerator:
                 

    def __init__(self,
                 client: OpenAI,
                 model_name: str,
                 question_types_map: Dict[str, Any],
                 role_matcher: Optional[RoleMatcher] = None):      
        self.client = client
        self.model_name = model_name
        self.question_types_map = question_types_map
        self.task_description = task_description
        self.task_list = task_list
        self.role_matcher = role_matcher

    def generate_hypothetical_qa_from_template(self, template_data: Dict[str, Any]) -> Tuple[str, str]:   
        if not self.client:
            return "", ""

        try:
            template = template_data.get('template', '')
            question_type = template_data.get('question_type', 'UNCATEGORIZED')
            game_name = template_data.get('game_name', 'Game Name')
                                              
            prompt = f"""
                Based on the following question template, generate a specific question and corresponding hypothetical answer. Please ensure that placeholders in the template are replaced with appropriate content. Pay special attention to the game placeholder [GAME_NAME], please use the correct game name {game_name}.

                Question Template: {template}
                Question Type: {question_type}

                Please output in the following format:
                Question: [Generated specific question]
                Answer: [Corresponding hypothetical answer]

                Requirements:
                1. Questions should be specific and clear, conforming to gaming players' expression habits
                2. Answers should be reasonable and helpful
                3. If there are placeholders in the template (such as [GAME_NAME], etc.), please replace them with appropriate content
                4. Answer length should be moderate, both useful and not too lengthy
            """

            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a professional gaming Q&A assistant capable of generating specific questions and reasonable hypothetical questions and answers based on question templates."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                max_tokens=1000,
                temperature=0.7
            )

            response_text = response.choices[0].message.content.strip()

                          
            lines = response_text.split('\n')
            question = ""
            answer = ""

            for line in lines:
                line = line.strip()
                if line.startswith('Question：') or line.startswith('Question:'):
                    question = line.replace('Question：', '').replace('Question:', '').strip()
                elif line.startswith('Answer：') or line.startswith('Answer:'):
                    answer = line.replace('Answer：', '').replace('Answer:', '').strip()

                            
            if not question or not answer:
                parts = response_text.split('\n', 1)
                if len(parts) >= 2:
                    question = parts[0].strip()
                    answer = parts[1].strip()
                else:
                                             
                    question = template
                    answer = response_text

            return question, answer

        except Exception as e:
            return "", ""

    def generate_qa_pair(self,
                         template_data: Dict[str, Any],
                         retrieved_docs: List[Dict[str, Any]],
                         selected_task_type: str = None,
                         use_ablation_mode: bool = False) -> List[Dict[str, Any]]:         
        docs_content = ""
        all_entities = []
        for i, doc in enumerate(retrieved_docs, 1):
            if isinstance(doc, dict) and 'content' in doc:
                content = doc['content']
                if content:                
                    docs_content += f"Reference Material {i}:\n{content}\n\n"

                                                  
                                                                   
                    if 'metadata' in doc:
                        metadata = doc['metadata']
                        if 'entity_texts' in metadata:
                                             
                            entity_texts = metadata['entity_texts']
                            for entity_text in entity_texts:
                                if entity_text.strip():
                                               
                                    entity_obj = {
                                        'text': entity_text,
                                        'type': 'UNKNOWN',               
                                        'context': ''                
                                    }
                                    all_entities.append(entity_obj)
                        elif 'entities' in metadata:
                                         
                            all_entities.extend(metadata['entities'])

                          
        if not docs_content.strip():         
            question_type = template_data.get('question_type', 'UNCATEGORIZED')
            question_type_description = get_question_type_description(
                question_type, self.question_types_map)

                                  
        if selected_task_type is None:
            selected_task_type = random.choice(self.task_list)

        task_desc = self.task_description.get(selected_task_type, "")

                   
        role_data = None
        role_context = ""
        if self.role_matcher:
            role_data = self.role_matcher.find_matching_role(template_data)
            if role_data:
                role_context = self.role_matcher.get_role_context(role_data)

                             
        user_prompt_str = self._build_role_aware_prompt(
            question_type_description, selected_task_type, task_desc,
            docs_content, role_context, template_data
        )

        try:
                              
                                                         
                                    
            current_system_prompt = ablation_system_prompt if use_ablation_mode else system_prompt
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {
                        "role": "system",
                        "content": current_system_prompt
                    },
                    {
                        "role": "user",
                        "content": user_prompt_str
                    }
                ],
                max_tokens=1500,
                temperature=0.8
            )

                  
            response_text = response.choices[0].message.content.strip()
                                                     
            cleaned_text = clean_response_text(response_text)
            qa_data = json.loads(cleaned_text)
                                     
            for qa in qa_data:
                                          
                if 'entities' not in qa or not qa['entities']:
                    qa['entities'] = self._extract_relevant_entities(qa, all_entities)

                                           
                extracted_time = format_qa_time_field(retrieved_docs)
                qa['time'] = extracted_time

                qa.update({
                    'template_id': template_data.get('id'),
                    'template': template_data.get('template'),
                    'question_type': question_type,
                    'task_type': selected_task_type,
                    'retrieved_docs': retrieved_docs,
                    'source_template': template_data,
                    'role_data': role_data if role_data else None
                })

            return qa_data

        except json.JSONDecodeError as e:
            return []
        except Exception as e:
            return []

    def _build_role_aware_prompt(self,
                                 question_type_description: str,
                                 selected_task_type: str,
                                 task_desc: str,
                                 docs_content: str,
                                 role_context: str,
                                 template_data: Dict[str, Any]) -> str:                              
        if role_context:
            formatted_role_context = f"""
                ## Player Role Background
                {role_context}

                **Important Note**: Please generate questions completely from the perspective of the above player role, ensuring questions reflect the role's:
                - Language expression style and habits
                - Gaming experience level and focus points
                - Personal characteristics and preferences
                - Mindset and motivation when asking questions
            """
        else:
            formatted_role_context = ""

                   
        template_context = self._build_template_context(template_data)

                                                   
        return user_prompt.format(
            topic_description=question_type_description,
            task_name=selected_task_type,
            task_require=task_desc,
            role_context=formatted_role_context,
            template_context=template_context,
            doc_str=docs_content
        )

    def _build_template_context(self, template_data: Dict[str, Any]) -> str:
        template_text = template_data.get('template', '')
        description = template_data.get('description', '')
        placeholders = template_data.get('placeholders', [])

        if not template_text:
            return ""

        template_context = f"""
## Question Template Guidance
### Template Example
{template_text}

### Template Description
{description if description else 'Generate questions based on the structure and style of this template'}

**Generation Guidelines**:
1. **Reference Template Structure**: Generated questions should follow the expression and structural characteristics of the above template
2. **Maintain Style Consistency**: The language style of questions should be consistent with the template, reflecting the same expression habits
3. **Appropriate Variation**: While maintaining the template spirit, make reasonable changes and expansions based on specific document content
4. **Player Perspective**: Ensure generated questions are raised from a real player's perspective, not a simple copy of the template"""

        if placeholders:
            placeholder_text = ", ".join(placeholders)
            template_context += f"""
5. **Placeholder Understanding**: The template contains placeholders ({placeholder_text}), please understand their meaning and use them flexibly when generating questions"""

        return template_context.strip()

    def _extract_relevant_entities(self, qa_pair: Dict[str, Any],
                                   all_entities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        if not all_entities:
            return []

        relevant_entities = []
        question_text = qa_pair.get('question', '').lower()
        answer_text = qa_pair.get('answer', '').lower()
        combined_text = question_text + ' ' + answer_text

        for entity in all_entities:
            entity_text = entity.get('text', '').lower()
                               
            if entity_text and entity_text in combined_text:
                relevant_entities.append(entity)

        return relevant_entities
