import json
import logging
import re
from llms.llms import async_call_llm
from json_repair import repair_json
import asyncio


class FilterAgent:
    """Screen plans produced by the PlanningAgent with an LLM judge.

    Every ``async_filter_*`` method builds a YES/NO relevance prompt for the
    plan stored in ``example``, sends it to the LLM and records the outcome
    on that same dict:

    * ``example['is_valid']`` -- ``True`` when the judge answered YES.
    * ``example['filter_response']`` -- the raw LLM reply, or an error
      marker when every attempt failed. Not set when the plan key is
      missing from ``example``.
    """

    # Number of LLM attempts before a plan is marked invalid.
    _MAX_ATTEMPTS = 3

    # Matches a standalone YES / NO token. Word boundaries keep words such as
    # "know", "now", "note" or "eyes" from being mistaken for a verdict.
    _VERDICT_TOKEN = re.compile(r"\b(YES|NO)\b", re.IGNORECASE)

    # Judge prompt shared by every plan type; ``{prompt}`` and ``{plan_json}``
    # are filled in by ``_build_prompt``. The text must contain no other
    # braces because it is rendered with ``str.format``.
    _PROMPT_TEMPLATE = """
Given the following prompt and plan, return YES if the plan is relevant to the question and NO if it isn't. A plan is considered relevant only if it meets all of the following criteria:

1. Direct Relevance: The plan must directly address the question posed in the prompt. It should not deviate from the main objective.
2. Completeness: The plan must include all necessary steps to achieve the goal described in the prompt. Missing critical steps will result in the plan being deemed irrelevant.
3. Logical Coherence: The plan must follow a logical sequence of actions. Each step should clearly lead to the next, and there should be no logical gaps or contradictions.
4. Efficiency: The plan should not include unnecessary or redundant steps. Each step must contribute directly to the final goal.
5. Specificity: The plan must be specific and actionable. Vague or overly broad steps will not be considered relevant.
6. Contextual Consistency: The plan must be consistent with the context provided in the prompt. Any steps that are inconsistent with the background information or constraints will be deemed irrelevant.

Note: To ensure rigorous screening, we aim to reject approximately one-third of the plans that do not meet these stringent criteria.

Example of Irrelevant Plans:
- Irrelevant Content: If the prompt asks for a plan to "design an experiment to test plant photosynthesis," but the plan includes steps like "build a rocket model," this plan will be rejected.
- Incomplete Plan: If the plan omits critical steps, such as "measure the photosynthesis rate," it will be considered irrelevant.
- Illogical Sequence: If the steps in the plan do not follow a logical order, such as "measure plant growth first, then plant the seeds," the plan will be rejected.

> Prompt: {prompt}
> Plan:
>>>
{plan_json}
>>>
> Relevant (YES / NO):
"""

    @staticmethod
    async def async_filter_plan(model, example, semaphore):
        """Dispatch ``example`` to the filter matching ``example['type']``.

        Args:
            model: Model handle forwarded unchanged to ``async_call_llm``.
            example: Dict holding at least ``'type'``; the type-specific
                filter also reads ``'prompt'`` and the plan key.
            semaphore: Async context manager guarding each LLM call.

        Returns:
            The same ``example`` dict. For an unrecognised type it is
            returned untouched, so ``'is_valid'`` is NOT set in that case.

        Raises:
            KeyError: If ``example`` has no ``'type'`` entry.
        """
        plan_type = example["type"]
        if plan_type == "Week":
            return await FilterAgent.async_filter_week_plan(model, example, semaphore)
        if plan_type == "Floor":
            return await FilterAgent.async_filter_floor_plan(model, example, semaphore)
        if plan_type == "Menu Week":
            return await FilterAgent.async_filter_menu_plan(model, example, semaphore)
        if plan_type == "Block":
            return await FilterAgent.async_filter_block_plan(model, example, semaphore)

        # Unknown types pass through unchanged; make that visible in the logs.
        logging.warning("Unknown plan type %r; example left unfiltered", plan_type)
        return example

    @staticmethod
    async def async_filter_week_plan(model, example, semaphore):
        """Filter the weekly plan stored under ``example['weekly_plan']``."""
        return await FilterAgent._filter_by_key(
            model, example, semaphore, "weekly_plan", "weekly"
        )

    @staticmethod
    async def async_filter_floor_plan(model, example, semaphore):
        """Filter the floor plan stored under ``example['floor_plan']``."""
        return await FilterAgent._filter_by_key(
            model, example, semaphore, "floor_plan", "floor"
        )

    @staticmethod
    async def async_filter_menu_plan(model, example, semaphore):
        """Filter the menu plan, which is also stored under ``example['weekly_plan']``."""
        return await FilterAgent._filter_by_key(
            model, example, semaphore, "weekly_plan", "menu"
        )

    @staticmethod
    async def async_filter_block_plan(model, example, semaphore):
        """Filter the block plan stored under ``example['block_plan']``."""
        # The block prompt has always ended without the extra blank line the
        # other prompts carry; keep it that way so the prompt is byte-identical.
        return await FilterAgent._filter_by_key(
            model, example, semaphore, "block_plan", "block",
            trailing_blank_line=False,
        )

    @staticmethod
    async def _filter_by_key(model, example, semaphore, plan_key, label,
                             trailing_blank_line=True):
        """Shared body of the type-specific filters.

        Args:
            model: Model handle forwarded to ``async_call_llm``.
            example: Dict with ``'prompt'`` and, normally, ``plan_key``.
            semaphore: Async context manager guarding each LLM call.
            plan_key: Key in ``example`` under which the plan is stored.
            label: Word used in the log messages ("weekly", "floor", ...).
            trailing_blank_line: Whether the prompt ends with a blank line.

        Returns:
            ``example`` with ``'is_valid'`` set; marked invalid right away
            (without calling the LLM) when ``plan_key`` is absent.

        Raises:
            KeyError: If the plan is present but ``example['prompt']`` is not.
            TypeError: If the plan is not JSON-serialisable.
        """
        if plan_key not in example:
            logging.warning(f"No {label} plan found to filter")
            example['is_valid'] = False
            return example

        filter_prompt = FilterAgent._build_prompt(
            example['prompt'], example[plan_key], trailing_blank_line
        )
        logging.info(f"Filtering {label} plan")
        return await FilterAgent._evaluate_and_mark_plan(
            model, example, filter_prompt, semaphore
        )

    @staticmethod
    def _build_prompt(prompt, plan, trailing_blank_line=True):
        """Render the judge prompt for ``prompt`` and a JSON-dumped ``plan``."""
        rendered = FilterAgent._PROMPT_TEMPLATE.format(
            prompt=prompt, plan_json=json.dumps(plan, indent=2)
        )
        if trailing_blank_line:
            rendered += "\n"
        return rendered

    @staticmethod
    def _parse_verdict(response):
        """Return ``True`` when the first standalone YES/NO token is YES.

        Only whole-word tokens count, case-insensitively, so "Now ... YES"
        is accepted and a reply that merely contains "eyes" is not. A reply
        with no YES/NO token at all is treated as a rejection.
        """
        first_token = FilterAgent._VERDICT_TOKEN.search(response)
        return first_token is not None and first_token.group(1).upper() == "YES"

    @staticmethod
    async def _evaluate_and_mark_plan(model, example, filter_prompt, semaphore):
        """Ask the LLM to judge the plan and record the verdict on ``example``.

        The call is attempted up to ``_MAX_ATTEMPTS`` times. Any ``Exception``
        (including a reply that cannot be parsed as text) counts as a failed
        attempt and is followed by a linearly growing pause (1s, 2s, ...).

        Args:
            model: Model handle forwarded to ``async_call_llm``.
            example: Dict that receives ``'is_valid'`` and ``'filter_response'``.
            filter_prompt: Fully rendered judge prompt.
            semaphore: Async context manager held only around the LLM call.

        Returns:
            ``example``; marked invalid with ``'filter_response'`` set to
            ``"Error during filtering"`` when every attempt failed.
        """
        for attempt in range(1, FilterAgent._MAX_ATTEMPTS + 1):
            try:
                # Hold the semaphore for the LLM call only, not for the
                # logging / parsing that follows.
                async with semaphore:
                    response = await async_call_llm(model, filter_prompt)
                logging.info(f"Filter response: {response}")
                is_valid = FilterAgent._parse_verdict(response)
            except Exception as e:
                logging.error(f"Error during plan filtering: {e}")
                # Linear backoff; waiting after the final attempt would only
                # delay the failure result, so it is skipped.
                if attempt < FilterAgent._MAX_ATTEMPTS:
                    await asyncio.sleep(1 * attempt)
                continue

            example['is_valid'] = is_valid
            example['filter_response'] = response

            # Record the screening outcome.
            if is_valid:
                logging.info("Plan passed filtering")
            else:
                logging.warning("Plan failed filtering check")
            return example

        # Every attempt failed: default to rejecting the plan.
        logging.error("Failed to filter plan after multiple attempts")
        example['is_valid'] = False
        example['filter_response'] = "Error during filtering"
        return example