import json
import math
import os
import base64
import io

import numpy as np
import re

from android_world.agents.BoN.utils import image_url, parallel_call_gpt
from android_world.agents.BoN.Tool.qwen_api import parallel_call_gpt_qwen
from android_world.agents.BoN.vllm_api import parallel_call_vllm

from android_world.agents.BoN.dynmaic_memory_2 import dynamic_memory
from android_world.agents.BoN.eval_prompt import Tool_system_prompt

from android_world.agents.BoN.Tool.tool_router import tool_router
from android_world.agents import m3a_utils
from PIL import Image


import requests, json, time
from concurrent.futures import ThreadPoolExecutor, as_completed
import json


def array_to_jpeg_bytes(image: np.ndarray) -> bytes:
  """Encode a numpy image array as JPEG and return the encoded bytes.

  The array is wrapped in a PIL image with ``Image.fromarray`` and the JPEG
  encoding itself is delegated to ``image_to_jpeg_bytes``.
  """
  return image_to_jpeg_bytes(Image.fromarray(image))

def image_to_jpeg_bytes(image: Image.Image) -> bytes:
  """Serialize a PIL image to JPEG in memory and return the encoded bytes."""
  buffer = io.BytesIO()
  image.save(buffer, format='JPEG')
  # getvalue() yields the full buffer contents regardless of the current
  # stream position, so no explicit rewind is needed.
  return buffer.getvalue()

def encode_image(image: np.ndarray) -> str:
    """Return the image array as a base64 text string of its JPEG encoding."""
    jpeg_bytes = array_to_jpeg_bytes(image)
    b64_bytes = base64.b64encode(jpeg_bytes)
    return b64_bytes.decode('utf-8')

# Matches an explicit score field in free-form model output, e.g.
# `"score": 8`, `'score': 8` or `"score": "8"` (the prompts allow string scores).
_SCORE_FIELD_RE = re.compile(r'["\']score["\']\s*:\s*["\']?(\d+)')

# Upper bound used when guessing a score from loose digits in the text.
_MAX_SCORE = 100


def _coerce_score(value):
    """Return `value` as an int, or None when it cannot be read as a number."""
    for convert in (int, lambda v: int(float(v))):
        try:
            return convert(value)
        except (TypeError, ValueError, OverflowError):
            continue
    return None


def _score_from_text(text):
    """Guess a score in [0, _MAX_SCORE] from free text, or return None."""
    # Prefer an explicit "score" field: it is far less ambiguous than a bare number.
    field = _SCORE_FIELD_RE.search(text)
    if field and int(field.group(1)) <= _MAX_SCORE:
        return int(field.group(1))
    # Otherwise take the first number anywhere in the text that is in range.
    return next(
        (int(tok) for tok in re.findall(r'\d+', text) if int(tok) <= _MAX_SCORE),
        None,
    )


def _parse_eval_json(text):
    """Return the JSON object found between <eval> and </eval>, or None."""
    start = text.find("<eval>")
    end = text.rfind("</eval>")
    if start == -1 or end == -1 or start >= end:
        return None
    try:
        parsed = json.loads(text[start + len("<eval>"):end].strip())
    except json.JSONDecodeError:
        return None
    # Only a JSON object can carry "score"/"original_step"; anything else is a parse failure.
    return parsed if isinstance(parsed, dict) else None


def _repair_eval_with_llm(idx, raw_text):
    """Ask the repair model to extract the evaluation JSON; always returns a dict with an int "score"."""
    print(f"No.{idx}：直接提取分数失败，改用 LLM 修复。")
    parsing_system_prompt = """You are an expert data parsing assistant. Your sole task is to accurately extract the clean, valid JSON object e from the user's provided text. You must output only the JSON object itself, without any additional text, explanations, or the surrounding tags.\nIf you encounter an error in the JSON format, please correct it and return.\n
        For example:\n {\n  "score": 8,\n  "original_step": "Reason: I need to locate and open the File Manager app to access the 'task.html' file in the Downloads folder.\\nAction: {\\"action_type\\": \\"open_app\\", \\"app_name\\": \\"File Manager\\"}"\n}  \n\n
        The task you need to extract and correct:\n
        """
    user_input = """The task you need to extract and correct:""" + "\n" + raw_text

    repaired = parallel_call_gpt(
        [[{"role": "system", "content": parsing_system_prompt},{"role": "user", "content": user_input}]],
        model="gpt-4.1-mini",
        temperature=0.0,
        max_tokens=200,
        max_processes=1
    )
    # The repair call may yield nothing usable; treat that as an empty answer
    # so the candidate ends up with the default score of 0.
    fixed_json = repaired[0] if repaired else None
    if not isinstance(fixed_json, str):
        fixed_json = ""

    try:
        parsed = json.loads(fixed_json)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        score = _coerce_score(parsed.get("score", 0))
        if score is not None:
            parsed["score"] = score
            return parsed

    # Last resort: pull a number out of whatever the repair model produced.
    print(f"No.{idx}：LLM 修复后依然无法解析，尝试从 LLM 输出中提取数字。")
    score = _score_from_text(fixed_json)
    return {"score": score if score is not None else 0}


def _evaluate_output(idx, raw_text):
    """Turn one raw scorer output into a dict with an int "score" and, if available, "original_step"."""
    # A failed call can leave None (or another non-string) in the result list.
    text = raw_text if isinstance(raw_text, str) else ""

    # Step 1: regular extraction of the JSON object inside <eval></eval>.
    eval_data = _parse_eval_json(text)
    if eval_data is not None:
        score = _coerce_score(eval_data.get("score", 0))
        if score is not None:
            eval_data["score"] = score
            return eval_data

    # Step 2: fall back to reading a score straight from the raw text.
    score = _score_from_text(text)
    if score is not None:
        return {"score": score}

    # Nothing to repair in an empty answer; skip the extra model call.
    if not text.strip():
        print(f"No.{idx}: empty model output, assigning score 0.")
        return {"score": 0}

    # Step 3: ask another model to repair the output.
    return _repair_eval_with_llm(idx, text)


def _image_part(b64_jpeg):
    """Build a chat-message content part carrying a base64 JPEG as a data URL."""
    return {
        'type': 'image_url',
        'image_url': {
            'url': f'data:image/jpeg;base64,{b64_jpeg}',
        },
    }


def _build_tool_payloads(candidates, action_prompt, base64_image, tool_result, previous_action_score, previous_action):
    """Build one scoring conversation per candidate, enriched with tool output."""
    tool_information_full = """The information below is tool-using information which which will help you better score them in the PRM stage. \nRelevant tool introudce: \n- **Point**: Identifies a specific point in the image based on a description and returns coordinates. Use when you need to locate a specific point for UI Icon or Text. Then use a red five-pointed star to highlight the coordinates in the image. Example: `{"name": "Point", "arguments": {"image": "img_1", "param": "Icon 'Gmail'"}}` \n - **omni_parser**: Parses a UI or general image to detect, label, and locate all interactive and non-interactive elements like icons, buttons, and text fields. Use this as a primary tool to get a complete overview of all components on the screen, especially for UI-related tasks. Example: `{"name": "omni_parser", "arguments": {"image": "img_1"}}`.\n\n""" + f"""TOOL_INFORMATION: {tool_result.get('tool_chain_filter')}""" + """Based on the tool information, you need to carefully analyze and compare the correctness of the candidate answers' formats, and give a more granular score based on how helpful they are in completing the task."""

    # Read the edited images defensively: the key may be absent when the
    # tools did not edit the screenshot.
    # NOTE(review): an earlier inline note named this key "final_image"
    # (singular); confirm against tool_router.
    tool_images = tool_result.get("final_images") or []
    print("edit_img:",tool_result.get('image_is_edited'))
    print("edit_img_num:",len(tool_images))
    attach_tool_images = bool(tool_result.get('image_is_edited'))

    # The previous-step context is identical for every candidate.
    if previous_action_score is None:
        previous = "No previous steps yet."
    else:
        previous = f"""Action:{previous_action},Score:{previous_action_score}"""

    payloads = []
    for action in candidates:
        user_prompt_text = f"""Please evaluate the following candidate action based on the user's instruction and the provided screen image, following all guidelines from the system prompt.

    User's Instruction:
    {action_prompt}

    Candidate Action to Evaluate:
    {action}

    Please complete a granular scoring for the current step based on the previous steps and scores. Here's the last action and its score :
    {previous}""" + """Your evaluation should be a JSON object with "score" and "original_step", wrapped in <eval></eval> tags.For example:\nExample 1:The user wants to click the '+' button ,the action is as expected. So a score of 10 is given.<eval>\n{"score": 10, "original_step": "Reason: I need to add a new contact. The \'create contact\' option is visible and clickable, allowing me to proceed with entering the required details.\nAction: {"action_type": "click", "index": 0}."}\n</eval> """

        content = [
            {'type': 'text', 'text': user_prompt_text + tool_information_full},
            _image_part(base64_image),
        ]
        # Append the tool-annotated screenshots after the raw screen.
        if attach_tool_images:
            content.extend(_image_part(img) for img in tool_images)

        payloads.append([
            {'role': 'system', 'content': Tool_system_prompt},
            {'role': 'user', 'content': content},
        ])
    return payloads


def _build_plain_payloads(candidates, action_prompt, base64_image):
    """Build one scoring conversation per candidate without any tool output."""
    system_prompt ="""You are a **Process Reward Model**. Your task is to evaluate a single **candidate action step** based on a **user's instruction** and **provided screen image**.

**Evaluation Process:**
1.  **Understand the Goal:** Carefully review the user's instruction and the current screen image.
2.  **Determine Your Optimal Action:** Based on the instruction and image, decide what you believe is the best possible action step.
3.  **Evaluate the Candidate Action:** Compare the provided candidate action step against your optimal action. 
4.  **Assign a Score:** Assign a numerical score to the candidate action from 0 to 100. If the candidate action is correct and has a correct reasoning process, a higher score should be given.
**Output Format:**
Your output **must be a single JSON object** containing a `"score"` (as a number or string, e.g., 5 or "5") and the `"original_step"` (the exact text of the candidate action you evaluated).
Enclose your entire JSON output within `\n<eval></eval>\n` XML tags.

Example:
Your Output:
According to the data provided, we need to open the corresponding software, so the corresponding operations are as follows.{"action_type": "open_app", "app_name": "Phone"}. The step provided is consistent with the answer to the analysis, so 5 points are given. The output is as follows.
<eval>
{"score": 96, "original_step": "Reason: The task is to create a new contact for Hugo Pereira. Currently, the home screen is displayed, and I need to access the Contacts app to add a new contact. {"action_type": "open_app", "app_name": "Phone"}."}
</eval>
"""

    payloads = []
    for candidate_response in candidates:
        user_prompt_text = f"""Please evaluate the following candidate action based on the user's instruction and the provided screen image, following all guidelines from the system prompt.Do not use unescaped double or single quotes in original_step.

    User's Instruction:
    {action_prompt}

    Candidate Action to Evaluate:
    {candidate_response}

    Your evaluation should be a JSON object with "score" and "original_step", wrapped in <eval></eval> tags.
    """
        payloads.append([
            {'role': 'system', 'content': system_prompt},
            {
                'role': 'user',
                'content': [
                    {'type': 'text', 'text': user_prompt_text},
                    _image_part(base64_image),
                ],
            },
        ])
    return payloads


def select_the_best(BoN_num, responses_list, action_prompt, tool_prompt, image_list, Tool, goal, history, ui_elements, additional_guidelines, previous_action_score,previous_action):
    """Score the candidate responses for one step and pick the best one.

    Every candidate is sent, together with the current screenshot, to a
    scoring model; the score is parsed from each answer and the index of the
    highest-scoring candidate is returned (the first one wins on ties).

    Args:
        BoN_num: Number of candidates to evaluate. If `responses_list` holds
            fewer entries, only the available ones are scored.
        responses_list: Candidate responses for the current step.
        action_prompt: User text prompt shown to the scorer. When `Tool` is
            truthy it is replaced by the prompt returned from `dynamic_memory`.
        tool_prompt: Prompt forwarded to `tool_router` (used only when `Tool`
            is truthy).
        image_list: Images for the step; only `image_list[0]` is read here,
            both for the scorer and for `tool_router`.
        Tool: When truthy, run `dynamic_memory` and `tool_router` and add the
            tool output (and any edited images) to the scoring prompt.
        goal, history, ui_elements, additional_guidelines: Forwarded unchanged
            to `dynamic_memory`.
        previous_action_score: Score of the previous step's action, or None
            when there is no previous step.
        previous_action: The previous step's action, shown to the scorer
            alongside its score.

    Returns:
        A tuple `(best_index, llm_outputs_raw, scored_responses,
        debug_tool_conversation_log, dynamic_history)`. `scored_responses` is
        a list of dicts with the keys "response", "score" and "index". The
        last two items are `[]` and `""` when `Tool` is falsy.

    Raises:
        ValueError: If the scoring model returned no usable output at all.
    """
    debug_tool_conversation_log = []
    dynamic_history = ""
    base64_image = encode_image(image_list[0])
    # Never index past the candidates that were actually supplied.
    candidates = responses_list[:BoN_num]

    if Tool:
        action_prompt, dynamic_history = dynamic_memory(goal, history, ui_elements, base64_image, additional_guidelines)

        T1 = time.time()
        tool_result, debug_tool_conversation_log = tool_router(tool_prompt, image_list[0], debug=True)
        T2 = time.time()
        print('UI Tool excution time :%s s' % ((T2 - T1)))

        all_evaluation_payloads = _build_tool_payloads(
            candidates,
            action_prompt,
            base64_image,
            tool_result,
            previous_action_score,
            previous_action,
        )
    else:
        all_evaluation_payloads = _build_plain_payloads(candidates, action_prompt, base64_image)

    # Scoring backend: "gpt" uses the hosted API, anything else the local vLLM server.
    prm = "local"
    if prm == "gpt":
        model = "gpt-4o-mini" #"gpt-4o-2024-08-06"
        temperature = 0.3
        max_tokens = 3000
        max_processes = 2
        api_base = ""
        api_key = ""
        llm_outputs_raw = parallel_call_gpt(
            all_evaluation_payloads,
            model,
            temperature,
            max_tokens,
            max_processes,
            api_base,
            api_key
        )
    else:
        model = "qwen2.5-vl-72b-instruct"
        temperature = 0.0
        max_tokens = 1200
        max_processes = 4

        T1 = time.time()
        llm_outputs_raw = parallel_call_vllm(
            all_evaluation_payloads,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            max_workers=max_processes,
            port="8005",
        )
        T2 = time.time()
        print('BoN Selecting Time :%s s' % ((T2 - T1)))

    if not llm_outputs_raw or not any(llm_outputs_raw):
        raise ValueError("LLM 调用失败，返回的结果列表全为空。请检查模型调用、API配置或网络连接。")

    scored_responses = []
    for idx, raw_text in enumerate(llm_outputs_raw):
        eval_data = _evaluate_output(idx, raw_text)
        scored_responses.append({
            # Fall back to the candidate text when the scorer did not echo it.
            "response": eval_data.get("original_step", responses_list[idx]),
            "score": eval_data["score"],
            "index": idx
        })

    print("Scored Responses:",scored_responses)

    # scored_responses cannot be empty here: an empty output list raised above.
    best_response = max(scored_responses, key=lambda x: x['score'])
    return best_response['index'], llm_outputs_raw, scored_responses, debug_tool_conversation_log, dynamic_history


if __name__ == "__main__":
    # Manual smoke test. It needs the scoring backend used by select_the_best
    # to be reachable. Tool=False keeps the dynamic-memory and tool-router
    # services out of the picture, so the placeholder values for goal/history/
    # ui_elements/additional_guidelines are never read.
    demo_screen = np.zeros((64, 64, 3), dtype=np.uint8)
    demo_responses = [
        'Reason: The Contacts app has to be opened first.\n'
        'Action: {"action_type": "open_app", "app_name": "Contacts"}',
        'Reason: Nothing to do yet.\n'
        'Action: {"action_type": "wait"}',
    ]

    result = select_the_best(
        BoN_num=len(demo_responses),
        responses_list=demo_responses,
        action_prompt="Open the Contacts app.",
        tool_prompt="",
        image_list=[demo_screen, demo_screen],
        Tool=False,
        goal="Open the Contacts app.",
        history=[],
        ui_elements=[],
        additional_guidelines=None,
        previous_action_score=None,
        previous_action=None,
    )
    print(result)
