import json
from PIL import Image
from openai import OpenAI
# Load API credentials at import time. The path is relative, so "env.json"
# is resolved against the current working directory of the process.
with open("env.json",'r') as f:
    api_keys = json.load(f)

# Module-level client used by split_into_sentences_llm. It targets a custom
# base URL and authenticates with the 'API_DEFAULT' entry of env.json.
client_s = OpenAI(
    base_url="https://yunwu.ai/v1",
    api_key=api_keys['API_DEFAULT'],
    timeout=120
)



def split_into_sentences_llm(text,client,split_model_name,bot,eot):
    """Segment a rationale with an LLM and return its cumulative prefixes.

    The model is instructed to wrap every segment in
    ``<sentence>...</sentence>`` tags. The tagged segments are extracted from
    the reply and concatenated progressively.

    Args:
        text: The rationale (model response) to segment.
        client: Currently unused. The request always goes through the
            module-level ``client_s``; the parameter is kept so existing
            callers keep working.
        split_model_name: Currently unused. The model name is hard-coded in
            the request below; kept for interface compatibility.
        bot: Marker interpolated into the prompt as the tag that opens the
            thought process.
        eot: Marker interpolated into the prompt as the tag that closes the
            thought process.

    Returns:
        A list of strings in which item ``i`` is the concatenation of the
        first ``i + 1`` extracted segments. The last extracted segment is
        never appended, so the list is empty when the reply contains fewer
        than two tagged segments.
    """
    import re
    from itertools import accumulate

    messages = [{"role": "user", "content": f"""
    Next I will give you a reasoning rationale, you will help me to split the rationale into a sequence of full sentences. You must follow the format below:
    "<sentence>Sentence 1</sentence>\n<sentence>Sentence 2</sentence>\n..."
    Note:
        1. The line break character "\n" that follows the end of one sentence and precedes the start of the next sentence shall be attached to the latter sentence. DO NOT IGNORE THE LINE BREAK.
        2. Please only output the segmentation results. Do not include any extra output. However, you should delete all text after {eot}.
        3. Colons do not split.
        4. When the reasoning (rationale) is listing four options, the options themselves should not be split by the newline character.
        5. Do not split colon.
        6. The thought process, i.e., the content between {bot} and {eot}, should be segmented into multiple parts based on meaning. Each segment should maintain appropriate length (neither overly long nor short) and internal semantic coherence. The tag {bot} must be placed at the beginning of the first sentence, while {eot} must be placed at the end of a sentence. (If {eot} not exist, you just need to care about {bot}).
    Follow the example below STRICTLY:
    Input: "{bot}\nGot it, let's look at the image. The nearest traffic light is the one hanging from the pole. It's showing blue? Wait, no, traffic lights are usually red, yellow, green. Wait, the image: the traffic light is blue? Wait, no, maybe it's a blue light. Wait, the options: A is Blue. Let's check. The traffic light in the image is a blue light. So the answer is A.\n{eot}\n\n\\A"
    Your output:
    "
    <sentence>{bot}\nGot it, let's look at the image. The nearest traffic light is the one hanging from the pole.</sentence> <sentence>It's showing blue? Wait, no, traffic lights are usually red, yellow, green. Wait, the image: the traffic light is blue? Wait, no, maybe it's a blue light.</sentence> <sentence>Wait, the options: A is Blue. Let's check. The traffic light in the image is a blue light.</sentence> <sentence>So the answer is A.\n{eot}</sentence>
    "

    Now split the following rationale into <sentence></sentence>:
    "{text}"
        """}]

    # NOTE(review): OpenAI documents `max_completion_tokens` (not
    # `max_tokens`) for its reasoning models; confirm that the endpoint
    # behind client_s accepts the parameters below for this model.
    response = client_s.chat.completions.create(
        messages=messages,
        temperature=0,
        seed=42,
        max_tokens=10000,
        model="gpt-5-nano-2025-08-07",
        reasoning_effort="low",
    )

    # `content` is optional in the response type; treat a missing reply as
    # empty text so extraction yields no segments instead of a TypeError.
    reply = response.choices[0].message.content or ""

    # DOTALL lets a segment span line breaks, which the prompt asks the
    # model to keep attached to the following sentence.
    sentences = re.findall(r'<sentence>(.*?)</sentence>', reply, re.DOTALL)

    # Running concatenation of every segment except the last one.
    return list(accumulate(sentences[:-1]))
def split_into_sentences(text):
    """Split text into sentences by punctuation and return cumulative prefixes.

    A split happens right after ``!``, ``?`` or the CJK full stop, and right
    after a ``.`` that is not followed by a digit (optionally preceded by
    another dot), so decimals such as ``3.14`` stay intact. The dot that
    follows a line-leading list number ("1.", "2.", ...) is protected from
    splitting, both after a newline and at the very start of the text.
    Pieces that contain only whitespace are discarded.

    Args:
        text: The string to segment.

    Returns:
        A list of strings in which item ``i`` is the concatenation of the
        first ``i + 1`` pieces. The final piece is never appended, so the
        list is empty when the text yields fewer than two pieces.
    """
    import re
    from itertools import accumulate

    # Hide the dot of line-leading list numbers so it is not taken for a
    # sentence end. `^` covers a list that starts the text, which has no
    # preceding newline to anchor on.
    protected_text = re.sub(r'((?:^|\n)\s*\d+)\.', r'\1__PROTECTED_DOT__', text)
    pattern = r'''
        (?<=[!?。])                
        |                     
        (?<!\d)               
        (?<!\.\d)                 
        (?<=\.)                    
        (?!\.?\d)                  
    '''
    pieces = re.split(pattern, protected_text, flags=re.VERBOSE)
    # Restore the protected dots and drop whitespace-only pieces (including
    # the empty piece produced by a split at the very end of the text).
    sentences = [
        piece.replace('__PROTECTED_DOT__', '.')
        for piece in pieces
        if piece.strip()
    ]
    # Running concatenation of every piece except the last one.
    return list(accumulate(sentences[:-1]))

def get_probe_prompt(probe_text,
                     source_response,
                     source_prompt,
                     llm_split=False,
                     client=None,
                     split_model_name=None,
                     bot=None,
                     eot=None):
    """Build one chat-message list per cumulative prefix of a response.

    The response is split into cumulative sentence prefixes, ``probe_text``
    is appended to each prefix, and every result is attached to
    ``source_prompt`` as an extra assistant message.

    Args:
        probe_text: Text appended to every prefix.
        source_response: The response to split.
        source_prompt: List of chat messages; it is concatenated with a
            one-element list, so it is not mutated.
        llm_split: When true, split with ``split_into_sentences_llm``;
            otherwise use the regex-based ``split_into_sentences``.
        client: Forwarded to ``split_into_sentences_llm``.
        split_model_name: Forwarded to ``split_into_sentences_llm``.
        bot: Thought-opening marker forwarded to ``split_into_sentences_llm``.
            Required when ``llm_split`` is true.
        eot: Thought-closing marker forwarded to ``split_into_sentences_llm``.
            Required when ``llm_split`` is true.

    Returns:
        A list of message lists, one per prefix, each ending with an
        assistant message whose text is ``prefix + probe_text``.

    Raises:
        ValueError: If ``llm_split`` is true and ``bot`` or ``eot`` is None.
    """
    if llm_split:
        # split_into_sentences_llm needs both markers; the previous
        # three-argument call always failed with a TypeError.
        if bot is None or eot is None:
            raise ValueError(
                "get_probe_prompt(llm_split=True) requires both `bot` and `eot`"
            )
        split_sentences = split_into_sentences_llm(
            source_response, client, split_model_name, bot, eot
        )
    else:
        split_sentences = split_into_sentences(source_response)

    return [
        source_prompt + [
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "text",
                        "text": prefix + probe_text,
                    }
                ],
            }
        ]
        for prefix in split_sentences
    ]

def get_probe_prompt_v2(probe_text,
                     source_response,
                     source_prompt,
                     bot,
                     eot,
                     llm_split=False,
                     client=None,
                     split_model_name=None):
    """Build plain-text probe prompts from cumulative prefixes of a response.

    Args:
        probe_text: Text appended to every prefix.
        source_response: The response to split.
        source_prompt: Prompt that each suffix is appended to with ``+``.
        bot: Thought-opening marker, used only by the LLM splitter.
        eot: Thought-closing marker, used only by the LLM splitter.
        llm_split: When true, split with ``split_into_sentences_llm``;
            otherwise use ``split_into_sentences``.
        client: Forwarded to ``split_into_sentences_llm``.
        split_model_name: Forwarded to ``split_into_sentences_llm``.

    Returns:
        A pair ``(prompts, suffixes)`` of equal-length lists, where
        ``suffixes[i]`` is ``prefix_i + probe_text`` and ``prompts[i]`` is
        ``source_prompt + suffixes[i]``.
    """
    prefixes = (
        split_into_sentences_llm(source_response, client, split_model_name, bot, eot)
        if llm_split
        else split_into_sentences(source_response)
    )
    suffixes = [prefix + probe_text for prefix in prefixes]
    full_prompts = []
    for suffix in suffixes:
        full_prompts.append(source_prompt + suffix)
    return full_prompts, suffixes

def get_multimodal_probe_inputs(probe_text,
                          source_response,
                          source_prompt,
                          image_path,
                          bot,
                          eot,
                          llm_split=False,
                          client=None,
                          split_model_name=None):
    """Pair every probe prompt with the image at ``image_path``.

    Args:
        probe_text: Text appended to every response prefix.
        source_response: The response to split into prefixes.
        source_prompt: Prompt that each suffix is appended to.
        image_path: Path handed to ``Image.open`` for every prompt.
        bot: Thought-opening marker forwarded to ``get_probe_prompt_v2``.
        eot: Thought-closing marker forwarded to ``get_probe_prompt_v2``.
        llm_split: Forwarded to ``get_probe_prompt_v2``.
        client: Forwarded to ``get_probe_prompt_v2``.
        split_model_name: Forwarded to ``get_probe_prompt_v2``.

    Returns:
        A pair ``(inputs, suffixes)``. ``inputs`` holds one dict per probe
        prompt with the keys ``"prompt"`` and ``"multi_modal_data"``;
        ``suffixes`` is the second value returned by ``get_probe_prompt_v2``.
    """
    prompts, suffixes = get_probe_prompt_v2(
        probe_text,
        source_response,
        source_prompt,
        bot,
        eot,
        llm_split,
        client,
        split_model_name,
    )
    payloads = []
    for prompt_text in prompts:
        # The image is opened once per prompt, so each entry gets its own
        # Image object.
        payloads.append({
            "prompt": prompt_text,
            "multi_modal_data": {"image": Image.open(image_path)},
        })
    return payloads, suffixes

def merge_json(json1,json2,target):
    """Concatenate the contents of two JSON files and write the result.

    Both files are read as UTF-8 and their parsed contents are joined with
    ``+`` (first file's content first). The result is written to ``target``
    as UTF-8 JSON with non-ASCII characters kept and a 2-space indent.

    Args:
        json1: Path of the first input file.
        json2: Path of the second input file.
        target: Path of the output file; it is overwritten.
    """
    loaded = []
    for path in (json1, json2):
        with open(path, 'r', encoding='utf-8') as source:
            loaded.append(json.load(source))
    first, second = loaded
    merged = first + second
    with open(target, 'w', encoding='utf-8') as sink:
        json.dump(merged, sink, ensure_ascii=False, indent=2)

if __name__ == "__main__":
    # Script entry point: merges JSON result files into one via merge_json.
    # l = split_into_sentences("To answer this question, let's analyze the image step by step:\n\n1. Identify the Left Palm: The girl in the image has her arms extended outward, and her palms are facing the camera. The left palm is clearly visible.\n\n2. Count the Fingers: Upon close observation of the left palm, all five fingers (thumb, index finger, middle finger, ring finger, and little finger) are visible and distinct.\n\n3. Verify the Count: There are no overlapping fingers or any indication of missing fingers. All five fingers appear normal and complete.\n\nGiven this analysis, the number of fingers on the left palm of the girl in the picture is 5.\n\nThe final answer is <answer>A</answer>.")
    # for ll in l:
    #     print(f'######\n{ll}')
    # NOTE(review): the same path is passed as both inputs, so the output
    # holds that file's content concatenated with itself. Confirm whether a
    # different second file was intended.
    merge_json("experiment3/internvl3-38b_probe2_0819.json","experiment3/internvl3-38b_probe2_0819.json","./experiment3/internvl3-38b_0819_probe_sum.json")
#     print(split_into_sentences_llm("""To determine the color of the nearest traffic light in the picture, let's analyze the image step by step:
#
# 1. Identify the Traffic Light: The nearest traffic light is clearly visible in the foreground, hanging from a pole above the street.
#
# 2. Examine the Lights: The traffic light has three circular lights stacked vertically. In this case, all three lights are illuminated in blue.
#
# 3. Interpret the Color: Typically, traffic lights use red, yellow (or amber), and green to indicate stop, caution, and go, respectively. However, in this image, the lights are all blue, which is not standard for traffic signals.
#
# 4. Consider the Context: Since the lights are all blue and there is no indication of a special situation (such as a malfunction or a non-standard traffic signal), we must conclude that the image shows the traffic light in a non-standard configuration.
#
# Given the options provided:
# - A. Blue: This matches the color of the lights in the image.
# - B. Red: The lights are not red.
# - C. Green: The lights are not green.
# - D. The correct answer is not listed: While the blue color is unusual for a traffic light, it is explicitly shown in the image.
#
# Since the image clearly shows the traffic light with blue lights, the most accurate choice is "Blue."
#
# Summarize: The final answer is <answer>A</answer>.
#     """))