import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from utils.utils import *
from llm.avior_api import *
from llm.gpt import *
from prompt.ALCUNA_prompt import *
import numpy as np
from tqdm import tqdm
import json
import argparse


# Lookup table from a question-type key to its prompt template.
# generate_prompt() indexes this dict with its `question_type` argument and
# substitutes the "[Taxon]" / "[Question]" placeholders in the selected
# template. The *_PROMPT names are not defined in this file; they arrive via
# the star imports above (presumably prompt.ALCUNA_prompt -- verify there).
instruction_template_dict = {
    "KA": KA_PROMPT,
    "KU": KU_PROMPT,
    "KD": KD_PROMPT,
    "KD_TRUST_OWN": KD_TRUST_OWN_PROMPT,
    "KD_TRUST_HALF": KD_TRUST_HALF_PROMPT,
    "INT": INT_PROMPT,
    "INT_CONTEXT": INT_CONTEXT_PROMPT,
    "CONFLICT": CONFLICT_PROMPT,
    "KU_TRUST_OWN_CHOICE": KU_TRUST_OWN_CHOICE_PROMPT,
    "KU_TRUST_OWN_HALF_CHOICE": KU_TRUST_OWN_HALF_CHOICE_PROMPT,
    "OKF": OUTPUT_OWN_FIRST_PROMPT,
}


def generate_prompt(taxon_info, question, question_type):
    """
    Fill the prompt template selected by ``question_type``.

    Args:
    taxon_info: Context to substitute for "[Taxon]". Dicts and lists are
        rendered as indented JSON; anything else goes through ``str()``.
    question: Text to substitute for "[Question]" (non-strings go through ``str()``).
    question_type (str): Key into ``instruction_template_dict``.

    Returns:
    str: The template with its placeholders replaced.

    Raises:
    KeyError: If ``question_type`` is not a key of ``instruction_template_dict``.
    """
    template = instruction_template_dict[question_type]

    if isinstance(taxon_info, (dict, list)):
        taxon_text = json.dumps(taxon_info, indent=2)
    else:
        taxon_text = str(taxon_info)
    question_text = question if isinstance(question, str) else str(question)

    # The "internal knowledge" templates carry no context, so only the
    # question placeholder is substituted for them.
    if question_type not in ("INT", "DROPOUT_INT"):
        template = template.replace("[Taxon]", taxon_text)
    return template.replace("[Question]", question_text)


def evaluation(input_file, output_file, question_type, model):
    """
    Query a model on every item of a JSON question file and score its answers.

    For each of the first 3200 items that has both a 'question' and a
    'new_knowledge' entry, a prompt is built with ``generate_prompt``, sent to
    the model, and the text after the final-answer marker is compared with the
    item's 'answers'. The parsed result and a 0/1 score are stored on the item
    under 'result' and 'acc'. The data is written to ``output_file`` every 300
    items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file (a list of items).
    output_file (str): Path the annotated items are written to.
    question_type (str): Key into ``instruction_template_dict``. "KA" is scored
        as a single choice after "Final Choice:"; every other type is scored as
        a list literal after "Final Answer:".
    model (str): Model name; names containing "gpt" go through
        ``generate_chatgpt_response``, all others through ``chat_completion``.
    """
    # Function-scope import so this block does not depend on the file header.
    import ast

    data = read_json_file(input_file)
    acc = []
    is_choice = question_type == "KA"

    for index, item in enumerate(tqdm(data[:3200])):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        answer = item.get('answers')
        # Guarded so a missing or empty answer list scores 0 instead of crashing.
        if is_choice and answer:
            answer = answer[0]

        if question and taxon_info:
            prompt = generate_prompt(taxon_info, question, question_type)

            if "gpt" in model:
                response = generate_chatgpt_response(prompt, model)
            else:
                response = chat_completion(prompt, model)
            # Other call sites in this file unpack generate_chatgpt_response as
            # (text, price); accept both shapes so .split() below cannot fail
            # on a tuple.
            result = response[0] if isinstance(response, tuple) else response

            if is_choice:
                result = result.split("Final Choice:")[-1].strip(".").strip()
            else:
                result = result.split("Final Answer:")[-1].strip(".").strip().lower()

            print(result)
            print(answer)

            try:
                if is_choice:
                    is_acc = int(result == answer)
                else:
                    # Every gold answer's first word must occur in the raw text.
                    is_acc = 1
                    for ans in answer:
                        if str(ans).split()[0].strip().lower() not in result:
                            print(ans, result)
                            is_acc = 0
                            break
                    # SECURITY: this used to be eval() on raw model output.
                    # literal_eval accepts only Python literals, so the model
                    # cannot make this process execute code.
                    result = ast.literal_eval(result)
                    if len(result) != len(answer) and len(result) != 0:
                        is_acc = 0
                        # A yes/no reply is judged on its first element only.
                        if result[0] in ("yes", "no"):
                            if result[0] == str(answer[0]).strip().lower():
                                is_acc = 1
            except Exception as e:
                # Best effort: output that cannot be parsed counts as wrong.
                print(e)
                is_acc = 0

            acc.append(is_acc)
            item['result'] = result
            item['acc'] = is_acc
            print("current acc: ", np.mean(acc))

        # Periodic checkpoint; `item` is mutated in place so `data` is current.
        if index % 300 == 0:
            write_json_file(data, output_file)

    write_json_file(data, output_file)


def translate_dropout_attribute_to_question_verification(meta_data, question):
    """
    Append the related property's values to ``question``.

    Args:
    meta_data (dict): May hold a 'related_property' dict with a 'values' entry.
    question (str): Text the rendered values are appended to.

    Returns:
    str: ``question`` followed by the values, JSON-encoded when they are a
        list and ``str()``-converted otherwise. Missing entries render as "[]".
    """
    values = meta_data.get('related_property', {}).get('values', [])
    if isinstance(values, list):
        suffix = json.dumps(values)
    else:
        suffix = str(values)
    return question + suffix


def translate_triple_to_question_variation(meta_data, old_name):
    """
    Build a yes/no question asking whether ``old_name`` has the *new* property values.

    Args:
    meta_data (dict): Must hold ``['related_property']['new']`` with 'values' and 'name'.
    old_name (str): Entity name inserted into the question.

    Returns:
    str: "Is the <name> of <old_name> <JSON values>?"

    Raises:
    KeyError: If 'new', 'values' or 'name' is missing.
    """
    new_property = meta_data.get('related_property', {})['new']
    values, property_name = new_property['values'], new_property['name']
    return f"Is the {property_name} of {old_name} {json.dumps(values)}?"


def translate_triple_to_question_dropout(meta_data, old_name):
    """
    Build a yes/no question asking whether ``old_name`` has the related property values.

    Args:
    meta_data (dict): Must hold a 'related_property' dict with 'values' and 'name'.
    old_name (str): Entity name inserted into the question.

    Returns:
    str: "Is the <name> of <old_name> <JSON values>?"

    Raises:
    KeyError: If 'values' or 'name' is missing.
    """
    prop = meta_data.get('related_property', {})
    values, property_name = prop['values'], prop['name']
    return f"Is the {property_name} of {old_name} {json.dumps(values)}?"

def conflict_knowledge_evaluation(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Keep the items whose underlying fact the model already knows without context.

    For every item from index 2082 onwards, a yes/no verification question is
    built about the item's *old* entity name using the metadata of the *new*
    entity, and sent to the model with the context-free "INT" template. The item
    is flagged ``internal_acc = 1`` when the model answers "no" for a
    "variation" item or "yes" for a "dropout" item. Flagged items are appended
    to a list seeded from a hard-coded earlier output file; that list is written
    to ``output_file`` every 20 iterations and at the end. The loop stops once
    the list holds more than 1799 items.

    Args:
    input_file (str): Path to the input JSON file (a list of items).
    output_file (str): Path the filtered items are written to.
    new2old_knowledge_file (str): Path to a JSON mapping from new entity name to old entity name.
    parent_entity_file (str): Path to a JSON file that is loaded but only used by the disabled code below.
    model (str): Model name; selects the backend ("o1", then "gpt", else ``chat_completion``).

    Raises:
    KeyError: If an item's new name is missing from the new-to-old mapping.
    """
    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    parent_entity = read_json_file(parent_entity_file)
    internal_acc = []
    # internal_context_acc / conflict_acc are only filled by the disabled code below.
    internal_context_acc = []
    conflict_acc = []
    total_price = 0
    # Stays 0 for the chat_completion backend, which returns no price here.
    price = 0
    # NOTE(review): hard-coded seed file together with the 2082 offset below --
    # presumably resuming an interrupted run; confirm before reusing.
    filtered_data = read_json_file("./output/ALCUNA/KD_o1-preview_conflict_0826_old.json")

    # `index` restarts at 0 here; it is relative to the slice, not to `data`.
    for index, item in enumerate(tqdm(data[2082:])):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        meta_data = item.get('meta_data')
        answer = item.get('answers')
        conflict_knowledge_acc = 1
        is_internal = 0

        # Map the new entity name to its old (parent) name.
        new_name = taxon_info['name']
        old_name = new2old_knowledge[new_name]

        # Any difference other than "variation" is treated like "dropout".
        if meta_data['difference'] == "variation":
            verifi_que = translate_triple_to_question_variation(meta_data, old_name)
        else:
            verifi_que = translate_triple_to_question_dropout(meta_data, old_name)

        prompt = generate_prompt("", verifi_que, "INT") # empty taxon info: no context, so only internal knowledge is tested

        if 'o1' in model:
            result, price = generate_o1_response(prompt, model)
        elif "gpt" in model:
            result,price = generate_chatgpt_response(prompt, model)
        else:
            result = chat_completion(prompt, model)

        # Keep only the text after the last "Final Answer:" marker, normalised.
        result = result.split("Final Answer:")[-1].strip(".").strip("!").strip().lower()
        total_price += price
        print("current total price: ", total_price)
        # A "variation" item counts as known when the model says "no" (the
        # question uses the new entity's values for the old entity); a
        # "dropout" item counts as known when it says "yes".
        if meta_data['difference'] == "variation" and result == "no":
            is_internal = 1
        elif meta_data['difference'] == "dropout" and result == "yes":
            is_internal = 1

        item['internal_acc'] = is_internal
        internal_acc.append(is_internal)

        # --- Disabled: in-context verification and conflict scoring. ---
        # if is_internal == 1:
        #     is_context_internal_acc = 0
        #     # check in context internal, only change the prompt (adding taxon info into context)
        #     parent_taxon_info = parent_entity[old_name]
        #     prompt = generate_prompt(parent_taxon_info, verifi_que, "INT_CONTEXT")

        #     if "gpt" in model:
        #         result = generate_chatgpt_response(prompt, model)
        #     else:
        #         result = chat_completion(prompt, model)
            
        #     result = result.split("Final Answer:")[-1].strip(".").strip("!").strip().lower()

        #     # also should say no for the old entity, because the meta data is for the new entity
        #     if meta_data['difference'] == "variation" and result == "no":
        #         is_context_internal_acc = 1
        #     elif meta_data['difference'] == "dropout" and result == "yes":
        #         is_context_internal_acc = 1

        #     internal_context_acc.append(is_context_internal_acc)
        #     item['is_context_internal_acc'] = is_context_internal_acc

        #     # build conflict prompt. do for droput (drop the property) and variation (change the property)
        #     taxon_info['name'] = old_name   # change the name to the parent name to build conflict knowledge
        #     old_entity_question = question.replace(new_name, old_name)
            
        #     # not using the verify question. for we have multiple types of question here to test the vastity of the model.
        #     prompt = generate_prompt(taxon_info, old_entity_question, "KD") # KU_PROMPT is the same as KD_PROMPT

        #     if "gpt" in model:
        #         result = generate_chatgpt_response(prompt, model)
        #     else:
        #         result = chat_completion(prompt, model)
        #     result = result.split("Final Answer:")[-1].strip(".").strip().lower()

        #     print(prompt, result)
        #     print(answer)

        #     try:
        #         conflict_knowledge_acc = 1
        #         for ans in answer:
        #             if (str(ans).split())[0].strip().lower() not in result:
        #                 conflict_knowledge_acc = 0
        #                 break
        #         result = eval(result)
        #         if (len(result) != len(answer)) and len(result) != 0:
        #             conflict_knowledge_acc = 0
        #             if result[0] == "yes" or result[0] == "no":
        #                 if result[0] == str(answer[0]).strip().lower():
        #                     conflict_knowledge_acc = 1
        #         conflict_knowledge_acc = 1 - conflict_knowledge_acc
        #     except Exception as e:
        #         print(e)
        #         conflict_knowledge_acc = 0

        #     conflict_acc.append(conflict_knowledge_acc)
        #     item['conflict_knowledge_acc'] = conflict_knowledge_acc

        # Only items the model already knows are kept for the conflict study.
        if is_internal == 1:
            filtered_data.append(item)

        print("current internal_acc: ", np.mean(internal_acc))
        # print("current internal_context_acc: ", np.mean(internal_context_acc))
        # print("current conflict_knowledge_acc: ", np.mean(conflict_acc))

        # Periodic checkpoint of the filtered list.
        if index % 20 == 0:
            write_json_file(filtered_data, output_file)

        # Stop once 1800 items (seed file included) have been collected.
        if len(filtered_data) > 1799:
            break

    write_json_file(filtered_data, output_file)


def conflict_knowledge_only(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Score how often the model rejects context that conflicts with what it knows.

    For each of the first 3200 items flagged ``internal_acc == 1``, the new
    entity name is replaced by its old name in both the context and the
    question, the "KD_TRUST_HALF" prompt is sent to the model, and the reply is
    compared with the item's 'answers'. The stored score is inverted: 1 means
    the reply did NOT match the context-derived answers. A reply that cannot be
    parsed scores 0. The score is stored under
    'conflict_knowledge_acc_trust_own'; the data is written to ``output_file``
    every 100 items and at the end.

    Args:
    input_file (str): Path to the input JSON file (a list of items).
    output_file (str): Path the annotated items are written to.
    new2old_knowledge_file (str): Path to a JSON mapping from new entity name to old entity name.
    parent_entity_file (str): Unused; kept so existing callers keep working.
    model (str): Model name; names containing "gpt" go through
        ``generate_chatgpt_response``, all others through ``chat_completion``.
    """
    # Function-scope import so this block does not depend on the file header.
    import ast

    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    # Built once: testing membership in dict.values() is a linear scan per item.
    known_old_names = set(new2old_knowledge.values())
    conflict_acc = []

    for index, item in enumerate(tqdm(data[:3200])):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        answer = item.get('answers')

        # Resolve the old (parent) name. It is reset for every item so a name
        # found in neither direction can no longer reuse the previous item's.
        new_name = taxon_info['name']
        if new_name in new2old_knowledge:
            old_name = new2old_knowledge[new_name]
        elif new_name in known_old_names:
            old_name = new_name
        else:
            old_name = None

        is_known = item.get('internal_acc') == 1
        if is_known and old_name is None:
            print(f"skipping item {index}: no old name known for {new_name!r}")
        elif is_known:
            # Rename the entity so the context contradicts the model's own knowledge.
            taxon_info['name'] = old_name
            old_entity_question = question.replace(new_name, old_name)

            # The item's own question is used (not the verification question)
            # so that several question forms are covered.
            prompt = generate_prompt(taxon_info, old_entity_question, "KD_TRUST_HALF")

            if "gpt" in model:
                response = generate_chatgpt_response(prompt, model)
            else:
                response = chat_completion(prompt, model)
            # Other call sites in this file unpack generate_chatgpt_response as
            # (text, price); accept both shapes.
            result = response[0] if isinstance(response, tuple) else response
            result = result.split("Final Answer:")[-1].strip(".").strip().lower()

            print(prompt, result)
            print(answer)

            try:
                # Every gold answer's first word must occur in the raw text.
                conflict_knowledge_acc = 1
                for ans in answer:
                    if str(ans).split()[0].strip().lower() not in result:
                        conflict_knowledge_acc = 0
                        break
                # SECURITY: this used to be eval() on raw model output.
                # literal_eval accepts only Python literals.
                result = ast.literal_eval(result)
                if len(result) != len(answer) and len(result) != 0:
                    conflict_knowledge_acc = 0
                    # A yes/no reply is judged on its first element only.
                    if result[0] in ("yes", "no"):
                        if result[0] == str(answer[0]).strip().lower():
                            conflict_knowledge_acc = 1
                # Invert: the metric counts replies that disagree with the context.
                conflict_knowledge_acc = 1 - conflict_knowledge_acc
            except Exception as e:
                # Best effort: output that cannot be parsed scores 0.
                print(e)
                conflict_knowledge_acc = 0

            conflict_acc.append(conflict_knowledge_acc)
            item['conflict_knowledge_acc_trust_own'] = conflict_knowledge_acc

        # np.mean([]) is nan and emits a RuntimeWarning, so wait for a score.
        if conflict_acc:
            print("current conflict_knowledge_acc: ", np.mean(conflict_acc))

        # Periodic checkpoint; `item` is mutated in place so `data` is current.
        if index % 100 == 0:
            write_json_file(data, output_file)

    write_json_file(data, output_file)



# One entry per prompting strategy that conflict_whole_eval runs on each item:
# the prompt-template key, the item field that stores the raw response, the
# three item fields that store the scores, and the progress-output labels
# (trust-context, trust-own, unknown, memorization ratio).
_WHOLE_EVAL_SETTINGS = (
    {
        "prompt": "KA",  # neutral prompt
        "raw": "result_unk_np",
        "context": "conflict_knowledge_trust_context_rate_np",
        "own": "conflict_knowledge_trust_own_rate_np",
        "unknown": "conflict_knowledge_unknown_np",
        "labels": (
            "current trust context rate neutral prompt: ",
            "current trust own rate neutral prompt: ",
            "current unknown rate neutral prompt: ",
            "memorization ratio np: ",
        ),
    },
    {
        "prompt": "KU_TRUST_OWN_CHOICE",  # trust-own-knowledge prompt
        "raw": "result_unk_trust_own",
        "context": "conflict_knowledge_trust_context_tokp",
        "own": "conflict_knowledge_trust_own_tokp",
        "unknown": "conflict_knowledge_unknown_tokp",
        "labels": (
            # Previously printed as "trust own rate ..." although it reports
            # the trust-context list.
            "current trust context rate trust own knowledge prompt: ",
            "current trust own rate trust own knowledge prompt: ",
            "current unknown rate trust own knowledge prompt: ",
            "memorization ratio tokp: ",
        ),
    },
    {
        "prompt": "OKF",  # own-knowledge-first prompt
        "raw": "result_okf",
        "context": "conflict_knowledge_trust_context_okf",
        "own": "conflict_knowledge_trust_own_okf",
        "unknown": "conflict_knowledge_unknown_okf",
        "labels": (
            "current own knowledge first prompt ck: ",
            "current own knowledge first prompt pk: ",
            "current own knowledge first prompt unknown: ",
            "memorization ratio okf: ",
        ),
    },
)


def _whole_eval_query_model(prompt, model):
    """Send ``prompt`` to the backend selected by ``model``; return ``(text, price)``."""
    if "o1" in model:
        return generate_o1_response(prompt, model)
    if "gpt" in model:
        return generate_chatgpt_response(prompt, model)
    # chat_completion's result is used as plain text; it adds nothing to the cost.
    return chat_completion(prompt, model), 0


def _whole_eval_parse_choice(response):
    """Reduce a raw response to the first character of its final choice."""
    tail = response.split("Final Choice:")[-1].lower().strip()
    # NOTE: str.strip("unknown") removes any of the characters u/n/k/o/w from
    # both ends, not the word itself. Kept as is so scores stay comparable
    # with earlier runs.
    tail = tail.strip("unknown").strip().strip(".").strip("]").strip("[").strip(".").strip()
    return tail[0] if len(tail) > 1 else tail


def _whole_eval_build_question(question, old_entity_question, form, meta_data):
    """Append numbered options, including an "Unknown" one, to the renamed question."""
    if form == "boolean":
        return old_entity_question + "\n0. Yes\n\n1. No\n\n2. Unknown"
    if form == "multi-choice":
        # The existing options are part of the question text; only "Unknown" is added.
        return old_entity_question + "\n\n4. Unknown"
    if form == "fill-in-blank":
        related = meta_data['related_property']
        if meta_data['difference'] == "dropout":
            return old_entity_question + "\n0. " + str(related['values']) + "\n\n1. Unknown"
        if meta_data['difference'] == "variation":
            # Option 0 is the old value, option 1 the new (in-context) value.
            return (old_entity_question
                    + "\n0. " + str(related['old']['values'])
                    + "\n\n1. " + str(related['new']['values'])
                    + "\n\n2. Unknown")
    # Unrecognised form/difference: the question is left untouched.
    return question


def _whole_eval_score_choice(choice, form, difference, answer):
    """Classify a choice; return ``(trust_context, trust_own, unknown)`` as 0/1 flags."""
    trust_context = trust_own = unknown = 0
    try:
        if form == "boolean":
            if choice == "0":  # "Yes"
                if answer[0].lower() == "yes":
                    trust_context = 1
                elif answer[0].lower() == "no":
                    trust_own = 1
                if difference == "dropout":
                    trust_own = 1
            elif choice == "1":  # "No"
                if answer[0].lower() == "no":
                    trust_context = 1
                elif answer[0].lower() == "yes":
                    trust_own = 1
            elif choice == "2":  # "Unknown"
                unknown = 1
                if answer[0].lower() == "i don't know":
                    trust_context = 1
        elif form == "fill-in-blank":
            if difference == "dropout":
                if choice == "1":
                    unknown = 1
                    if answer[0].lower() == "i don't know":
                        trust_context = 1
                elif choice == "0":
                    trust_own = 1
            elif choice == "2":
                unknown = 1
            elif choice == "1":
                trust_context = 1
            elif choice == "0":
                trust_own = 1
        elif form == "multi-choice":
            if choice == str(answer[0]):
                trust_context = 1
            # Independent check, now the same for all three prompt settings.
            if choice == "4":
                unknown = 1
    except (AttributeError, IndexError, KeyError, TypeError) as e:
        # Malformed `answer` (missing, empty, or non-string first element):
        # the item cannot count as trusting the context.
        print(e)
        trust_context = 0
    return trust_context, trust_own, unknown


def conflict_whole_eval(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Run the full conflict evaluation under three prompting strategies.

    Only "variation" items are processed. Items whose question already appears
    in a hard-coded earlier result file are not re-queried; their stored scores
    are added to the running statistics. Every other item flagged
    ``internal_acc == 1`` is renamed to its old entity, turned into a
    multiple-choice question with an "Unknown" option, and sent to the model
    once per entry of ``_WHOLE_EVAL_SETTINGS``. Each reply is classified as
    trusting the context, trusting the model's own knowledge, or unknown; the
    raw replies and flags are stored on the item. Results (earlier ones plus
    new ones) are written to ``output_file`` at the end and also periodically
    during the loop.

    Args:
    input_file (str): Path to the input JSON file (a list of items).
    output_file (str): Path the evaluated items are written to.
    new2old_knowledge_file (str): Path to a JSON mapping from new entity name to old entity name.
    parent_entity_file (str): Unused; kept so existing callers keep working.
    model (str): Model name; selects the backend ("o1", then "gpt", else ``chat_completion``).
    """
    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    old2new_knowledge = {v: k for k, v in new2old_knowledge.items()}

    # Running score lists, keyed by the item field each score is stored under.
    scores = {
        setting[kind]: []
        for setting in _WHOLE_EVAL_SETTINGS
        for kind in ("context", "own", "unknown")
    }

    # NOTE(review): hard-coded path to earlier results -- presumably used to
    # resume an interrupted run; confirm before reusing.
    new_data = read_json_file("./output/ALCUNA/w/unk/KD_o1-preview_conflict_whole_old.json")
    question2item = {i['question']: i for i in new_data}
    total_price = 0

    for index, item in enumerate(tqdm(data)):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        meta_data = item.get('meta_data')
        answer = item.get('answers')

        if meta_data['difference'] != "variation":
            continue
        print(meta_data['difference'])

        # Already evaluated earlier: reuse the stored scores.
        if question in question2item:
            cached = question2item[question]
            for field, values in scores.items():
                values.append(cached[field])
            continue

        # Resolve the old/new name pair. old_name is reset for every item so a
        # name found in neither mapping can no longer reuse the previous item's.
        new_name = taxon_info['name']
        old_name = None
        if new_name in new2old_knowledge:
            old_name = new2old_knowledge[new_name]
        elif new_name in old2new_knowledge:
            old_name = new_name
            new_name = old2new_knowledge[old_name]

        is_known = item.get('internal_acc') == 1
        if is_known and old_name is None:
            print(f"skipping item {index}: no old/new name mapping for {new_name!r}")
        elif is_known:
            old_entity_question = question.replace(new_name, old_name)

            print(old_entity_question, new_name, old_name)
            print("xxxxxxx")
            # Rename the entity so the context contradicts the model's own knowledge.
            taxon_info['name'] = old_name

            question = _whole_eval_build_question(
                question, old_entity_question, item['form'], meta_data)
            item['conflict_question'] = question

            for setting in _WHOLE_EVAL_SETTINGS:
                prompt = generate_prompt(taxon_info, question, setting["prompt"])
                result, price = _whole_eval_query_model(prompt, model)

                item[setting["raw"]] = result
                print(prompt, result)
                print(answer)
                total_price += price
                print("current total price: ", total_price)

                flags = _whole_eval_score_choice(
                    _whole_eval_parse_choice(result),
                    item['form'], meta_data['difference'], answer)
                for kind, flag in zip(("context", "own", "unknown"), flags):
                    scores[setting[kind]].append(flag)
                    item[setting[kind]] = flag

            for setting in _WHOLE_EVAL_SETTINGS:
                context_rate = np.mean(scores[setting["context"]])
                own_rate = np.mean(scores[setting["own"]])
                unknown_rate = np.mean(scores[setting["unknown"]])
                context_label, own_label, unknown_label, ratio_label = setting["labels"]
                print(context_label, context_rate)
                print(own_label, own_rate)
                print(unknown_label, unknown_rate)
                # Share of decided answers that followed the model's own
                # knowledge; nan while both rates are still 0.
                print(ratio_label, own_rate / (context_rate + own_rate))

            new_data.append(item)

        # Periodic checkpoint (not reached for items skipped via `continue`).
        if index % 10 == 0:
            write_json_file(new_data, output_file)

    write_json_file(new_data, output_file)


def _extract_final_choice(raw_response):
    """
    Reduce a raw model response to the single character of its final choice.

    Args:
    raw_response (str): Full model output, expected to contain "Final Choice:".

    Returns:
    str: The first character left after clean-up, or "" when nothing is left.
    """
    tail = raw_response.split("Final Choice:")[-1].lower().strip()
    # NOTE: str.strip("unknown") strips any leading/trailing character from the
    # set {u, n, k, o, w}; it does not remove the literal word. The exact
    # clean-up chain is kept so previously saved responses parse the same way.
    tail = tail.strip("unknown").strip().strip(".").strip("]").strip("[").strip(".").strip()
    # Answers such as "0. yes" are cut down to their leading option index.
    return tail[:1]


def _score_conflict_choice(item, meta_data, answer, result):
    """
    Classify one parsed choice as trust-context, trust-own and/or unknown.

    Args:
    item (dict): The question record; its 'form' key selects the scoring rule.
    meta_data (dict): The record's meta data; its 'difference' key is consulted.
    answer (list): Reference answers; only answer[0] is used.
    result (str): The parsed choice, as returned by _extract_final_choice.

    Returns:
    tuple: (trust_context, trust_own, unknown), each 0 or 1.
    """
    trust_context = 0
    trust_own = 0
    unknown = 0
    try:
        form = item['form']
        if form == "boolean":
            if result == "0":
                gold = answer[0].lower()
                if gold == "yes":
                    trust_context = 1
                elif gold == "no":
                    trust_own = 1

                if meta_data['difference'] == "dropout":
                    trust_own = 1
            elif result == "1":
                gold = answer[0].lower()
                if gold == "no":
                    trust_context = 1
                elif gold == "yes":
                    trust_own = 1
            elif result == "2":
                unknown = 1
                if answer[0].lower() == "i don't know":
                    trust_context = 1
        elif form == "fill-in-blank":
            if meta_data['difference'] == "dropout":
                if result == "1":
                    unknown = 1
                    if answer[0].lower() == "i don't know":
                        trust_context = 1
                elif result == "0":
                    trust_own = 1
            else:
                if result == "2":
                    unknown = 1
                elif result == "1":
                    trust_context = 1
                elif result == "0":
                    trust_own = 1
        elif form == "multi-choice":
            if result == str(answer[0]):
                trust_context = 1
            # Independent `if` for every prompt condition, so the three
            # conditions are scored by exactly the same rule.
            if result == "4":
                unknown = 1
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # Malformed record (missing 'form', empty or non-string answer, ...):
        # report it and do not credit the context; flags set before the failure
        # are kept, as before.
        print(e)
        trust_context = 0
    return trust_context, trust_own, unknown


def _memorization_ratio(trust_own_scores, trust_context_scores):
    """
    Return mean(trust_own) / (mean(trust_own) + mean(trust_context)).

    Yields NaN (without a NumPy division warning) while both means are zero.
    """
    own_rate = np.mean(trust_own_scores)
    context_rate = np.mean(trust_context_scores)
    total = own_rate + context_rate
    if total == 0:
        return float("nan")
    return own_rate / total


def conflict_whole_result_eval(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Re-score previously saved conflict-knowledge responses and print running rates.

    Only records with internal_acc == 1 and meta_data['difference'] == "variation"
    are scored. For each of them the saved responses under 'result_unk_np',
    'result_unk_trust_own' and 'result_okf' are parsed and classified, and the
    running trust-context / trust-own / unknown rates and memorization ratios
    are printed. Nothing is written to disk and nothing is returned.

    Args:
    input_file (str): Unused; kept so existing callers keep working.
    output_file (str): Path to the JSON file with saved responses; this is the file that is read.
    new2old_knowledge_file (str): Unused; kept so existing callers keep working.
    parent_entity_file (str): Unused; kept so existing callers keep working.
    model (str): Unused; kept so existing callers keep working.
    """
    data = read_json_file(output_file)

    conflict_acc_np_list, trust_own_np_list, unknown_np_list = [], [], []
    conflict_acc_tokp_list, trust_own_tokp_list, unknown_acc_tokp_list = [], [], []
    conflict_acc_okf_list, trust_own_okf_list, unknown_acc_okf_list = [], [], []

    # (saved-response key, trust-context scores, trust-own scores, unknown scores)
    prompt_conditions = (
        ('result_unk_np', conflict_acc_np_list, trust_own_np_list, unknown_np_list),
        ('result_unk_trust_own', conflict_acc_tokp_list, trust_own_tokp_list, unknown_acc_tokp_list),
        ('result_okf', conflict_acc_okf_list, trust_own_okf_list, unknown_acc_okf_list),
    )

    for item in tqdm(data[:3200]):
        if item.get('internal_acc') != 1:
            continue
        meta_data = item.get('meta_data')
        if meta_data['difference'] != "variation":
            continue
        answer = item.get('answers')

        for result_key, context_scores, own_scores, unknown_scores in prompt_conditions:
            choice = _extract_final_choice(item[result_key])
            trust_context, trust_own, unknown = _score_conflict_choice(item, meta_data, answer, choice)
            context_scores.append(trust_context)
            own_scores.append(trust_own)
            unknown_scores.append(unknown)

        print("current trust context rate neutral prompt: ", np.mean(conflict_acc_np_list))
        print("current trust own rate neutral prompt: ", np.mean(trust_own_np_list))
        print("current unknown rate neutral prompt: ", np.mean(unknown_np_list))
        print("memorization ratio np: ", _memorization_ratio(trust_own_np_list, conflict_acc_np_list))

        # This first value is the trust-context rate under the trust-own prompt.
        print("current trust context rate trust own knowledge prompt: ", np.mean(conflict_acc_tokp_list))
        print("current trust own rate trust own knowledge prompt: ", np.mean(trust_own_tokp_list))
        print("current unknown rate trust own knowledge prompt: ", np.mean(unknown_acc_tokp_list))
        print("memorization ratio tokp: ", _memorization_ratio(trust_own_tokp_list, conflict_acc_tokp_list))

        print("current own knowledge first prompt ck: ", np.mean(conflict_acc_okf_list))
        print("current own knowledge first prompt pk: ", np.mean(trust_own_okf_list))
        print("current own knowledge first prompt unknown: ", np.mean(unknown_acc_okf_list))
        print("memorization ratio okf: ", _memorization_ratio(trust_own_okf_list, conflict_acc_okf_list))

        print("length of data: ", len(trust_own_okf_list))


def conflict_okf_eval(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Query the model with the own-knowledge-first (OKF) prompt on conflict questions and score it.

    For every record with internal_acc == 1, the entity name in the question and
    in the record's 'new_knowledge' is replaced by its old name, answer options
    are appended to the question, the "OKF" prompt is sent to the model and the
    reply is classified as trust-context / trust-own / unknown. The scores, the
    raw reply ('result_okf') and the rewritten question ('conflict_question')
    are stored on the record. Running rates are printed, including the
    neutral-prompt and trust-own-prompt scores already stored on the record.
    Processed records are written to output_file every 100 input items and once
    more at the end.

    Records whose entity name appears in neither direction of the new/old name
    mapping are skipped with a message, because no old name can be determined
    for them.

    Args:
    input_file (str): Path to the input JSON file with questions, knowledge and stored scores.
    output_file (str): Path of the JSON file the processed records are written to.
    new2old_knowledge_file (str): Path to the JSON mapping from new entity names to old ones.
    parent_entity_file (str): Unused; kept so existing callers keep working.
    model (str): Model name; names containing "gpt" go through generate_chatgpt_response, all others through chat_completion.
    """
    def memorization_ratio(trust_own_scores, trust_context_scores):
        # mean(own) / (mean(own) + mean(context)); NaN without a NumPy division
        # warning while no answer has landed on either side yet.
        own_rate = np.mean(trust_own_scores)
        context_rate = np.mean(trust_context_scores)
        total = own_rate + context_rate
        if total == 0:
            return float("nan")
        return own_rate / total

    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    old2new_knowledge = {v: k for k, v in new2old_knowledge.items()}
    conflict_acc_np_list = []
    trust_own_np_list = []
    unknown_np_list = []

    conflict_acc_tokp_list = []
    trust_own_tokp_list = []
    unknown_acc_tokp_list = []

    conflict_acc_okf_list = []
    trust_own_okf_list = []
    unknown_acc_okf_list = []

    new_data = []
    for index, item in enumerate(tqdm(data[:3200])):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        meta_data = item.get('meta_data')
        answer = item.get('answers')
        trust_context_okf = 0
        trust_own_okf = 0
        unknown_okf = 0

        # Resolve the (new name, old name) pair; the record may carry either one.
        new_name = taxon_info['name']
        if new_name in new2old_knowledge:
            old_name = new2old_knowledge[new_name]
        elif new_name in old2new_knowledge:
            old_name = new_name
            new_name = old2new_knowledge[old_name]
        else:
            # No mapping at all: never fall through with an unset name or one
            # left over from a previous record.
            old_name = None

        internally_known = item.get('internal_acc') == 1
        if internally_known and old_name is None:
            print(f"skipping item {index}: no new/old name mapping for {new_name!r}")
        elif internally_known:
            old_entity_question = question.replace(new_name, old_name)

            print(old_entity_question, new_name, old_name)
            print("xxxxxxx")
            # Rename in place (this also changes item['new_knowledge']) so the
            # supplied knowledge is attributed to the old entity.
            taxon_info['name'] = old_name

            if item['form'] == "boolean":
                question = old_entity_question + "\n0. Yes\n\n1. No\n\n2. Unknown"
            elif item['form'] == "fill-in-blank":
                if meta_data['difference'] == "dropout":
                    question = old_entity_question + "\n0. " + str(meta_data['related_property']['values']) + "\n\n1. Unknown"
                elif meta_data['difference'] == "variation":
                    question = old_entity_question + "\n0. " + str(meta_data['related_property']['old']['values']) + "\n\n1. " + str(meta_data['related_property']['new']['values']) + "\n\n2. Unknown"
            elif item['form'] == "multi-choice":
                question = old_entity_question + "\n\n4. Unknown"

            item['conflict_question'] = question

            # Neutral-prompt scores are read from the record, not recomputed.
            conflict_acc_np_list.append(item['conflict_knowledge_trust_context_rate_np'])
            trust_own_np_list.append(item['conflict_knowledge_trust_own_rate_np'])
            unknown_np_list.append(item['conflict_knowledge_unknown_np'])

            print("current trust context rate neutral prompt: ", np.mean(conflict_acc_np_list))
            print("current trust own rate neutral prompt: ", np.mean(trust_own_np_list))
            print("current unknown rate neutral prompt: ", np.mean(unknown_np_list))
            print("memorization ratio np: ", memorization_ratio(trust_own_np_list, conflict_acc_np_list))

            # Trust-own-prompt scores are likewise read from the record.
            conflict_acc_tokp_list.append(item['conflict_knowledge_trust_context_tokp'])
            trust_own_tokp_list.append(item['conflict_knowledge_trust_own_tokp'])
            unknown_acc_tokp_list.append(item['conflict_knowledge_unknown_tokp'])
            # This first value is the trust-context rate under the trust-own prompt.
            print("current trust context rate trust own knowledge prompt: ", np.mean(conflict_acc_tokp_list))
            print("current trust own rate trust own knowledge prompt: ", np.mean(trust_own_tokp_list))
            print("current unknown rate trust own knowledge prompt: ", np.mean(unknown_acc_tokp_list))
            print("memorization ratio tokp: ", memorization_ratio(trust_own_tokp_list, conflict_acc_tokp_list))

            # Own-knowledge-first prompt: this is the only condition queried here.
            prompt = generate_prompt(taxon_info, question, "OKF")

            if "gpt" in model:
                result = generate_chatgpt_response(prompt, model)
            else:
                result = chat_completion(prompt, model)

            item['result_okf'] = result
            print(prompt, result)
            print(answer)
            # NOTE: str.strip("unknown") strips characters from the set
            # {u, n, k, o, w}, not the literal word; kept unchanged.
            # NOTE(review): conflict_whole_result_eval also cuts the parsed
            # choice down to its first character; confirm whether that should
            # apply here as well.
            result = result.split("Final Choice:")[-1].lower().strip().strip("unknown").strip().strip(".").strip("]").strip("[").strip(".").strip()

            try:
                if item['form'] == "boolean":
                    if result == "0":
                        if answer[0].lower() == "yes":
                            trust_context_okf = 1
                        elif answer[0].lower() == "no":
                            trust_own_okf = 1

                        if meta_data['difference'] == "dropout":
                            trust_own_okf = 1

                    elif result == "1":
                        if answer[0].lower() == "no":
                            trust_context_okf = 1
                        elif answer[0].lower() == "yes":
                            trust_own_okf = 1

                    elif result == "2":
                        unknown_okf = 1
                        if answer[0].lower() == "i don't know":
                            trust_context_okf = 1
                elif item['form'] == "fill-in-blank":
                    if meta_data['difference'] == "dropout":
                        if result == "1":
                            unknown_okf = 1
                            if answer[0].lower() == "i don't know":
                                trust_context_okf = 1
                        elif result == "0":
                            trust_own_okf = 1
                    else:
                        if result == "2":
                            unknown_okf = 1
                        elif result == "1":
                            trust_context_okf = 1
                        elif result == "0":
                            trust_own_okf = 1

                elif item['form'] == "multi-choice":
                    if result == str(answer[0]):
                        trust_context_okf = 1
                    if result == "4":
                        unknown_okf = 1

            except (KeyError, IndexError, TypeError, AttributeError) as e:
                # Malformed record (empty or non-string answer, missing key):
                # report it and do not credit the context.
                print(e)
                trust_context_okf = 0

            conflict_acc_okf_list.append(trust_context_okf)
            trust_own_okf_list.append(trust_own_okf)
            unknown_acc_okf_list.append(unknown_okf)
            item['conflict_knowledge_trust_context_okf'] = trust_context_okf
            item['conflict_knowledge_trust_own_okf'] = trust_own_okf
            item['conflict_knowledge_unknown_okf'] = unknown_okf

            print("current own knowledge first prompt ck: ", np.mean(conflict_acc_okf_list))
            print("current own knowledge first prompt pk: ", np.mean(trust_own_okf_list))
            print("current own knowledge first prompt unknown: ", np.mean(unknown_acc_okf_list))
            print("memorization ratio okf: ", memorization_ratio(trust_own_okf_list, conflict_acc_okf_list))

            new_data.append(item)

        # Periodic checkpoint so a crash or interrupt loses at most 100 items.
        if index % 100 == 0:
            write_json_file(new_data, output_file)

    write_json_file(new_data, output_file)


def parse_args(argv=None):
    """
    Parse the command-line options of this evaluation script.

    Args:
    argv (list[str] | None): Argument strings to parse. None (the default) makes
        argparse read sys.argv[1:], so calling parse_args() behaves as before.

    Returns:
    argparse.Namespace: Namespace with the attributes `model` and `question_type`.
    """
    parser = argparse.ArgumentParser(
        description="Example program to parse arguments.",
        # Append "(default: ...)" to each option in --help, so the real defaults
        # are visible next to the example values in the help strings.
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--model', default='o1-preview', type=str, help="gpt-4o-mini or LLAMA_3_70B or Qwen_2_7B")
    parser.add_argument('--question_type', type=str, default="KD", help="KA, KU, KD")
    return parser.parse_args(argv)

# Script entry point. There is no `if __name__ == "__main__":` guard, so the
# statements below run whenever this module is executed or imported.
args = parse_args()
# All paths are relative to the current working directory. The input and output
# file names are derived from the --question_type and --model options.
input_file = f'./data/ALCUNA/questions_{args.question_type}.json'
output_file = f'./output/ALCUNA/{args.question_type}_{args.model}.json'
parent_entity_file = f'./data/ALCUNA/parent_entities.json'

new2old_file = f'./data/ALCUNA/new2old_nms.json'

# Run the two stages back to back with identical arguments.
# NOTE(review): confirm that both functions called here are defined earlier in
# this file; the evaluators defined directly above are named
# conflict_whole_result_eval and conflict_okf_eval.
conflict_knowledge_evaluation(input_file, output_file, new2old_file, parent_entity_file, args.model)
conflict_whole_eval(input_file, output_file, new2old_file, parent_entity_file, args.model)