import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from utils.utils import *
from llm.avior_api import *
from llm.gpt import *
from prompt.ALCUNA_prompt import *
import numpy as np
from tqdm import tqdm
import random

# Maps each question-type key to its prompt template; generate_prompt() looks
# the template up here by `question_type`.
# NOTE(review): the *_PROMPT constants are not defined in this file; presumably
# they come from the wildcard import of prompt.ALCUNA_prompt — confirm.
instruction_template_dict = {
    "KA": KA_PROMPT,
    "KU": KU_PROMPT,
    "KD": KD_PROMPT,
    "INT": INT_PROMPT,
    "INT_CONTEXT": INT_CONTEXT_PROMPT,
    "KU_TRUST_OWN_CHOICE": KU_TRUST_OWN_CHOICE_PROMPT,
    "TRUST_OWN": INT_TRUST_OWN_PROMPT,
    # Alternative template for "TRUST_OWN", currently disabled:
    # "TRUST_OWN": INT_TRUST_HALF_PROMPT,
    "CONFLICT": CONFLICT_PROMPT,
    "OKF": OUTPUT_OWN_FIRST_PROMPT,
}


def generate_prompt(taxon_info, question, question_type):
    """
    Fill the prompt template selected by question_type.

    Args:
    taxon_info: Context substituted for the "[Taxon]" placeholder. Dicts and
        lists are rendered with json.dumps(indent=2); any other value is
        converted with str().
    question: Text substituted for the "[Question]" placeholder; non-string
        values are converted with str().
    question_type (str): Key into instruction_template_dict.

    Returns:
    str: The filled-in template. For question_type "INT" only "[Question]"
        is replaced; "[Taxon]" is left as it appears in the template.

    Raises:
    KeyError: If question_type is not a key of instruction_template_dict.
    """
    filled = instruction_template_dict[question_type]

    if isinstance(taxon_info, (dict, list)):
        taxon_text = json.dumps(taxon_info, indent=2)
    else:
        taxon_text = str(taxon_info)

    if isinstance(question, str):
        question_text = question
    else:
        question_text = str(question)

    # Every template except "INT" receives the taxon context first.
    if question_type != "INT":
        filled = filled.replace("[Taxon]", taxon_text)

    return filled.replace("[Question]", question_text)


def evaluation(input_file, output_file, question_type, model):
    """
    Ask the model every question in the input file, score the replies and save them.

    Only the first 3200 items are processed. Items without a question or
    without 'new_knowledge' are left untouched. Every scored item receives a
    'result' and an 'acc' key; the full data list is written to output_file
    every 300 items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file; each item is read for
        'question', 'new_knowledge' and 'answers'.
    output_file (str): Path the annotated items are written to.
    question_type (str): Template key passed to generate_prompt. "KA" replies
        are compared for exact equality with the first gold answer, using the
        text after "Final Choice:". All other types are scored on the
        lower-cased text after "Final Answer:".
    model (str): Model name. Names containing "gpt" are sent to
        generate_chatgpt_response, all others to chat_completion.

    Returns:
    list: The prompts that were sent, in order.
    """
    # Local import keeps this block self-contained; literal_eval replaces a
    # bare eval() so model output can never execute code.
    import ast

    data = read_json_file(input_file)
    acc = []
    prompts = []

    for index, item in enumerate(tqdm(data[:3200])):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        answer = item.get('answers')
        is_acc = 0
        # "KA" items are scored against the first gold answer only. Guarded so
        # an item without answers is scored 0 instead of raising here.
        if question_type == "KA" and answer:
            answer = answer[0]

        if question and taxon_info:
            prompt = generate_prompt(taxon_info, question, question_type)
            prompts.append(prompt)
            if "gpt" in model:
                result = generate_chatgpt_response(prompt, model)
            else:
                result = chat_completion(prompt, model)
            # NOTE(review): other functions in this file unpack the gpt helper
            # as (text, price); accept that shape here too — confirm which one
            # the helper actually returns.
            if isinstance(result, tuple):
                result = result[0]

            if question_type == "KA":
                result = result.split("Final Choice:")[-1].strip(".").strip()
            else:
                result = result.split("Final Answer:")[-1].strip(".").strip().lower()

            print(result)
            print(answer)

            try:
                if question_type == "KA":
                    # Stored as int so 'acc' has the same type for every question type.
                    is_acc = int(result == answer)
                else:
                    is_acc = 1
                    # The first word of every gold answer must occur in the reply text.
                    for ans in answer:
                        if (str(ans).split())[0].strip().lower() not in result:
                            print(ans, result)
                            is_acc = 0
                            break
                    # The reply is expected to be a Python-style list literal.
                    result = ast.literal_eval(result)
                    if (len(result) != len(answer)) and len(result) != 0:
                        is_acc = 0
                        # A length mismatch is forgiven only for yes/no replies
                        # whose first element equals the first gold answer.
                        if result[0] == "yes" or result[0] == "no":
                            if result[0] == str(answer[0]).strip().lower():
                                is_acc = 1
            except Exception as e:
                # Free-form model output frequently fails to parse; such
                # replies are scored as wrong rather than aborting the run.
                print(e)
                is_acc = 0

            acc.append(is_acc)
            item['result'] = result
            item['acc'] = is_acc
            data[index] = item
            print("current acc: ", np.mean(acc))

        # Periodic checkpoint so a long run can be inspected or recovered.
        if index % 300 == 0:
            write_json_file(data, output_file)

    write_json_file(data, output_file)
    return prompts


def translate_triplets_to_boolean_question(meta_data):
    """
    Turn the hop triplets of an item into plain "subject relation object" statements.

    Triplets whose relation equals the related property's name and whose
    object is one of the related property's values are left out.

    Args:
    meta_data (dict): Read for 'hop_triplets' (iterable of 3-element
        triplets, default empty) and 'related_property' (dict with optional
        'name' and 'values', default empty).

    Returns:
    list: One statement string per kept triplet, in the original order.
    """
    related = meta_data.get('related_property', {})
    skip_relation = related.get('name')
    skip_objects = related.get('values', [])

    return [
        f"{subj} {rel} {obj}"
        for subj, rel, obj in meta_data.get('hop_triplets', [])
        if not (rel == skip_relation and obj in skip_objects)
    ]


def internal_evaluation(input_file, output_file, model):
    """
    Check, statement by statement, whether the model confirms each item's hop triplets.

    For each of the first 3200 items, the statements produced by
    translate_triplets_to_boolean_question are sent to the model with the
    "INT" template. A statement scores 1 only when the normalized text after
    "Final Answer:" is exactly "yes". The item-level score is 1 only if no
    statement scored 0. Results are stored on the item under
    'internal_knowledge_acc' and 'sub_question_acc'; the data is written to
    output_file every 300 items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file; items are read for
        'new_knowledge' and 'meta_data'.
    output_file (str): Path the annotated items are written to.
    model (str): Model name. Names containing 'gpt' are sent to
        generate_chatgpt_response, all others to chat_completion.

    Returns:
    list: Every prompt that was sent, in order.
    """
    records = read_json_file(input_file)
    statement_scores = []
    item_scores = []
    prompts = []

    for idx, record in enumerate(tqdm(records[:3200])):
        context = record.get('new_knowledge')
        statements = translate_triplets_to_boolean_question(record.get('meta_data'))

        per_statement = {}
        item_score = 1

        for statement in statements:
            prompt = generate_prompt(context, statement, "INT")
            prompts.append(prompt)

            ask = generate_chatgpt_response if 'gpt' in model else chat_completion
            reply = ask(prompt, model)

            print(prompt)
            print(reply)

            verdict = reply.split("Final Answer:")[-1].strip(".").strip().lower()
            score = 1 if verdict == "yes" else 0

            per_statement[statement] = score
            # A single non-"yes" reply marks the whole item as not known.
            item_score = min(item_score, score)
            statement_scores.append(score)

        item_scores.append(item_score)

        record['internal_knowledge_acc'] = item_score
        record['sub_question_acc'] = per_statement
        records[idx] = record

        print("current internal_knowledge_acc: ", np.mean(statement_scores))
        print("current all internal_knowledge_acc: ", np.mean(item_scores))

        # Periodic checkpoint of everything processed so far.
        if idx % 300 == 0:
            write_json_file(records, output_file)

    write_json_file(records, output_file)
    return prompts


def internal_context_knowledge_evaluation(input_file, output_file, model):
    """
    Re-ask the hop-triplet statements with the triplets supplied as context.

    Only items whose 'internal_knowledge_acc' equals 1 are evaluated (first
    3200 items of the file). Every key of the item's 'sub_question_acc' is
    sent with the "INT_CONTEXT" template, using the item's
    meta_data['hop_triplets'] as context. A statement scores 1 only when the
    normalized text after "Final Answer:" is exactly "yes". Results are stored
    under 'in_context_internal_knowledge' and 'sub_question_context_acc'; the
    data is written to output_file every 300 items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file.
    output_file (str): Path the annotated items are written to.
    model (str): Model name. Names containing 'gpt' are sent to
        generate_chatgpt_response, all others to chat_completion.
    """
    records = read_json_file(input_file)
    statement_scores = []
    item_scores = []

    for idx, record in enumerate(tqdm(records[:3200])):
        meta = record.get('meta_data')

        if record.get('internal_knowledge_acc') == 1:
            per_statement = {}
            item_score = 1

            for statement, _ in record.get('sub_question_acc').items():
                prompt = generate_prompt(meta['hop_triplets'], statement, "INT_CONTEXT")

                if 'gpt' in model:
                    reply = generate_chatgpt_response(prompt, model)
                    print(reply)
                else:
                    reply = chat_completion(prompt, model)

                verdict = reply.split("Final Answer:")[-1].strip(".").strip().lower()
                score = 1 if verdict == "yes" else 0

                per_statement[statement] = score
                # One non-"yes" reply is enough to fail the whole item.
                item_score = min(item_score, score)
                statement_scores.append(score)

            item_scores.append(item_score)

            record['in_context_internal_knowledge'] = item_score
            record['sub_question_context_acc'] = per_statement
            records[idx] = record

            print("current internal_knowledge_acc: ", np.mean(statement_scores))
            print("current all internal_knowledge_acc: ", np.mean(item_scores))

        # Checkpoint runs for every index, including skipped items.
        if idx % 300 == 0:
            write_json_file(records, output_file)

    write_json_file(records, output_file)



def noise_evaluation(input_file, output_file, question_type, model, entity_file):
    """
    Score the model on each question when the relevant entity is mixed with noise entities.

    For each of the first 3200 items that has a question and 'new_knowledge',
    a list obtained from get_random_items is extended with the item's own
    'new_knowledge', shuffled, and used as the prompt context. The reply is
    scored on the lower-cased text after "Final Answer:". Scored items receive
    'noise_entities', 'noise_result' and 'noise_acc'; the data is written to
    output_file every 300 items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file; items are read for
        'question', 'new_knowledge' and 'answers'.
    output_file (str): Path the annotated items are written to.
    question_type (str): Template key passed to generate_prompt.
    model (str): Model name. Names containing "gpt" are sent to
        generate_chatgpt_response, all others to chat_completion.
    entity_file (str): Path to a JSON object whose values form the pool the
        noise entities are drawn from.
    """
    # Local import keeps this block self-contained; literal_eval replaces a
    # bare eval() so model output can never execute code.
    import ast

    data = read_json_file(input_file)
    aritificial_entities_dict = read_json_file(entity_file)
    acc = []
    aritificial_entities_list = list(aritificial_entities_dict.values())

    for index, item in enumerate(tqdm(data[:3200])):
        question = item.get('question')
        taxon_info = item.get('new_knowledge')
        answer = item.get('answers')
        is_acc = 0

        if question and taxon_info:
            # NOTE(review): assumes get_random_items returns a fresh list that
            # is safe to append to and shuffle — confirm in utils.
            random_noise = get_random_items(aritificial_entities_list)
            random_noise.append(taxon_info)
            random.shuffle(random_noise)

            prompt = generate_prompt(random_noise, question, question_type)

            if "gpt" in model:
                result = generate_chatgpt_response(prompt, model)
            else:
                result = chat_completion(prompt, model)
            # NOTE(review): other functions in this file unpack the gpt helper
            # as (text, price); accept that shape here too — confirm which one
            # the helper actually returns.
            if isinstance(result, tuple):
                result = result[0]

            result = result.split("Final Answer:")[-1].strip(".").strip().lower()
            print(result)
            print(answer)

            try:
                is_acc = 1
                # The first word of every gold answer must occur in the reply text.
                for ans in answer:
                    if (str(ans).split())[0].strip().lower() not in result:
                        print(ans, result)
                        is_acc = 0
                        break
                # The reply is expected to be a Python-style list literal.
                result = ast.literal_eval(result)
                if (len(result) != len(answer)) and len(result) != 0:
                    is_acc = 0
                    # A length mismatch is forgiven only for yes/no replies
                    # whose first element equals the first gold answer.
                    if result[0] == "yes" or result[0] == "no":
                        if result[0] == str(answer[0]).strip().lower():
                            is_acc = 1
            except Exception as e:
                # Free-form model output frequently fails to parse; such
                # replies are scored as wrong rather than aborting the run.
                print(e)
                is_acc = 0

            acc.append(is_acc)
            item['noise_entities'] = random_noise
            item['noise_result'] = result
            item['noise_acc'] = is_acc
            data[index] = item
            print("current noise acc: ", np.mean(acc))

        # Periodic checkpoint so a long run can be inspected or recovered.
        if index % 300 == 0:
            write_json_file(data, output_file)

    write_json_file(data, output_file)


def translate_triple_to_question_variation(meta_data, old_name):
    """
    Build a yes/no question about old_name from a "variation" item's new property.

    Args:
    meta_data (dict): Must provide related_property['new'] with 'values' and 'name'.
    old_name: Entity name inserted into the question.

    Returns:
    str: "Is the <name> of <old_name> <values as JSON>?"

    Raises:
    KeyError: If 'new', 'values' or 'name' is missing.
    """
    new_property = meta_data.get('related_property', {})['new']
    values_json = json.dumps(new_property['values'])
    property_name = new_property['name']
    return f"Is the {property_name} of {old_name} {values_json}?"


def translate_triple_to_question_dropout(meta_data, old_name):
    """
    Build a yes/no question about old_name from a "dropout" item's related property.

    Args:
    meta_data (dict): Must provide related_property with 'values' and 'name'.
    old_name: Entity name inserted into the question.

    Returns:
    str: "Is the <name> of <old_name> <values as JSON>?"

    Raises:
    KeyError: If 'values' or 'name' is missing.
    """
    dropped_property = meta_data.get('related_property', {})
    values_json = json.dumps(dropped_property['values'])
    property_name = dropped_property['name']
    return f"Is the {property_name} of {old_name} {values_json}?"


def conflict_noise_knowledge_evaluation(input_file, output_file, new2old_knowledge_file, parent_entity_file, model, entity_file=None):
    """
    Test whether the model still recalls the parent entity's property when shown only noise entities.

    Only items whose 'internal_acc' equals 1 are evaluated (first 3200 items
    of the file). For each one, a verification question about the parent
    ("old") entity is built from the item's meta_data and sent with the
    "INT_CONTEXT" template, using a shuffled list from get_random_items as
    context. The 0/1 outcome is stored under 'noise_in_context_acc'. The data
    is written to output_file every 100 items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file; items are read for
        'internal_acc', 'new_knowledge' and 'meta_data'.
    output_file (str): Path the annotated items are written to.
    new2old_knowledge_file (str): Path to a JSON object mapping new entity
        names to parent entity names.
    parent_entity_file (str): Not used by this function; kept so existing
        callers keep working.
    model (str): Model name. Names containing "gpt" are sent to
        generate_chatgpt_response, all others to chat_completion.
    entity_file (str, optional): Path to a JSON object whose values form the
        noise-entity pool. When omitted, a module-level `entity_file` is used
        if one exists, which is what this function relied on before the
        parameter was added.

    Raises:
    ValueError: If no entity file is given and no module-level one exists.
    KeyError: If an item's entity name is neither a parent name nor a key of
        the new-to-old mapping.
    """
    if entity_file is None:
        # Backward compatibility with callers that set a module-level name.
        entity_file = globals().get("entity_file")
    if entity_file is None:
        raise ValueError("entity_file must be provided")

    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    aritificial_entities_list = list(read_json_file(entity_file).values())

    internal_noise_acc = []

    for index, item in enumerate(tqdm(data[:3200])):
        if item.get("internal_acc") == 1:
            taxon_info = item.get('new_knowledge')
            meta_data = item.get('meta_data')
            difference = meta_data['difference']

            # Resolve the parent name: the item may already carry it, or it
            # carries the new name that the mapping translates back.
            new_name = taxon_info['name']
            if new_name in new2old_knowledge.values():
                old_name = new_name
            else:
                old_name = new2old_knowledge[new_name]

            # The context holds only unrelated entities, so a correct reply
            # has to come from the model's own knowledge.
            random_noise = get_random_items(aritificial_entities_list)
            random.shuffle(random_noise)

            if difference == "variation":
                verifi_que = translate_triple_to_question_variation(meta_data, old_name)
            else:
                verifi_que = translate_triple_to_question_dropout(meta_data, old_name)

            prompt = generate_prompt(random_noise, verifi_que, "INT_CONTEXT")

            if "gpt" in model:
                result = generate_chatgpt_response(prompt, model)
            else:
                result = chat_completion(prompt, model)
            # NOTE(review): other functions in this file unpack the gpt helper
            # as (text, price); accept that shape here too — confirm which one
            # the helper actually returns.
            if isinstance(result, tuple):
                result = result[0]

            print(prompt)
            print(result)

            result = result.split("Final Answer:")[-1].strip(".").strip("!").strip("[").strip("]").strip().lower()

            # The meta data describes the new entity, so for a "variation"
            # the parent entity should be answered "no"; for a "dropout" the
            # parent still has the property and should be answered "yes".
            is_internal = int(
                (difference == "variation" and result == "no")
                or (difference == "dropout" and result == "yes")
            )

            item['noise_in_context_acc'] = is_internal
            internal_noise_acc.append(is_internal)
            data[index] = item

        # np.mean of an empty list is nan and warns; wait for the first score.
        if internal_noise_acc:
            print("current internal_acc: ", np.mean(internal_noise_acc))

        if index % 100 == 0:
            write_json_file(data, output_file)

    write_json_file(data, output_file)


def conflict_noise_whole_evaluation(input_file, output_file, new2old_knowledge_file, parent_entity_file, model, entity_file=None):
    """
    Ask a three-way (yes / no / unknown) verification question under three prompt styles.

    Only items whose 'internal_acc' equals 1 are evaluated (first 3200 items
    of the file). For each one, a question about the parent ("old") entity is
    built from the item's meta_data, the options "0. yes / 1. no / 2.unknown"
    are appended, and it is sent with a shuffled list from get_random_items as
    context, once per prompt template:

        suffix "np"   -> template "KA"
        suffix "tokp" -> template "KU_TRUST_OWN_CHOICE"
        suffix "okf"  -> template "OKF"

    Each item receives 'noise_entities', 'noise_question' and, per suffix,
    'noise_result_<suffix>' (raw reply), 'noise_acc_<suffix>' and
    'noise_unknown_<suffix>'. Only evaluated items are written to output_file,
    every 100 input items and once more at the end.

    Args:
    input_file (str): Path to the input JSON file.
    output_file (str): Path the evaluated items are written to.
    new2old_knowledge_file (str): Path to a JSON object mapping new entity
        names to parent entity names.
    parent_entity_file (str): Not used by this function; kept so existing
        callers keep working.
    model (str): Model name. Names containing 'o1' are sent to
        generate_o1_response, names containing "gpt" to
        generate_chatgpt_response, all others to chat_completion.
    entity_file (str, optional): Path to a JSON object whose values form the
        noise-entity pool. When omitted, a module-level `entity_file` is used
        if one exists, which is what this function relied on before the
        parameter was added.

    Raises:
    ValueError: If no entity file is given and no module-level one exists.
    """
    if entity_file is None:
        # Backward compatibility with callers that set a module-level name.
        entity_file = globals().get("entity_file")
    if entity_file is None:
        raise ValueError("entity_file must be provided")

    # (result-key suffix, prompt template key) for the three prompt styles.
    variants = (
        ("np", "KA"),
        ("tokp", "KU_TRUST_OWN_CHOICE"),
        ("okf", "OKF"),
    )

    def query_model(prompt):
        """Send one prompt and return (reply_text, price)."""
        if 'o1' in model:
            response = generate_o1_response(prompt, model)
        elif "gpt" in model:
            response = generate_chatgpt_response(prompt, model)
        else:
            response = chat_completion(prompt, model)
        if isinstance(response, tuple):
            return response[0], response[1]
        # Helpers that return only text are treated as costing nothing.
        return response, 0

    def score_choice(raw_response, difference):
        """Return (acc, unknown) for one raw reply."""
        # NOTE(review): strip("unknown") removes any of the characters
        # u/n/k/o/w from both ends, not the word itself. The chain is kept
        # unchanged so scores stay comparable with earlier runs.
        choice = raw_response.split("Final Choice:")[-1].lower().strip().strip("unknown").strip().strip(".").strip("]").strip("[").strip(".").strip()
        if len(choice) > 1:
            choice = choice[0]
        # The meta data describes the new entity: for a "variation" the parent
        # entity should get option 1 (no); for a "dropout", option 0 (yes).
        acc = int(
            (difference == "variation" and "1" in choice)
            or (difference == "dropout" and "0" in choice)
        )
        unknown = int("2" in choice)
        return acc, unknown

    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    aritificial_entities_list = list(read_json_file(entity_file).values())
    parent_names = set(new2old_knowledge.values())

    acc_lists = {suffix: [] for suffix, _ in variants}
    unknown_lists = {suffix: [] for suffix, _ in variants}
    new_data = []
    total_price = 0

    print("length of data", len(data))
    for index, item in enumerate(tqdm(data[:3200])):
        if item.get("internal_acc") == 1:
            taxon_info = item.get('new_knowledge')
            meta_data = item.get('meta_data')
            difference = meta_data['difference']

            # Resolve the parent name. An unmapped name used to leave old_name
            # unbound, or silently reuse the previous item's value; such items
            # are now skipped.
            new_name = taxon_info['name']
            if new_name in new2old_knowledge:
                old_name = new2old_knowledge[new_name]
            elif new_name in parent_names:
                old_name = new_name
            else:
                old_name = None
                print("skipping item with unmapped entity name: ", new_name)

            if old_name is not None:
                random_noise = get_random_items(aritificial_entities_list)
                random.shuffle(random_noise)

                if difference == "variation":
                    verifi_que = translate_triple_to_question_variation(meta_data, old_name)
                else:
                    verifi_que = translate_triple_to_question_dropout(meta_data, old_name)

                verifi_que = verifi_que + "\n0. yes\n\n1. no\n\n2.unknown"
                item['noise_entities'] = random_noise
                item['noise_question'] = verifi_que

                for suffix, prompt_type in variants:
                    prompt = generate_prompt(random_noise, verifi_que, prompt_type)
                    result, price = query_model(prompt)

                    print(prompt)
                    print(result)
                    total_price += price
                    print("current total price: ", total_price)

                    acc, unknown = score_choice(result, difference)
                    item['noise_result_' + suffix] = result
                    item['noise_acc_' + suffix] = acc
                    item['noise_unknown_' + suffix] = unknown
                    acc_lists[suffix].append(acc)
                    unknown_lists[suffix].append(unknown)

                for suffix, _ in variants:
                    print(f"current noise acc {suffix}: ", np.mean(acc_lists[suffix]))
                    print(f"current noise unknown {suffix}: ", np.mean(unknown_lists[suffix]))

                new_data.append(item)

        if index % 100 == 0:
            write_json_file(new_data, output_file)

    write_json_file(new_data, output_file)

def conflict_noise_whole_metrics(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Recompute and print accuracy / unknown rates from replies already saved in output_file.

    Reads output_file (not input_file) and, for each of its first 3200 items
    whose 'internal_acc' equals 1, rescores the raw replies stored under
    'noise_result_np', 'noise_result_tokp' and 'noise_result_okf'. No model is
    queried and nothing is written back.

    Earlier versions also loaded the entity and mapping files, drew random
    noise and built prompts that were never used; that work has been removed.

    Args:
    input_file (str): Not used; kept so existing callers keep working.
    output_file (str): Path to the JSON file holding the saved replies.
    new2old_knowledge_file (str): Not used; kept so existing callers keep working.
    parent_entity_file (str): Not used; kept so existing callers keep working.
    model (str): Not used; kept so existing callers keep working.

    Returns:
    dict: Keys 'acc_np', 'unknown_np', 'acc_tokp', 'unknown_tokp', 'acc_okf'
        and 'unknown_okf', each the mean over the rescored items (nan when
        no item qualified).

    Raises:
    KeyError: If a qualifying item lacks one of the saved reply keys.
    """
    suffixes = ("np", "tokp", "okf")

    def score_choice(raw_response, difference):
        """Return (acc, unknown) for one raw reply."""
        # NOTE(review): strip("unknown") removes any of the characters
        # u/n/k/o/w from both ends, not the word itself. The chain is kept
        # unchanged so scores stay comparable with earlier runs.
        choice = raw_response.split("Final Choice:")[-1].lower().strip().strip("unknown").strip().strip(".").strip("]").strip("[").strip(".").strip()
        if len(choice) > 1:
            choice = choice[0]
        # The meta data describes the new entity: for a "variation" the parent
        # entity should get option 1 (no); for a "dropout", option 0 (yes).
        acc = int(
            (difference == "variation" and "1" in choice)
            or (difference == "dropout" and "0" in choice)
        )
        unknown = int("2" in choice)
        return acc, unknown

    data = read_json_file(output_file)

    acc_lists = {suffix: [] for suffix in suffixes}
    unknown_lists = {suffix: [] for suffix in suffixes}

    for item in tqdm(data[:3200]):
        if item.get("internal_acc") != 1:
            continue
        difference = item.get('meta_data')['difference']
        for suffix in suffixes:
            acc, unknown = score_choice(item['noise_result_' + suffix], difference)
            acc_lists[suffix].append(acc)
            unknown_lists[suffix].append(unknown)

    metrics = {}
    for suffix in suffixes:
        # Guard the empty case: np.mean([]) warns and returns nan.
        has_scores = bool(acc_lists[suffix])
        metrics['acc_' + suffix] = float(np.mean(acc_lists[suffix])) if has_scores else float("nan")
        metrics['unknown_' + suffix] = float(np.mean(unknown_lists[suffix])) if has_scores else float("nan")
        print(f"current noise acc {suffix}: ", metrics['acc_' + suffix])
        print(f"current noise unknown {suffix}: ", metrics['unknown_' + suffix])

    return metrics



def _parse_final_choice(response):
    """
    Extract the chosen option ("0", "1" or "2") from a model response.

    Only the text after the last "Final Choice:" marker is inspected. Leading
    whitespace and punctuation (brackets, asterisks, dots, ...) are skipped and
    the first character after that must be one of the option digits, so replies
    such as "0. yes", "[1]" or "**2. unknown**" are all recognised.

    Args:
    response (str): The raw text returned by the model.

    Returns:
    str: "0", "1" or "2", or "" when no option digit leads the final choice.
    """
    import re  # local import: this file's visible import block does not bring in `re`

    tail = response.split("Final Choice:")[-1].lower()
    # (?![0-9]) keeps multi-digit numbers such as "10" from being read as option "1".
    match = re.match(r"[^0-9a-z]*([012])(?![0-9])", tail)
    return match.group(1) if match else ""


def conflict_noise_okf_evaluation(input_file, output_file, new2old_knowledge_file, parent_entity_file, model):
    """
    Run the "own knowledge first" (OKF) evaluation with random noise entities as context.

    For each of the first 3200 items whose "internal_acc" equals 1, a
    yes/no/unknown verification question about the old entity is built, the
    model is queried with the "OKF" prompt, and the parsed choice is scored.
    The item is extended with 'noise_entities', 'noise_question',
    'noise_result_okf', 'noise_acc_okf' and 'noise_unknown_okf', and the
    processed items are written to output_file (a checkpoint whenever the item
    index is a multiple of 100, plus a final write).

    Each processed item must already carry 'noise_acc_np', 'noise_unknown_np',
    'noise_acc_tokp' and 'noise_unknown_tokp'; they are read only to print
    running averages. The artificial entities are loaded from the module-level
    `entity_file` global, which must be defined before this function is called.

    Args:
    input_file (str): The path to the input JSON file with the items to evaluate.
    output_file (str): The path to the JSON file where processed items are saved.
    new2old_knowledge_file (str): The path to the JSON mapping of new entity names to old entity names.
    parent_entity_file (str): Not used by this function; kept so existing callers keep working.
    model (str): The model name; names containing "gpt" go through generate_chatgpt_response,
        all others through chat_completion.
    """
    data = read_json_file(input_file)
    new2old_knowledge = read_json_file(new2old_knowledge_file)
    aritificial_entities_dict = read_json_file(entity_file)
    aritificial_entities_list = list(aritificial_entities_dict.values())
    old2new_knowledge = {v: k for k, v in new2old_knowledge.items()}

    acc_np_list = []
    unknown_np_list = []
    acc_tokp_list = []
    unknown_tokp_list = []
    acc_okf_list = []
    unknown_okf_list = []

    new_data = []
    for index, item in enumerate(tqdm(data[:3200])):
        # Resolve the old entity name first, so an unmapped entity can be skipped
        # instead of raising NameError or silently reusing the previous item's name.
        old_name = None
        if item.get("internal_acc") == 1:
            entity_name = item.get('new_knowledge')['name']
            if entity_name in new2old_knowledge:
                old_name = new2old_knowledge[entity_name]
            elif entity_name in old2new_knowledge:
                # The item already carries the old name.
                old_name = entity_name
            else:
                print("skipping item with no old/new name mapping: ", entity_name)

        if old_name is not None:
            meta_data = item.get('meta_data')
            difference = meta_data['difference']

            # Noise context drawn from the artificial entities
            # (presumably a random sample; see get_random_items).
            random_noise = get_random_items(aritificial_entities_list)
            random.shuffle(random_noise)

            if difference == "variation":
                verifi_que = translate_triple_to_question_variation(meta_data, old_name)
            else:
                verifi_que = translate_triple_to_question_dropout(meta_data, old_name)

            verifi_que = verifi_que + "\n0. yes\n\n1. no\n\n2.unknown"

            item['noise_entities'] = random_noise
            item['noise_question'] = verifi_que

            # neutral prompt (scores computed earlier and stored on the item)
            acc_np_list.append(item['noise_acc_np'])
            unknown_np_list.append(item['noise_unknown_np'])
            print("current noise acc np: ", np.mean(acc_np_list))
            print("current noise unknown np: ", np.mean(unknown_np_list))

            # trust own knowledge (scores computed earlier and stored on the item)
            acc_tokp_list.append(item['noise_acc_tokp'])
            unknown_tokp_list.append(item['noise_unknown_tokp'])
            print("current noise acc tokp: ", np.mean(acc_tokp_list))
            print("current noise unknown tokp: ", np.mean(unknown_tokp_list))

            # own knowledge first
            prompt = generate_prompt(random_noise, verifi_que, "OKF")
            if "gpt" in model:
                result = generate_chatgpt_response(prompt, model)
            else:
                result = chat_completion(prompt, model)

            print(prompt)
            print(result)
            item['noise_result_okf'] = result
            choice = _parse_final_choice(result)

            # should say "no" for the old entity, because the meta data describes the new entity
            if difference == "variation":
                acc_okf = int(choice == "1")
            elif difference == "dropout":
                acc_okf = int(choice == "0")
            else:
                acc_okf = 0
            unknown_okf = int(choice == "2")

            item['noise_acc_okf'] = acc_okf
            item['noise_unknown_okf'] = unknown_okf
            acc_okf_list.append(acc_okf)
            unknown_okf_list.append(unknown_okf)

            print("current noise acc okf: ", np.mean(acc_okf_list))
            print("current noise unknown okf: ", np.mean(unknown_okf_list))

            new_data.append(item)

        # Periodic checkpoint so a failed model call does not lose finished items.
        if index % 100 == 0:
            write_json_file(new_data, output_file)

    write_json_file(new_data, output_file)


# Example usage
# Run configuration. These names stay at module level because evaluation
# functions in this file read some of them as globals (for example,
# conflict_noise_okf_evaluation loads the artificial entities from `entity_file`).
question_type = "KD"  # selects the question file, e.g. "KA", "KU" or "KD"
# model = "gpt-4o-mini" # "gpt-4o-mini" or "LLAMA_3_70B"
model = "o1-preview"  # other values used so far: "gpt-4o-mini", "LLAMA_3_70B"

input_file = f'./data/ALCUNA/questions_{question_type}.json'
output_file = f'./output/ALCUNA/{question_type}_{model}.json'

entity_file = './data/ALCUNA/artificial_entities.json'

# Only launch the evaluation when the file is executed as a script, so that
# importing this module does not trigger a full run against the model API.
if __name__ == "__main__":
    conflict_noise_whole_evaluation(input_file, output_file, './data/ALCUNA/new2old_nms.json', './data/ALCUNA/parent_entities.json', model)