import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from utils.utils import *


def get_questions_by_type(knowledge_path, file_path, output_file, data_type='Knowledge Association'):
    """Collect all questions of one type and attach their entity knowledge.

    Args:
        knowledge_path: JSON file holding an object that is looked up with
            the same keys used in the question file.
        file_path: JSON file holding an object that maps each key to a list
            of question dicts.
        output_file: Destination handed to ``write_json_file`` for the
            selected questions.
        data_type: Value a question's ``"type"`` field must equal for the
            question to be selected.

    Returns:
        The list of selected question dicts; each one gains a
        ``"new_knowledge"`` entry taken from the knowledge file.

    Raises:
        KeyError: If a selected question's key is missing from the
            knowledge file.
    """
    data = read_json_file(file_path)
    new_knowledge = read_json_file(knowledge_path)
    # The previous version also loaded './data/ALCUNA/meta_data.jsonl' here
    # without ever using it; that redundant (and failure-prone) read is gone.
    print(len(new_knowledge))

    all_array = []
    for key, array in data.items():
        # Each value is a list of question dicts; keep the matching ones.
        for element in array:
            if element.get('type') == data_type:
                element["new_knowledge"] = new_knowledge[key]
                all_array.append(element)

    write_json_file(all_array, output_file)
    return all_array


def new_name_to_old_name(input_file, output_file):
    """Write a JSON mapping from artificial-entity name to parent-entity name.

    Args:
        input_file: JSONL file whose records may carry ``"artificial_entity"``
            and ``"parent_entity"`` dicts, each with a ``"name"`` field.
        output_file: Destination handed to ``write_json_file``.

    A record lacking either entity (or either name) contributes ``None`` for
    the missing side; repeated artificial names keep the last parent seen.
    """
    records = read_jsonl_file(input_file)

    # Key: the new (artificial) name; value: the old (parent) name.
    renamed = {
        record.get("artificial_entity", {}).get("name"):
            record.get("parent_entity", {}).get("name")
        for record in records
    }

    write_json_file(renamed, output_file)


def process_meta_data(input_file, output_file):
    """Extract artificial entities from a JSONL file into one JSON object.

    Args:
        input_file: JSONL file whose records may carry an
            ``"artificial_entity"`` dict.
        output_file: Destination handed to ``write_json_file``.

    The written object is keyed by the stringified entity ``"id"``. Each value
    holds the entity's ``"name"``, ``"rank"`` and a ``"Properties"`` dict
    mapping every property name to its list of values. Records whose
    artificial entity has no id are left out.
    """
    entities_by_id = {}

    for record in read_jsonl_file(input_file):
        entity = record.get("artificial_entity", {})
        ident = entity.get("id")
        if ident is not None:
            entities_by_id[str(ident)] = {
                "name": entity.get("name"),
                "rank": entity.get("rank"),
                # Flatten the list of {"name", "values"} records into a dict.
                "Properties": {
                    item.get("name"): item.get("values", [])
                    for item in entity.get("properties", [])
                },
            }

    write_json_file(entities_by_id, output_file)


def process_parent_entities(input_file, output_file):
    """Extract parent entities from a JSONL file into one JSON object.

    Args:
        input_file: JSONL file whose records may carry ``"artificial_entity"``
            and ``"parent_entity"`` dicts.
        output_file: Destination handed to ``write_json_file``.

    The written object is keyed by the parent entity's name. Each value holds
    the parent's ``"name"``, ``"rank"`` and a ``"Properties"`` dict mapping
    every property name to its list of values. Records are skipped when the
    artificial entity has no id or the parent entity has no name.
    """
    # The previous version loaded './data/ALCUNA/new2old_nms.json' into an
    # unused local that also shadowed the new_name_to_old_name function; the
    # mapping was never consulted, so the read has been removed.
    output_data = {}

    for line in read_jsonl_file(input_file):
        artificial_entity = line.get("artificial_entity", {})
        parent_entity = line.get("parent_entity", {})
        entity_name = parent_entity.get("name")

        if artificial_entity.get("id") is None:
            continue  # No usable artificial entity on this record.
        if entity_name is None:
            # Without a name there is no key to store the parent under;
            # str(None) would otherwise create a bogus "None" entry.
            continue

        output_data[str(entity_name)] = {
            "name": entity_name,
            "rank": parent_entity.get("rank"),
            "Properties": {
                prop.get("name"): prop.get("values", [])
                for prop in parent_entity.get("properties", [])
            },
        }

    write_json_file(output_data, output_file)


# Example usage: regenerate the derived ALCUNA files when run as a script.
# Guarded so that importing this module no longer triggers file I/O.
if __name__ == "__main__":
    input_file = './data/ALCUNA/meta_data.jsonl'
    output_file = './data/ALCUNA/parent_entities.json'
    knowledge_path = "./data/ALCUNA/artificial_entities.json"
    # NOTE(review): the original call passed meta_data.jsonl (a JSONL file) as
    # the question file and parent_entities.json as the output. The two paths
    # below are assumptions -- confirm them against the actual data layout.
    questions_file = './data/ALCUNA/id2question.json'
    questions_output_file = './data/ALCUNA/knowledge_differentiation.json'

    # Artificial entities go to their own file: get_questions_by_type reads
    # them back from knowledge_path, and writing them to output_file would be
    # overwritten by process_parent_entities below.
    process_meta_data(input_file, knowledge_path)
    new_name_to_old_name(input_file, './data/ALCUNA/new2old_nms.json')
    process_parent_entities(input_file, output_file)
    get_questions_by_type(knowledge_path, questions_file, questions_output_file,
                          "Knowledge Differentiation")
