import os
import json
import argparse


def get_data(base_dir):
    """Load and concatenate all chunk files of one experiment.

    Reads ``<base_dir>/chunks/chunk_0.json``, ``chunk_1.json``, ... in
    ascending index order and appends the contents of each file to a single
    list. Each chunk is presumably a JSON array of samples (verify against
    the script that writes the chunks).

    Args:
        base_dir: Directory containing a ``chunks`` sub-directory.

    Returns:
        One list with the items of every chunk, in chunk-index order.

    Raises:
        FileNotFoundError: If the ``chunks`` directory does not exist, or if
            the chunk indices are not contiguous starting at 0.
    """
    chunk_dir = os.path.join(base_dir, "chunks")
    prefix, suffix = "chunk_", ".json"

    # Count only real chunk files. Counting every directory entry would let a
    # stray file (editor backup, .DS_Store, notes) inflate the total and make
    # the loop below request a chunk index that was never written.
    num_chunks = sum(
        1
        for name in os.listdir(chunk_dir)
        if name.startswith(prefix)
        and name.endswith(suffix)
        and name[len(prefix):-len(suffix)].isdigit()
    )

    all_chunk_data = []
    # Iterate by index (not by directory order) so chunks are concatenated
    # numerically and a missing index in the sequence fails loudly.
    for chunk_idx in range(num_chunks):
        file_name = os.path.join(chunk_dir, f"chunk_{chunk_idx}.json")
        with open(file_name, "r") as f:
            all_chunk_data += json.load(f)
    return all_chunk_data


def _agreeing_indices(data, experiments, data_size, option, matches, min_votes=3):
    """Return the sample indices on which enough experiments agree.

    Args:
        data: Mapping from experiment name to its list of samples; every
            sample is read through its ``'tag'`` entry.
        experiments: Names of the experiments that cast a vote.
        data_size: Number of leading sample indices to examine.
        option: The label (type or domain) being voted on.
        matches: Callable ``matches(tag, option)`` returning a truthy value
            when that experiment's tag counts as a vote for ``option``.
        min_votes: Minimum number of voting experiments required to keep an
            index.

    Returns:
        Ascending list of indices that received at least ``min_votes`` votes.
    """
    return [
        i
        for i in range(data_size)
        if sum(bool(matches(data[e][i]['tag'], option)) for e in experiments)
        >= min_votes
    ]


def run(args):
    """Compute majority-agreement index lists and write them to JSON.

    Loads the tagging output of four experiments from
    ``<cwd>/../../output/tagging/<experiment>``. For every chart type and
    every domain it collects the sample indices on which at least 3 of the 4
    experiments agree, then writes the result to
    ``analysis/tag_intersect/intersect.json`` under the tagging directory.

    Args:
        args: Parsed command-line arguments; currently unused.
    """
    task_dir = os.path.join(os.getcwd(), "../../output/tagging")
    output_dir = os.path.join(task_dir, "analysis/tag_intersect")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    experiments = ["molmo_no-option", "molmo_option", "llama_no-option", "llama_option"]
    data = {e: get_data(os.path.join(task_dir, e)) for e in experiments}
    # Samples are compared position by position, so the first experiment's
    # length defines how many indices are examined.
    # NOTE(review): assumes every experiment holds at least this many
    # samples in the same order - confirm against the tagging scripts.
    data_size = len(data[experiments[0]])

    type_options = [
        "Bar Chart",
        "Line Graph",
        "Map",
        "Pie Chart",
    ]
    domain_options = [
        "Astronomy",
        "Biology",
        "Chemistry",
        "Computer Science",
        "History",
        "Mathematics",
        "Music",
        "Physics",
    ]

    def has_type(tag, chart_type):
        # A missing tag (None) never counts as a vote.
        return tag is not None and tag['Type'] == chart_type

    def has_domain(tag, domain):
        # An experiment whose tag carries one of the listed chart types does
        # not vote for any domain. The type test comes first so 'Domain' is
        # only read when it is actually needed.
        return (
            tag is not None
            and tag['Type'] not in type_options
            and tag['Domain'] == domain
        )

    def summarize(options, matches):
        summary = dict()
        for option in options:
            indices = _agreeing_indices(data, experiments, data_size, option, matches)
            # The list is stored as a string, presumably so that the
            # indent=4 dump keeps it on a single line.
            summary[option] = {"Number": len(indices), "List": str(indices)}
        return summary

    intersect_data = {
        "Type": summarize(type_options, has_type),
        "Domain": summarize(domain_options, has_domain),
    }
    intersect_file = os.path.join(output_dir, "intersect.json")
    with open(intersect_file, "w") as f:
        json.dump(intersect_data, f, indent=4)


if __name__ == '__main__':
    # No options are defined yet; parsing still provides -h/--help and
    # rejects unexpected command-line arguments before the analysis starts.
    cli = argparse.ArgumentParser(description='')
    run(cli.parse_args())
