import random
import os
import json

import sys
sys.path.append("../")
from models.idx2word import Idx2Word
from instance_generator import InstanceGenerator
from utilities import create_topk_meta_info

# Combination for structural pruning: the top-k cut-offs passed to
# create_topk_meta_info and to the instance generators.
topk_names = 50
topk_attrs = 50  # for i3
topk_relas = 50
# Tag that encodes the cut-offs; it is embedded in the pruned meta-info
# file name built below.
suffix = f"topk_names{topk_names}-topk_attrs{topk_attrs}-topk_relas{topk_relas}"
# Fixed seed for the `random` module.
random.seed(12316)
# Top-level directory including all the information
data_dir = "../../data/VQAR"
meta_f = f"{data_dir}/gqa_info.json"
# Read the metadata inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left to the garbage collector.
with open(meta_f, "r") as meta_file:
    meta_info = json.load(meta_file)
# Output path handed to create_topk_meta_info as `meta_info_output`.
new_meta_info = data_dir + f"/gqa_info-{suffix}.json"
create_topk_meta_info(
    meta_info, topk_names, topk_attrs, topk_relas, meta_info_output=new_meta_info
)


# Inputs shared by the instance generators.
is_a_filename = os.path.join(f"{data_dir}/knowledge_base/", "is_a.facts")
reasoning_directory = ""
idx2word = Idx2Word(meta_info)

# Training data generation.
# Pickle file with all the training instances
tasks_filename = f"{data_dir}/dataset/task_list/train_tasks_c2_10000.pkl"
train_generator_args = (
    data_dir,
    tasks_filename,
    topk_names,
    topk_attrs,
    topk_relas,
    meta_f,
    is_a_filename,
    reasoning_directory,
)
generator = InstanceGenerator(*train_generator_args)

# Both generator calls below receive the same two file names, so they are
# built once here.
train_instances_pickle = f"train-c2-all-{suffix}.pickle"
train_statistics_pickle = f"train-c2-statistics-all-{suffix}.pickle"

generator.generate_training_instances(
    train_instances_pickle, train_statistics_pickle
)
generator.choose_training_instances(
    train_instances_pickle,
    train_statistics_pickle,
    f"experiment1-train-c2-all-{suffix}.pickle",
)

# Test data generation
number_of_instances_per_class = 500
tasks_filename = f"{data_dir}/dataset/task_list/test_tasks.pkl"
test_generator_args = (
    data_dir,
    tasks_filename,
    topk_names,
    topk_attrs,
    topk_relas,
    meta_f,
    is_a_filename,
    reasoning_directory,
)
generator = InstanceGenerator(*test_generator_args)

# The file name passed to generate_test_instances records both the pruning
# suffix and the per-class instance count.
test_instances_pickle = (
    f"test-c2-all-{suffix}-per-class-{number_of_instances_per_class}.pickle"
)
generator.generate_test_instances(
    test_instances_pickle,
    idx2word,
    name_inst=True,
    attr_inst=True,
    rela_inst=True,
    number_of_instances_per_class=number_of_instances_per_class,
)
