import os
import sys
import re
import json
import time

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from argparse import ArgumentParser
from random import sample
from utils.json_reader import jsonl_loader, json_loader

def return_rand_list(selection_list, required_number):
    """Draw `required_number` distinct items at random from `selection_list`.

    Thin wrapper around `random.sample`: items are picked without
    replacement and the input sequence is left unmodified.

    Args:
        selection_list: Sequence to draw from.
        required_number: How many items to draw.

    Returns:
        A new list holding the drawn items.

    Raises:
        ValueError: Propagated from `random.sample` when `required_number`
            is negative or larger than `len(selection_list)`.
    """
    return sample(selection_list, required_number)

def read_constraint_seeds(constraint_folder):
    """Count the seed entries of every `*.jsonl` file in `constraint_folder`.

    Only regular files whose name ends with `.jsonl` are considered, so
    sub-directories and unrelated files in the folder are skipped instead
    of being handed to `jsonl_loader`.

    Args:
        constraint_folder: Path of the directory holding the seed files.

    Returns:
        A dict mapping each file name without its `.jsonl` suffix to the
        number of items `jsonl_loader` returns for that file.
    """
    suffix = ".jsonl"
    constraint_counts = {}
    for file_name in os.listdir(constraint_folder):
        seed_path = os.path.join(constraint_folder, file_name)
        if not file_name.endswith(suffix) or not os.path.isfile(seed_path):
            continue
        # Strip the extension only at the end of the name, never inside it.
        constraint_type = file_name[:-len(suffix)]
        constraint_counts[constraint_type] = len(jsonl_loader(seed_path))

    return constraint_counts

# Candidate constraint types grouped into numbered packages.
# Packages 1-2 are used for text with paragraph structure,
# packages 3-5 for text without paragraph structure.
_CONSTRAINT_PACKAGES = {
    1: ['7_punctuation', '8_structure_paragraph', '2_caps_paragraph', '4_decoration_paragraph', '1_annotation_paragraph'],
    2: ['7_punctuation', '8_structure_paragraph', '2_caps_paragraph', '4_decoration_paragraph', '6_keywords'],
    3: ['7_punctuation', '8_structure', '2_caps', '3_decoration', '1_annotation'],
    4: ['7_punctuation', '8_structure', '2_caps', '3_decoration', '6_keywords'],
    5: ['5_document_format'],
}

# Decoration types are swapped for their "_symbol" variant whenever a caps
# constraint is selected alongside them.
_SYMBOL_VARIANTS = {
    "3_decoration": "3_decoration_symbol",
    "4_decoration_paragraph": "4_decoration_paragraph_symbol",
}

# Order in which selected constraints are emitted for each record.
_CONSTRAINT_ORDER = (
    "9_text_format", "5_document_format", "1_annotation", "1_annotation_paragraph",
    "2_caps", "2_caps_paragraph", "7_punctuation", "3_decoration",
    "3_decoration_symbol", "4_decoration_paragraph", "4_decoration_paragraph_symbol",
    "6_keywords", "8_structure", "8_structure_paragraph",
)


def _select_constraint_types(line):
    """Randomly pick the constraint types to apply to one record."""
    word_count = line["word_count"]

    # A list as first element of "answer_split" selects the paragraph packages.
    if isinstance(line["answer_split"][0], list):
        candidate_packages = [1, 2]
    elif word_count > 50:
        # Only texts longer than 50 words may receive the document-format package.
        candidate_packages = [3, 4, 5]
    else:
        candidate_packages = [3, 4]

    package_id = return_rand_list(candidate_packages, 1)[0]
    package = _CONSTRAINT_PACKAGES[package_id]
    if package_id == 5:
        # Package 5 is always used in full; copy so the shared list is never aliased.
        selections = list(package)
    else:
        how_many = return_rand_list([1, 2, 3], 1)[0]
        selections = return_rand_list(package, how_many)

    if "2_caps_paragraph" in selections or "2_caps" in selections:
        selections = [_SYMBOL_VARIANTS.get(name, name) for name in selections]

    return selections


def random_constraints_selection(jsonl_input, constraint_folder, jsonl_output):
    """Attach randomly selected constraints to every record and save the result.

    Each record loaded from `jsonl_input` must provide `"word_count"` and a
    non-empty `"answer_split"`. For every record, one to three constraint
    types are sampled from a package chosen at random (or the single
    document-format type), and one seed entry is drawn per type from
    `<constraint_folder>/<type>.jsonl`. The record then gains four keys:

    * `"selected_constraints"`: constraint type -> the seed entry's `"type"`.
    * `"constraints_instructions"`: list of the seed entries' `"constraint"`.
    * `"modification"`: seed `"type"` -> the seed's `"modification"`, or `[]`
      when the seed entry has no such key.
    * `"title"`: an empty list.

    Records that end up fully identical are written only once.

    Args:
        jsonl_input: Path handed to `json_loader` to load the records.
        constraint_folder: Directory containing the constraint seed files.
        jsonl_output: Path of the output file; it is written as a single
            indented JSON array.

    Raises:
        KeyError: If a record lacks `"word_count"` / `"answer_split"`, or a
            selected constraint type has no seed file in `constraint_folder`.
    """
    json_list = json_loader(jsonl_input)
    constraint_counts = read_constraint_seeds(constraint_folder)

    # Seed files are read at most once per constraint type instead of once
    # per record and constraint.
    seed_cache = {}

    for line in json_list:
        constraint_type_selections = _select_constraint_types(line)

        # Draw a 1-based seed id for every selected constraint type.
        constraint_type_ids = {
            constraint_type: return_rand_list(
                list(range(1, constraint_counts[constraint_type] + 1)), 1
            )[0]
            for constraint_type in constraint_type_selections
        }

        # Rearrange the constraints into the canonical order and resolve them.
        selected_constraints = {}
        constraints_instructions = []
        modification = {}
        for constraint_type in _CONSTRAINT_ORDER:
            if constraint_type not in constraint_type_ids:
                continue
            if constraint_type not in seed_cache:
                seed_cache[constraint_type] = jsonl_loader(
                    os.path.join(constraint_folder, constraint_type + ".jsonl")
                )
            seed = seed_cache[constraint_type][constraint_type_ids[constraint_type] - 1]
            constraints_instructions.append(seed["constraint"])
            selected_constraints[constraint_type] = seed["type"]
            # Seeds without a "modification" entry fall back to an empty list.
            modification[seed["type"]] = seed.get("modification", [])

        line["selected_constraints"] = selected_constraints
        line["constraints_instructions"] = constraints_instructions
        line["modification"] = modification
        line['title'] = []

    # Remove duplicates while keeping the first occurrence.
    # NOTE(review): records are compared after the random constraints were
    # attached, so duplicated inputs only collapse when they also drew the
    # same constraints - confirm whether de-duplication should happen earlier.
    filtered_object = []
    for line in json_list:
        if line not in filtered_object:
            filtered_object.append(line)

    with open(jsonl_output, "w", encoding="utf-8") as w:
        json.dump(filtered_object, w, indent=2, ensure_ascii=False)


if __name__ == '__main__':
    # Command-line entry point: read the records, attach random constraints
    # and write the result. Every path can be overridden from the command
    # line; the defaults reproduce the previously hard-coded locations.
    parser = ArgumentParser()
    parser.add_argument("--jsonl_input", type=str, default="dataset/natural_question/natural_question_02.jsonl", help="Input JSONL file path")
    parser.add_argument("--jsonl_output", type=str, default="dataset/natural_question/natural_question_03.jsonl", help="Output JSONL file path")
    parser.add_argument("--constraint_folder", type=str, default="module_03_constraint/constraint_seed_gpt", help="Folder containing the constraint seed JSONL files")
    args = parser.parse_args()

    random_constraints_selection(args.jsonl_input, args.constraint_folder, args.jsonl_output)
                





    





