import json
import os
import random
from tqdm import tqdm
import pickle

# Resolve the script directory once, as an absolute path, *before* changing
# the working directory.  `__file__` can be a relative path (e.g. when the
# script is started as `python sub/script.py` on older interpreters); a
# relative path re-evaluated after `os.chdir` would point at the wrong place.
_script_dir = os.path.dirname(os.path.abspath(__file__))

# Relative paths used later in the script are resolved against the script's
# own directory.
os.chdir(_script_dir)

# Instruction sentence placed in front of every generated template.
prompt = "Please combine the following sentences to one paragraph."

# Binary mode lets `json` detect the UTF encoding itself instead of relying on
# the platform's default text encoding; the `with` block closes the handle.
with open(os.path.join(_script_dir, "v3_template.json"), "rb") as _template_file:
    _templates = json.load(_template_file)

# templates[attr][v] -> list of alternative sentences.
# Each JSON key must contain exactly one "_" separating `attr` from `v`
# (any other shape makes the two-target unpacking raise ValueError); each JSON
# value is a ";"-separated list of sentences, stripped of surrounding
# whitespace.
templates = {}
for temp, sentence in _templates.items():
    attr, v = temp.split("_")
    templates.setdefault(attr, {})[v] = [s.strip() for s in sentence.split(";")]

# Path (relative to the working directory) of a pickled collection of
# attribute combinations to build records for.  Set to a falsy value to
# generate random combinations instead.
continue_from = "all_testset_combine/all_testset_combine.bin"


def _build_result(attr_comb):
    """Build one record for the given iterable of attribute names.

    For every attribute a value key is drawn at random from
    ``templates[attr]`` and then one of that value's sentences is drawn at
    random.  The sentences are appended, in order, after the module-level
    ``prompt``.

    Returns a dict with:
      * ``"attributes"`` -- list of ``"<attr>_<v>"`` strings, one per attribute
      * ``"template"``   -- the prompt and chosen sentences joined by spaces

    Raises ``KeyError`` if an attribute name is not present in ``templates``.
    """
    prompts = [prompt]
    attributes = []
    for attr in attr_comb:
        v = random.choice(list(templates[attr]))
        prompts.append(random.choice(templates[attr][v]))
        attributes.append(f"{attr}_{v}")
    return {
        "attributes": attributes,
        "template": " ".join(prompts),
    }


if continue_from:
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced by a trusted source.
    with open(continue_from, "rb") as f:
        _result = pickle.load(f)

    results = [_build_result(attr_comb) for attr_comb in _result]

    # Write next to the input as "<input without extension>_res.bin".
    # `splitext` only strips the final extension, so dots elsewhere in the
    # path (e.g. "./data/x.bin") do not truncate the output name.
    _out_root, _ = os.path.splitext(continue_from)
    with open(_out_root + "_res.bin", "wb") as f:
        pickle.dump(results, f)
else:
    n = 600000
    # `random.sample` requires a sequence (a dict keys view raises TypeError
    # on Python 3.11+), so materialise the attribute names once up front.
    _attr_names = list(templates)

    results = []
    for i in tqdm(range(n)):
        # Each record combines between 2 and all available attributes.
        l = random.randint(2, len(_attr_names))
        attr_comb = random.sample(_attr_names, l)
        results.append(_build_result(attr_comb))

    with open("v3_prompt_new", "wb") as f:
        pickle.dump(results, f)