import random
import numpy as np
import json
import tqdm
import re
from collections import defaultdict

# Seed both random sources used in this file (the stdlib `random` module and
# NumPy's global generator) with a fixed value so repeated runs draw the
# same sequence of numbers.
random.seed(111)
np.random.seed(111)


def generate_random_key(share_prefix_size: int = 0):
    """Build a short random key out of entries of the global ``randomlist``.

    Five indices are drawn with ``np.random.randint`` and mapped to entries
    of ``randomlist``. The first ``share_prefix_size`` drawn entries are the
    prefix candidates: when ``share_prefix_size`` is positive and the global
    ``same_prefix`` is still empty, they are joined and stored in
    ``same_prefix``. The returned key is always ``same_prefix`` followed by
    the remaining drawn entries.

    NOTE(review): neither ``randomlist`` nor ``same_prefix`` is defined in
    the visible part of this file; both must already exist as module
    globals when this is called -- confirm where they are set up.

    Args:
        share_prefix_size: How many of the leading drawn entries are
            reserved for the shared prefix.

    Returns:
        The current shared prefix concatenated with the freshly drawn
        suffix.
    """
    global same_prefix
    # The key length is fixed; exactly five entries are drawn per call.
    key_length = 5
    picks = np.random.randint(0, len(randomlist), key_length)
    drawn = [randomlist[pos] for pos in picks]

    prefix_missing = not (share_prefix_size <= 0 or same_prefix)
    if prefix_missing:
        # First request for a shared prefix: remember it for later calls.
        same_prefix = "".join(drawn[:share_prefix_size])

    suffix = "".join(drawn[share_prefix_size:])
    return same_prefix + suffix


def build_one_example(eg):
    """Create one example: a shuffled number context plus answered turns.

    A contiguous integer range is drawn (lower bound in [0, 10000], upper
    bound in [30000, 40000]), shuffled with NumPy's global generator and
    truncated to 18000 numbers. Each turn in ``eg["multi_turns"]`` is then
    answered against that context:

    * ``"largest"`` in the prompt selects ``max``, anything else ``min``;
    * ``"number with"`` in the prompt restricts the candidates to numbers
      whose decimal text contains the single digit matched by
      ``"with <digit> "``; otherwise the whole context is used.

    Args:
        eg: Mapping with a ``"multi_turns"`` list; every turn is a mapping
            with an ``"input"`` prompt string.

    Returns:
        ``{"context": [...], "multi_turns": [{"input": ..., "answer": ...}]}``
        where ``context`` is the list of 18000 integers and each answer is
        an integer taken from it.

    Raises:
        ValueError: If a prompt contains ``"number with"`` but no
            ``"with <digit> "`` pattern, or if no number in the context
            contains the requested digit.
    """
    multi_turns = eg["multi_turns"]

    # Keep the RNG call order (two randint draws, then one shuffle) so the
    # output stays identical for a given seed.
    range_start = random.randint(0, 10000)
    range_stop = random.randint(30000, 40000)
    context = list(range(range_start, range_stop))
    np.random.shuffle(context)
    context = context[:18000]

    digit_pattern = re.compile(r"with ([0-9]) ")
    # Decimal text of every number, computed once instead of once per turn.
    context_text = [str(value) for value in context]

    turns = []
    for turn in multi_turns:
        question = turn["input"]
        pick = max if "largest" in question else min

        if "number with" in question:
            match = digit_pattern.search(question)
            if match is None:
                raise ValueError(
                    f"cannot find a 'with <digit> ' pattern in prompt: {question!r}"
                )
            digit = match.group(1)
            candidates = [
                value
                for value, text in zip(context, context_text)
                if digit in text
            ]
            if not candidates:
                raise ValueError(
                    f"no number in the context contains digit {digit!r}"
                )
            answer = pick(candidates)
        else:
            answer = pick(context)

        turns.append({"input": question, "answer": answer})

    return {"context": context, "multi_turns": turns}



def build_data(
    origin_path,
    total: int = 2000,
    share_prefix_size: int = 0,
    turn_size: int = 5,
    output_path: str = "data/v2_multi_turn_mf.jsonl",
):
    """Rebuild every example of a JSONL file and write the result as JSONL.

    Each non-blank line of ``origin_path`` is parsed as JSON and passed to
    ``build_one_example``; the resulting examples are written one JSON
    object per line to ``output_path``. All examples are built before the
    output file is opened, so a failure while building does not touch the
    output file.

    Args:
        origin_path: Path of the input JSONL file.
        total: Currently unused; kept for backward compatibility.
        share_prefix_size: Currently unused; kept for backward compatibility.
        turn_size: Currently unused; kept for backward compatibility.
        output_path: Path of the JSONL file to write. Defaults to the
            location that was previously hard-coded.
    """
    with open(origin_path, encoding="utf-8") as f:
        # Skip blank lines (e.g. a trailing newline) instead of failing on them.
        data = [json.loads(line) for line in f if line.strip()]

    # Passing the list itself lets tqdm know the total and show real progress.
    new_data = [build_one_example(eg) for eg in tqdm.tqdm(data)]

    with open(output_path, "w", encoding="utf-8") as f:
        for example in new_data:
            json.dump(example, f)
            f.write("\n")
    
if __name__ == "__main__":
    # Script entry point: regenerate the dataset from the original JSONL file.
    build_data(origin_path="data/multi_turn_mf.jsonl")


