import os
from pathlib import Path
import json
import jsonlines
from argparse import ArgumentParser
import random



def main(argv=None):
    """Draw a seeded random subsample from every JSON-lines file of a dataset.

    The source directory is chosen by ``--data_type``:
    ``data/curated/1000`` for ``curated`` and ``data/synthetic/40960`` for
    ``synthetic``. For ``synthetic`` the requested ``--data_size`` is
    multiplied by 10 before sampling. Each ``*.json`` file in the source
    directory is read as one JSON document per line, ``data_size`` records
    are sampled without replacement, and the result is written to
    ``data/<data_type>/<data_size>_seed<seed>/<prefix>.json`` where
    ``<prefix>`` is the part of the input file name before its first ``_``.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv``, as a plain script run does.

    Raises:
        ValueError: If ``--data_type`` is not ``curated`` or ``synthetic``, or
            if an input file holds fewer records than requested.
        FileNotFoundError: If the source directory contains no ``*.json`` file.
    """
    parser = ArgumentParser()
    parser.add_argument("--data_size", type=int, default=64)
    parser.add_argument("--data_type", type=str, default="curated")
    parser.add_argument("--seed", type=int, default=0)
    args = parser.parse_args(argv)

    random.seed(args.seed)

    if args.data_type == "curated":
        source_dir = Path("data/curated/1000")
    elif args.data_type == "synthetic":
        source_dir = Path("data/synthetic/40960")
        # Synthetic subsets are ten times the requested size; the scaled value
        # is also the one used in the output directory name below.
        args.data_size *= 10
    else:
        raise ValueError(
            f"unknown --data_type {args.data_type!r}; "
            "expected 'curated' or 'synthetic'"
        )

    # glob() order is filesystem-dependent. Every file consumes state from the
    # same seeded RNG, so the iteration order must be fixed for a given seed
    # to select the same records on every machine.
    data_files = sorted(source_dir.glob("*.json"))
    if not data_files:
        raise FileNotFoundError(f"no *.json files found in {source_dir}")

    out_dir = Path("data") / args.data_type / f"{args.data_size}_seed{args.seed}"
    out_dir.mkdir(parents=True, exist_ok=True)

    for data_file in data_files:
        with open(data_file, "r", encoding="utf8") as f:
            # Skip blank lines (e.g. a trailing newline at end of file) rather
            # than letting json.loads fail on an empty string.
            records = [json.loads(line) for line in f if line.strip()]

        # Explicit check instead of `assert`, which is removed under `python -O`.
        if len(records) < args.data_size:
            raise ValueError(
                f"{data_file} holds {len(records)} records, "
                f"but {args.data_size} were requested"
            )

        file_name = data_file.name.split("_")[0]
        selected_data = random.sample(records, k=args.data_size)

        with jsonlines.open(str(out_dir / (file_name + ".json")), "w") as writer:
            writer.write_all(selected_data)

    print("Make Data Done!")

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
