import os
import json
import codecs
from tqdm import tqdm

def split_into_chunks(items, number_of_chunks):
    """Split a sequence into ``number_of_chunks`` contiguous slices.

    Chunk ``i`` covers ``items[i * n // k:(i + 1) * n // k]`` where ``n`` is
    ``len(items)`` and ``k`` is ``number_of_chunks``. The slices are
    consecutive and non-overlapping, so concatenating them in order yields
    the original sequence. When ``n`` is not divisible by ``k`` the chunk
    sizes differ by at most one element; when ``n < k`` some chunks are empty.

    Args:
        items: A sliceable sequence (e.g. a list of ``(key, value)`` pairs).
        number_of_chunks: How many slices to produce; must be at least 1.

    Returns:
        A list of ``number_of_chunks`` slices of ``items``.

    Raises:
        ValueError: If ``number_of_chunks`` is smaller than 1.
    """
    if number_of_chunks < 1:
        raise ValueError("number_of_chunks must be at least 1")
    total = len(items)
    return [
        items[index * total // number_of_chunks:(index + 1) * total // number_of_chunks]
        for index in range(number_of_chunks)
    ]


if __name__ == "__main__":
    # Split the single WebNLG JSON dictionary into NUMBER_OF_CHUNKS smaller
    # JSON files, each holding a contiguous run of the original entries.
    save_dir = "data/webnlg-2020_processed_wasserstein_new"
    os.makedirs(save_dir, exist_ok=True)
    NUMBER_OF_CHUNKS = 20
    fpath, fname = "data/webnlg-2020", "webnlg_2020_en.json"
    processed_json = "webnlg_2020_en_formated_{}.json"

    # Pin UTF-8 explicitly: without it ``open`` falls back to the platform
    # locale encoding, which can mis-decode non-ASCII text in the dataset.
    with open(os.path.join(fpath, fname), 'r', encoding="utf-8") as file:
        final_dic = json.load(file)

    # Materialise the items once instead of on every loop iteration.
    all_items = list(final_dic.items())
    chunks = split_into_chunks(all_items, NUMBER_OF_CHUNKS)

    for chunk_index, chunk_items in enumerate(tqdm(chunks, 'Annotations')):
        chunk_path = os.path.join(save_dir, processed_json.format(chunk_index))
        with open(chunk_path, "w", encoding="utf-8") as file:
            json.dump(dict(chunk_items), file)
