import os
from tqdm import tqdm
import random
import json

def open_data(split, data_dir='multiple_attribute/processed'):
    """Load one split's JSON file and return all of its entries, shuffled.

    Reads ``<data_dir>/<split>.fader.with_cat.proc.80000balanced_category.json``,
    which must contain a JSON object whose values are iterables. Every item of
    every value is formatted with ``str.format`` and terminated with a newline.

    Args:
        split: Name substituted into the file name (the script below uses
            ``'test'``, ``'valid'`` and ``'train'``).
        data_dir: Directory that holds the JSON file. Defaults to the
            location that was previously hard-coded.

    Returns:
        A list of newline-terminated strings, one per item, in random order.
        The order comes from the module-level ``random`` generator, so it is
        reproducible only if the caller seeds ``random`` beforehand.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    file_name = '{}.fader.with_cat.proc.80000balanced_category.json'.format(split)
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(os.path.join(data_dir, file_name), 'r', encoding='utf-8') as file:
        data_dic = json.load(file)
    lines = []
    # Only the values matter here; the keys are discarded.
    for value in data_dic.values():
        lines.extend('{}\n'.format(line) for line in value)
    random.shuffle(lines)
    return lines


if __name__ == '__main__':
    # For every split, load its shuffled lines via open_data() and write them
    # to ``<data_dir>/category.<split>_for_gpt2.txt``.
    data_dir = 'multiple_attribute/processed'
    splits = ['test', 'valid', 'train']
    for split in tqdm(splits):
        split_lines = open_data(split)
        out_path = os.path.join(data_dir,
                                'category.{}_for_gpt2.txt'.format(split))
        # Write UTF-8 explicitly so non-ASCII text neither fails nor changes
        # encoding depending on the machine's locale.
        with open(out_path, 'w', encoding='utf-8') as file:
            # Each entry already ends with '\n', so writelines() is enough.
            file.writelines(split_lines)
