import pandas as pd
import os, glob, json, yaml, io


# Name of the dataset collection and the directory that holds it.
datasets_dir = 'TPP'
root = os.path.join('./data', datasets_dir)

# Names of the dataset sub-directories to process. Replace the list with
# os.listdir(root) to process every entry found under `root`.
datasets = ['volcano']


# For every configured dataset: read its YAML card, count the sequences and
# events stored in each JSON file of the dataset directory, and write the
# card back with the statistics added.
for dataset in datasets:
    print(f'Processing {dataset}...')
    dataset_dir = os.path.join(root, dataset)
    card_path = os.path.join(dataset_dir, 'dataset_card.yml')

    # Load the property card. It is read as UTF-8 because that is the encoding
    # it is written back with below. An empty card parses to None, so fall
    # back to an empty mapping to keep the key assignments below valid.
    with open(card_path, 'r', encoding='utf8') as f_property:
        dataset_card = yaml.safe_load(f_property) or {}

    for json_path in glob.glob(os.path.join(dataset_dir, '*.json')):
        # Card keys are derived from the file name up to its first dot.
        split_name = os.path.basename(json_path).split('.')[0]

        # Use a dedicated name for the parsed content so the outer loop
        # variable `dataset` is not overwritten.
        with open(json_path, 'r', encoding='utf8') as f_data:
            records = json.load(f_data)

        # Only the `time_seq` column is needed; the frame itself is discarded.
        list_time_seq = pd.DataFrame.from_dict(records).time_seq.tolist()

        # Total number of entries across all sequences, and number of sequences.
        dataset_card['num_of_events_' + split_name] = sum(
            len(time_seq) for time_seq in list_time_seq
        )
        dataset_card[split_name + '_size'] = len(list_time_seq)

    # Overwrite the card in place with the added statistics.
    with open(card_path, 'w', encoding='utf8') as outfile:
        yaml.dump(dataset_card, outfile, default_flow_style=False, allow_unicode=True)