import h5py
from NLPTasks_wo_SuperGLUE import *
from MetaPreTrainingDataset import get_meta_task
from transformers import T5TokenizerFast
import numpy as np
from MetaPreTrainingDataset import MetaH5Construction, MetaH5NLPTaskDynamic
import os

# Tokenizer handed to every MetaH5Construction built further down.
tokenizer = T5TokenizerFast.from_pretrained("t5-large")

# Variable-length unsigned-short dtype shared by all ragged datasets in the
# output files.
dt = h5py.special_dtype(vlen=np.ushort)

# Directory that receives one "<task_name>.h5" file per task.
# NOTE(review): the path says "SuperBLUE" while the imported module is named
# "..._wo_SuperGLUE" -- possibly a typo; left untouched because other scripts
# may read from this exact path.
folder = "meta_task_h5_wo_SuperBLUE/"
os.makedirs(folder, exist_ok=True)

# Private copy of the task registry so the imported mapping is never mutated.
working_tasks = dict(TASK_NAME_TO_CLS)

def _write_split(h5_file, split_name, meta_task, with_key_ids=False):
	"""Write one split of a meta task into an already-open HDF5 file.

	Datasets created (all names are relative to the file root):

	* ``<split_name>_instance_count`` -- shape ``(1,)``, int64, holding
	  ``len(meta_task.split_data_list)``.
	* ``keyids_<key>`` -- shape ``(1,)``, dtype ``dt``, holding
	  ``meta_task.key2ids[key]``; only when ``with_key_ids`` is true.
	* ``<split_name>_key_<key>_values`` -- one entry per instance, dtype
	  ``dt``, holding ``instance[key]`` for every instance of the split.

	Args:
		h5_file: writable ``h5py.File`` (or group) to create datasets in.
		split_name: prefix for the dataset names, e.g. ``"train"``.
		meta_task: object exposing ``split_data_list``, ``key_list`` and,
			when ``with_key_ids`` is set, ``key2ids``.
		with_key_ids: also store ``key2ids[key]`` for every key.
	"""
	rows = meta_task.split_data_list
	n_rows = len(rows)

	count_dset = h5_file.create_dataset("%s_instance_count" % split_name, (1,), np.int64)
	count_dset[0] = n_rows

	for key in meta_task.key_list:
		if with_key_ids:
			# Created before the values dataset to keep the original
			# dataset creation order inside the file.
			key_ids = h5_file.create_dataset("keyids_%s" % key, (1,), dt)
			key_ids[0] = meta_task.key2ids[key]

		values = h5_file.create_dataset("%s_key_%s_values" % (split_name, key), (n_rows,), dt)
		# Entries are variable-length, so they are assigned one row at a time.
		for i, row in enumerate(rows):
			values[i] = row[key]


# Export every registered task to "<folder>/<task_name>.h5".
for task_name, cls_method in working_tasks.items():
	task = cls_method(task_name)
	task.load_data()
	print("Working for %s" % task.task_name)
	meta_validation_task = MetaH5Construction(task, "validation", tokenizer)
	meta_train_task = MetaH5Construction(task, "train", tokenizer)

	print("Labels: ", meta_train_task.key_list)

	# The context manager closes the file even if a write raises part-way
	# through, so a failing task no longer leaks an open HDF5 handle.
	with h5py.File(os.path.join(folder, task.task_name + ".h5"), "w") as output_h5:
		# Key ids are stored once, taken from the train split only.
		_write_split(output_h5, "train", meta_train_task, with_key_ids=True)
		_write_split(output_h5, "validation", meta_validation_task)







