import numpy as np
import h5py

def denoise_nasbench(metrics, threshold=0.8):
    """Return first-axis indices whose selected metric is strictly above ``threshold``.

    The value compared for each entry along axis 0 is
    ``metrics[entry, -1, 0, -1, 2]``: the last position on axes 1 and 3,
    position 0 on axis 2 and position 2 on axis 4.
    NOTE(review): presumably this picks a validation accuracy from the final
    training budget -- confirm against the dataset layout.

    Args:
        metrics: Array indexable with five indices as described above.
        threshold: Exclusive lower bound for the selected value.

    Returns:
        Integer array with the positions along axis 0 that pass the
        threshold, in increasing order.
    """
    selected = metrics[:, -1, 0, -1, 2]
    passes = selected > threshold
    # nonzero yields one index array per dimension; keep the axis-0 one.
    return np.nonzero(passes)[0]

# Write one ``train_samples_seed<i>.npz`` file per seed (seeds 0..199).
#
# Everything read from the HDF5 file is independent of the seed, so the file
# is opened once up front instead of once per seed.
with h5py.File("datasets/nasbench101/nasbench.hdf5", mode="r") as f:
    # Only the number of entries is needed; take it from the dataset's shape
    # rather than loading every "hash" value into memory.
    total_count = f["hash"].shape[0]
    metrics = f["metrics"][()]

# The threshold-filtered index does not depend on the seed either; compute it
# once and store the same array in every output file.
denoise_index = denoise_nasbench(metrics)

# Sample sizes to draw for each seed. The order is significant: every size
# consumes one permutation from the seeded generator, so reordering this list
# would change the indices stored under each key.
split_list = [100, 172, 334, 860, 423, 424, 4236, 42362, 127087, 211812, 296537, 381262, 200, 300, 1000, 10, 2118]

for i in range(200):
    random_state = np.random.RandomState(i)
    result = dict()
    for n_samples in split_list:
        # A fresh permutation is drawn per size; its first ``n_samples``
        # entries form the split (fewer if ``n_samples`` exceeds
        # ``total_count``).
        split = random_state.permutation(total_count)[:n_samples]
        result[str(n_samples)] = split

    result["denoise-80"] = denoise_index
    np.savez("datasets/nasbench101/train_samples_seed" + str(i) + ".npz", **result)

