import os
from mindreadingautobots.sequence_generators import make_datasets, data_io
# build your _sparse_ boolean function here
# Bit-flip setting forwarded to the generator below as `p_bitflip`.
p_bitflip = 0.2
# The signature has k + 1 entries. Presumably entry w is the function value
# when the selected bits have weight w -- TODO confirm against make_datasets.
signature_tuple = (0, 0, 0, 1, 1, 0, 0, 0, 0)
# Indices forwarded to the generator as `subseq_idx`; exactly k are required.
subseq_idx = [3, 4, 5, 6, 7, 9, 10, 11]
k = len(signature_tuple) - 1
# Explicit raise instead of `assert` so the check survives `python -O`.
if len(subseq_idx) != k:
    raise ValueError(
        f"subseq_idx has {len(subseq_idx)} entries but the signature "
        f"requires k = {k}"
    )
# Map each position in the signature to its value: {0: s_0, 1: s_1, ...}.
signature = dict(enumerate(signature_tuple))

# Sizes shared by every dataset generated below.
n_bits = 14  # total bits in X, including label
n_train = 10000

# Write one dataset directory per (validation seed, validation size) pair.
for seed, n_val in [(2484, 40000), (2469, 30000), (2488, 20000)]:
    # The training set is generated with the same fixed seed on every
    # iteration; only the validation seed and size vary between runs.
    X_train, Z_train, subseq_idx = make_datasets.sparse_boolean_weightbased_k_n(
        n_bits, k, n_train, signature,
        p_bitflip=p_bitflip, seed=1234, subseq_idx=subseq_idx,
    )
    X_val, Z_val, subseq_idx = make_datasets.sparse_boolean_weightbased_k_n(
        n_bits, k, n_val, signature,
        p_bitflip=p_bitflip, seed=seed, subseq_idx=subseq_idx,
    )

    # Encode the signature *values* in the directory name. Iterating the dict
    # itself yields its keys (0, 1, 2, ...), which would give the same name
    # for every signature of a given length.
    # NOTE(review): this changes the directory name relative to earlier runs.
    gen_name = "counterexample_v3_" + "".join(
        str(bit) for bit in signature.values()
    )
    # round() rather than int(): float products such as 0.29 * 100 land just
    # below the intended integer and would be truncated to the wrong percent.
    p100 = round(p_bitflip * 100)
    suffix = f"_nbits{n_bits}_ntr{n_train}_nval{n_val}_bf{p100}_seed{seed}"
    dirname = gen_name + suffix
    # exist_ok avoids the check-then-create race and makes reruns a no-op.
    os.makedirs(dirname, exist_ok=True)

    # The Z_* arrays are saved as the train/val files.
    train_path = f"{dirname}/train.pkl"
    val_path = f"{dirname}/val.pkl"
    data_io.save_numpy_as_dict(Z_train, train_path)
    data_io.save_numpy_as_dict(Z_val, val_path)

    # The X_* arrays are saved as the "noiseless" counterparts.
    noiseless_train_path = f"{dirname}/noiseless_train.pkl"
    noiseless_val_path = f"{dirname}/noiseless_val.pkl"
    data_io.save_numpy_as_dict(X_train, noiseless_train_path)
    data_io.save_numpy_as_dict(X_val, noiseless_val_path)
    print(f"Saved {train_path}, {val_path}, {noiseless_train_path}, {noiseless_val_path}")