# Sample a single evaluation batch: inputs from the data sampler and labels
# from a freshly drawn task.
#
# Only one batch is ever used below, so it is drawn directly. Wrapping this in
# `for ... in range(num_eval_examples // batch_size)` followed by an immediate
# `break` would leave `xs` / `ys` unset (or stale) whenever that quotient is 0.
xs, xs_p = generating_func(data_sampler, n_points, batch_size)
task = task_sampler()
# Prefer the GPU, but fall back to the CPU so the script also runs on hosts
# without CUDA. NOTE(review): assumes `model` lives on the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"

ys = task.evaluate(xs)

# Build copies of the batch in which the prefix (every position except the
# last) of each prompt is shuffled; the last position is left where it is.
#
# Sizes are read from the tensor instead of being hard-coded as 64 prompts
# and 40 prefix positions, so other batch sizes / prompt lengths work too.
# Assumes `ys` has the same first two dimensions as `xs` - TODO confirm.
num_prompts = xs.shape[0]
num_prefix = xs.shape[1] - 1

rand_permuted_xs = copy.deepcopy(xs)
rand_permuted_ys = copy.deepcopy(ys)
for batch_idx in range(num_prompts):
    # One permutation per prompt, applied to inputs and labels alike so that
    # every x stays paired with its y.
    rand_permutation = torch.randperm(num_prefix)
    rand_permuted_xs[batch_idx, :-1] = xs[batch_idx, :-1][rand_permutation]
    rand_permuted_ys[batch_idx, :-1] = ys[batch_idx, :-1][rand_permutation]

# Stand-alone example of the same shuffle on the first prompt's prefix.
batch_idx = 0
my_x = copy.deepcopy(xs[batch_idx, :-1])
rand_permutation = torch.randperm(num_prefix)
shuffled_x = my_x[rand_permutation]


# Evaluation only: run the forward pass under no_grad() so no autograd graph
# is built and held in memory for a result that is never back-propagated.
with torch.no_grad():
    pred = model(xs.to(device), ys.to(device)).detach()

# `pred` contains a prediction for every position, i.e. for a varying number
# of in-context examples, so the squared error is element-wise per position.
sq_error = (ys.cpu() - pred.cpu()).square()
# Keep only the last position: the prediction made with the whole prefix in
# context (40 in-context examples in the setup this script was written for).
final_pred_sq_error = sq_error[:, -1]


# Prompt-corruption experiments.
#
# A fresh batch and task are drawn, the model is scored on the clean batch
# (experiment 0), and then, for each scale factor C, on six corrupted
# variants of that batch (experiments 1-6). For every experiment the mean
# squared error at the last position of the prompt is printed.
xs, xs_p = generating_func(data_sampler, n_points, batch_size)
task = task_sampler()
ys = task.evaluate(xs)

old_df_model = pd.DataFrame(model_error_list)

# Experiment 0: baseline on the untouched batch. no_grad() avoids building an
# autograd graph for forward passes that are only used for scoring.
with torch.no_grad():
    old_pred = model(xs.to(device), ys.to(device)).detach()

# Derive sizes and positions from the batch rather than hard-coding 64
# prompts and positions 20 / 39; for a 41-point prompt these are exactly
# 20 (middle of the prefix) and 39 (last prefix position).
num_prompts = xs.shape[0]
num_prefix = xs.shape[1] - 1
middle_idx = num_prefix // 2
last_prefix_idx = num_prefix - 1

for C in [0, 2, 2.5, 3, 4, 5]:
    print("Repeating experiment with C={}".format(C))

    # Experiments 1-3: scale one prefix input (first / middle / last) by C.
    xs_1 = copy.deepcopy(xs)
    xs_1[:, 0, :] = C * xs_1[:, 0, :]
    xs_2 = copy.deepcopy(xs)
    xs_2[:, middle_idx, :] = C * xs_2[:, middle_idx, :]
    xs_3 = copy.deepcopy(xs)
    xs_3[:, last_prefix_idx, :] = C * xs_3[:, last_prefix_idx, :]
    # Experiment 4: scale every prefix input by C.
    xs_4 = copy.deepcopy(xs)
    xs_4[:, :-1, :] = C * xs_4[:, :-1, :]
    # Experiment 6: scale only the last (query) input by C.
    xs_6 = copy.deepcopy(xs)
    xs_6[:, -1, :] = C * xs_6[:, -1, :]

    # Labels for the scaled inputs come from the original task. They are
    # computed once per variant, after all rows have been modified, instead
    # of being recomputed on the full batch for every row.
    # NOTE(review): assumes task.evaluate is deterministic; confirm for noisy tasks.
    ys_1 = task.evaluate(xs_1)
    ys_2 = task.evaluate(xs_2)
    ys_3 = task.evaluate(xs_3)
    ys_4 = task.evaluate(xs_4)
    ys_6 = task.evaluate(xs_6)

    # Experiment 5: inputs unchanged, but the prefix labels of every prompt
    # are taken from a separately sampled task (this should not work).
    # Starting from a copy of `ys` keeps the last label of each prompt equal
    # to the original task's label.
    xs_5 = copy.deepcopy(xs)
    ys_5 = copy.deepcopy(ys)
    for batch_idx in range(num_prompts):
        new_task = task_sampler()
        ys_5[batch_idx, :-1] = new_task.evaluate(xs_5)[batch_idx, :-1]

    with torch.no_grad():
        pred_1 = model(xs_1.to(device), ys_1.to(device)).detach()
        pred_2 = model(xs_2.to(device), ys_2.to(device)).detach()
        pred_3 = model(xs_3.to(device), ys_3.to(device)).detach()
        pred_4 = model(xs_4.to(device), ys_4.to(device)).detach()
        pred_5 = model(xs_5.to(device), ys_5.to(device)).detach()
        pred_6 = model(xs_6.to(device), ys_6.to(device)).detach()

    # Index in these lists == experiment number in the printed report.
    pred_list = [old_pred, pred_1, pred_2, pred_3, pred_4, pred_5, pred_6]
    xs_list = [xs, xs_1, xs_2, xs_3, xs_4, xs_5, xs_6]
    ys_list = [ys, ys_1, ys_2, ys_3, ys_4, ys_5, ys_6]
    for idx, pred in enumerate(pred_list):
        sq_error = (ys_list[idx].cpu() - pred.cpu()).square()
        # Score only the last position of each prompt, averaged over the batch.
        final_pred_sq_error = sq_error[:, -1].mean()
        print("Experiment {} | Mean sq_error = {}".format(idx, final_pred_sq_error))

# One summary per experiment; the position in this tuple is the experiment
# number shown in the printed legend.
_EXPERIMENT_SUMMARIES = (
    "Vanilla Tspiras setting with fully trained GPT2",
    "Replace 1st element of Prefix x with C * x (1st element stable)",
    "Replace 21st element of Prefix x with C * x (middle stable)",
    "Replace 40th element of Prefix x with C * x (end stable)",
    "Replace all elements of Prefix x with C * x (scaling stable)",
    "Replace all labels of Prefix with a different linear function (does the label matter)",
    "Replace query element of Prefix x with C * x (query stable)",
)

# Legend text: a leading blank line, a heading, then one bullet per experiment.
exp_descriptions = "\n# Experiment Descriptions\n" + "".join(
    "- Experiment {}: {}\n".format(number, summary)
    for number, summary in enumerate(_EXPERIMENT_SUMMARIES)
)
print(exp_descriptions)