from collections import OrderedDict
import re
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from tqdm.notebook import tqdm
import numpy as np
import json
import pickle
import sys

sys.path.append('/root/weiminwu/in-context-learning-fork/in-context-learning/src/')

from eval import get_run_metrics, read_run_dir, get_model_from_run
from plot_utils import basic_plot, collect_results, relevant_model_names
from models import get_relevant_baselines, GDModel, DecisionTreeModel
from base_models import NeuralNetwork, ParallelNetworks

from samplers import get_data_sampler
from tasks import get_task_sampler


# Location of the trained run on disk: <run_dir>/<task>/<run_id>.
run_dir = "../models"
task = "decision_tree"
run_id = "6c67f151-9ba6-4eb1-84a0-b439a71c4545"
run_path = os.path.join(run_dir, task, run_id)

# Directory used by this experiment for its data and result files.
save_path = (
    "/root/weiminwu/in-context-learning-fork/in-context-learning/"
    "src/evaluation/results/DT/exp_2"
)

def Square_Error(ys, pred):
    """Return the coefficient of determination (R^2) of ``pred`` against ``ys``.

    Despite the name, the result is not a squared error. It is
    ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
    residuals ``ys - pred`` and ``SS_tot`` is the sum of squared deviations
    of ``ys`` from its own mean. Both sums run over every element.

    No guard is applied when ``SS_tot`` is zero (all ``ys`` equal); the
    division result is returned as-is.
    """
    residual_ss = (ys - pred).pow(2).sum()
    total_ss = (ys - ys.mean()).pow(2).sum()
    return 1 - residual_ss / total_ss

# Load the model stored under run_path together with its configuration.
model, conf = get_model_from_run(run_path)

# Rebuild the data and task samplers from the training section of that
# configuration.
training_conf = conf.training
n_dims = conf.model.n_dims
batch_size = training_conf.batch_size
data_sampler = get_data_sampler(training_conf.data, n_dims)
task_sampler = get_task_sampler(
    training_conf.task, n_dims, batch_size, **training_conf.task_kwargs
)

# NOTE: this rebinds `task`, which held the task-name string until here.
task = task_sampler()

# Experiment size: how many stored batches are evaluated and how many prompt
# positions are scored in each of them.
n_batches = 3
prompt_length = 76

# Accumulators for the run with 20% of the prior labels permuted: one list per
# prompt position, holding the clean labels and the model predictions.
actual_points_20_random = [[] for _ in range(prompt_length)]
predicted_points_20_random = [[] for _ in range(prompt_length)]

# NOTE(review): unpickling can execute arbitrary code, so data.pkl must come
# from a trusted source.
with open(f"{save_path}/data.pkl", 'rb') as file:
    data = pickle.load(file)
xs_list, ys_list = data['x'], data['y']

# Convert the first n_batches entries to tensors in place; entries past
# n_batches are left untouched.
for time_idx in tqdm(range(n_batches)):
    xs_list[time_idx] = torch.from_numpy(xs_list[time_idx])
    ys_list[time_idx] = torch.from_numpy(ys_list[time_idx])
# Permuted-label experiment. For each stored batch and each prompt position j,
# shuffle about 20% of the labels that precede j among themselves, run the
# model on the altered prompt, and record the prediction at j next to the
# clean label at j.
i = 0  # count of batches started; not read anywhere in this section
for batch_idx in tqdm(range(n_batches)):
    print(batch_idx)
    i += 1
    xs = xs_list[batch_idx]
    ys = ys_list[batch_idx]
    # Take the row count from the loaded data rather than the training config:
    # every row of ys is scored below, so every row must be permuted as well.
    n_rows = ys.shape[0]

    # Position 0 has no preceding labels, so it is scored on the clean prompt.
    # This is inference only: no_grad keeps the stored predictions free of
    # autograd history, the same as the permuted passes below.
    with torch.no_grad():
        pred = model(xs, ys)
    actual_points_20_random[0].extend(ys[:, 0])
    predicted_points_20_random[0].extend(pred[:, 0])

    for j in range(1, prompt_length):
        # Always start from the clean labels so permutations do not compound
        # from one position to the next.
        permuted_ys = np.copy(ys)
        if j > 2:
            # The count depends only on j, so compute it once per position.
            # It stays below 2 while j < 10, in which case the shuffle leaves
            # the labels unchanged.
            num_elements_to_shuffle = int(j * 0.20)
            for row in range(n_rows):
                # Pick distinct positions among the labels preceding j.
                indices_to_shuffle = np.random.choice(
                    j, num_elements_to_shuffle, replace=False
                )
                # Index-array selection yields a copy: shuffle it, then write
                # it back into the same positions.
                elements_to_shuffle = permuted_ys[row, indices_to_shuffle]
                np.random.shuffle(elements_to_shuffle)
                permuted_ys[row, indices_to_shuffle] = elements_to_shuffle

        permuted_ys_tensor = torch.from_numpy(permuted_ys)
        with torch.no_grad():
            pred = model(xs, permuted_ys_tensor)

        # Score against the clean label at j, not the permuted one.
        actual_points_20_random[j].extend(ys[:, j])
        predicted_points_20_random[j].extend(pred[:, j])

    # Per-position R^2 over everything accumulated so far. This is recomputed
    # and the file overwritten after every batch, so the file on disk always
    # reflects the batches processed up to this point.
    random_20p = []
    for point_idx in range(prompt_length):
        actual = torch.tensor(actual_points_20_random[point_idx])
        predicted = torch.tensor(predicted_points_20_random[point_idx])
        random_20p.append(Square_Error(actual, predicted))

    # One value per line, in prompt-position order, using each value's default
    # string form.
    with open(os.path.join(save_path, '20p_random.txt'), 'w') as f:
        for value in random_20p:
            f.write(f"{value}\n")