from collections import OrderedDict
import re
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from tqdm.notebook import tqdm
import numpy as np
import json
import pickle
import sys

sys.path.append('/root/weiminwu/in-context-learning-fork/in-context-learning/src/')

from eval import get_run_metrics, read_run_dir, get_model_from_run
from plot_utils import basic_plot, collect_results, relevant_model_names
from models import get_relevant_baselines, GDModel, DecisionTreeModel
from base_models import NeuralNetwork, ParallelNetworks

from samplers import get_data_sampler
from tasks import get_task_sampler


# Location of the trained run to evaluate: <run_dir>/<task>/<run_id>.
run_dir = "../models"
# Task sub-directory name. NOTE: `task` is rebound further down in this
# script to the object returned by `task_sampler()`.
task = "decision_tree"
run_id = "6c67f151-9ba6-4eb1-84a0-b439a71c4545"
run_path = os.path.join(run_dir, task, run_id)

# Directory that receives data.pkl and the per-position score text files.
save_path = '/root/weiminwu/in-context-learning-fork/in-context-learning/src/evaluation/results/DT/exp_2'

def Square_Error(ys, pred):
    """Return the coefficient of determination (R^2) of ``pred`` against ``ys``.

    Despite its name, this does not return a squared error: it returns
    ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``ys``
    from its mean (taken over all elements).

    Args:
        ys: tensor of ground-truth values.
        pred: tensor of predictions, broadcastable against ``ys``.

    Returns:
        A 0-dim tensor holding R^2. No guard is applied when ``ys`` is
        constant (``SS_tot == 0``); the division result is returned as is.
    """
    residual_ss = ((ys - pred) ** 2).sum()
    total_ss = ((ys - ys.mean()) ** 2).sum()
    return 1 - residual_ss / total_ss

# Restore the trained model together with the configuration stored for the run.
model, conf = get_model_from_run(run_path)

# Build the input sampler and the task sampler from the run's own training
# configuration (data distribution, task name, dimensions, batch size).
n_dims, batch_size = conf.model.n_dims, conf.training.batch_size
data_sampler = get_data_sampler(conf.training.data, n_dims)
task_sampler = get_task_sampler(conf.training.task, n_dims, batch_size, **conf.training.task_kwargs)

# Draw a single task instance; it is reused for every batch sampled below.
# This rebinds `task`, which previously held the task directory name.
task = task_sampler()

# Part 1: clean prompts -- the model is given the true label at every position.
n_batches = 3
prompt_length = 76

# The results directory must exist before the first open(..., 'wb') below;
# create it so a fresh checkout does not fail with FileNotFoundError.
os.makedirs(save_path, exist_ok=True)

# Evaluation prompts, kept as NumPy arrays so they can be pickled and
# re-read by the later sections of this script.
xs_list = []
ys_list = []
# Per-position accumulators: entry j collects the true / predicted values at
# prompt position j across all batches.
actual_points_1 = [[] for _ in range(prompt_length)]
predicted_points_1 = [[] for _ in range(prompt_length)]

# Sample the inputs and label them with the single task instance drawn above.
for _ in tqdm(range(n_batches)):
    xs = data_sampler.sample_xs(b_size=batch_size, n_points=prompt_length)
    xs_list.append(np.array(xs))
    ys = task.evaluate(xs)
    ys_list.append(np.array(ys))

# Persist the sampled data; every experiment in this file loads this pickle.
data = {'x': xs_list, 'y': ys_list}
with open(save_path+'/data.pkl', 'wb') as pkl_file:
    pickle.dump(data, pkl_file)

# Read the data back from disk (the same route the other sections use) and
# convert each batch to a tensor, in place.
with open(save_path+'/data.pkl', 'rb') as file:
    data = pickle.load(file)
xs_list = data['x']
ys_list = data['y']
for time_idx in tqdm(range(n_batches)):
    xs_list[time_idx] = torch.from_numpy(xs_list[time_idx])
    ys_list[time_idx] = torch.from_numpy(ys_list[time_idx])

# One forward pass per batch on the unmodified prompt; inference only, so no
# autograd graph is recorded.
for time_idx in tqdm(range(n_batches)):
    xs = xs_list[time_idx]
    ys = ys_list[time_idx]

    with torch.no_grad():
        pred = model(xs, ys)
    for j in range(prompt_length):
        actual_points_1[j].extend(ys[:, j])
        predicted_points_1[j].extend(pred[:, j])

# R^2 per prompt position, pooled over every sequence of every batch.
w_1_error = []

for point_idx in range(prompt_length):
    actual = torch.tensor(actual_points_1[point_idx])
    predicted = torch.tensor(predicted_points_1[point_idx])
    R_square = Square_Error(actual, predicted)
    w_1_error.append(R_square)

# One score per line, in prompt-position order.
with open(save_path+'/w_1.txt', 'w') as f:
    for value in w_1_error:
        f.write(f"{value}\n")
        
        
# Part 2: 10% of the in-context labels shuffled.
#
# For every query position j >= 1 the model is re-run on a prompt in which
# int(0.10 * j) of the j preceding labels (positions 0..j-1), chosen at random
# independently for each sequence, have been shuffled among themselves. The
# prediction at position j is then scored against the true label.

actual_points_10_random = [[] for _ in range(prompt_length)]
predicted_points_10_random = [[] for _ in range(prompt_length)]
with open(save_path+'/data.pkl', 'rb') as file:
    data = pickle.load(file)
xs_list = data['x']
ys_list = data['y']
for time_idx in tqdm(range(n_batches)):
    xs_list[time_idx] = torch.from_numpy(xs_list[time_idx])
    ys_list[time_idx] = torch.from_numpy(ys_list[time_idx])

print("start running")
for batch_idx in tqdm(range(n_batches)):
    print(batch_idx)
    xs = xs_list[batch_idx]
    ys = ys_list[batch_idx]

    # Position 0 has no preceding labels to corrupt, so it is taken from a
    # pass over the clean prompt. Run it under no_grad like every other model
    # call in this script, so no autograd graph is built and the stored
    # predictions do not track gradients.
    with torch.no_grad():
        pred = model(xs, ys)
    actual_points_10_random[0].extend(ys[:, 0])
    predicted_points_10_random[0].extend(pred[:, 0])

    for j in range(1, prompt_length):
        # Fresh NumPy copy of the true labels for this query position.
        permuted_ys = np.copy(ys)
        # Number of preceding labels to shuffle; identical for every row.
        # It is 0 for j < 10 and 1 for 10 <= j < 20, so the shuffle below
        # only has an effect from j = 20 onwards.
        num_elements_to_shuffle = int(j * 0.10)
        if j > 2:  # for j == 1, 2 there is nothing to permute
            for row in range(batch_size):
                indices_to_shuffle = np.random.choice(j, num_elements_to_shuffle, replace=False)
                # Fancy indexing returns a copy: shuffle it, then write it back.
                elements_to_shuffle = permuted_ys[row, indices_to_shuffle]
                np.random.shuffle(elements_to_shuffle)
                permuted_ys[row, indices_to_shuffle] = elements_to_shuffle

        # Back to a tensor for the model.
        permuted_ys_tensor = torch.from_numpy(permuted_ys)
        with torch.no_grad():
            pred = model(xs, permuted_ys_tensor)

        # Score against the true label at position j, not the permuted one.
        actual_points_10_random[j].extend(ys[:, j])
        predicted_points_10_random[j].extend(pred[:, j])

    # The scores are recomputed over all batches seen so far and the output
    # file is rewritten after every batch, so partial results exist if the
    # run is interrupted.
    # NOTE(review): Part 1 and the baseline do this once after the loop;
    # confirm the per-batch rewrite here is intentional.
    random_10p = []

    for point_idx in range(prompt_length):
        actual = torch.tensor(actual_points_10_random[point_idx])
        predicted = torch.tensor(predicted_points_10_random[point_idx])
        R_square = Square_Error(actual, predicted)
        random_10p.append(R_square)

    with open(save_path+'/10p_random.txt', 'w') as f:
        for value in random_10p:
            f.write(f"{value}\n")

# # 20% of the in-context labels shuffled (disabled): same procedure as the 10% run, writing 20p_random.txt

# actual_points_20_random = [[] for _ in range(prompt_length)]
# predicted_points_20_random = [[] for _ in range(prompt_length)]
# with open(save_path+'/data.pkl', 'rb') as file:
#     data = pickle.load(file)
# xs_list = data['x']
# ys_list = data['y']
# for time_idx in tqdm(range(n_batches)):
#     xs_list[time_idx] = torch.from_numpy(xs_list[time_idx])
#     ys_list[time_idx] = torch.from_numpy(ys_list[time_idx])
# # Generate data and perform the experiment
# i = 0
# for batch_idx in tqdm(range(n_batches)):
#     print(batch_idx)
#     i += 1
#     xs = xs_list[batch_idx]
#     ys = ys_list[batch_idx]

#     # j's idx starts at 1, but in graph, we refer it to 0
#     pred = model(xs, ys)
#     actual_points_20_random[0].extend(ys[:, 0])
#     predicted_points_20_random[0].extend(pred[:, 0])
    
#     for j in range(1, prompt_length):
#         permuted_ys = np.copy(ys)
#         if j > 2:  # if j == 1, 2, there are no prior labels or no need to permuted
#             for i in range(batch_size):
#                 num_elements_to_shuffle = int(j * 0.20)
#                 indices_to_shuffle = np.random.choice(j, num_elements_to_shuffle, replace=False)
#                 elements_to_shuffle = permuted_ys[i, indices_to_shuffle]
#                 np.random.shuffle(elements_to_shuffle)
#                 permuted_ys[i, indices_to_shuffle] = elements_to_shuffle
                
#         # Transfer np array to tensor
#         permuted_ys_tensor = torch.from_numpy(permuted_ys)
#         # predict with the si
#         with torch.no_grad():
#             pred = model(xs, permuted_ys_tensor)
        
#         actual_points_20_random[j].extend(ys[:, j])
#         predicted_points_20_random[j].extend(pred[:, j])

#     random_20p = []

#     for point_idx in range(prompt_length):
#         actual = torch.tensor(actual_points_20_random[point_idx])
#         predicted = torch.tensor(predicted_points_20_random[point_idx])
#         R_square = Square_Error(actual, predicted)
#         random_20p.append(R_square)

#     with open(save_path+'/20p_random.txt', 'w') as f:
#         for value in random_20p:
#             f.write(f"{value}\n")
        
# Baseline: score a classical learner on the same saved prompts.
# Candidate baselines keyed by training-task name; each entry is a
# (model class, constructor kwargs) pair.
baselines = {
    "relu_2nn_regression": [
        (
            GDModel,
            {
                "model_class": NeuralNetwork,
                "model_class_args": {"in_size": 20, "hidden_size": 100, "out_size": 1},
                "opt_alg": "adam",
                "batch_size": 64,
                "lr": 5e-3,
                "num_steps": 100,
            },
        ),
    ],
    "decision_tree": [
        (DecisionTreeModel, {"max_depth": 4}),
        # (DecisionTreeModel, {"max_depth": None}),
    ],
}

# Instantiate every baseline registered for the task this run was trained on;
# only the first one is evaluated below.
baseline = [cls(**init_kwargs) for cls, init_kwargs in baselines[conf.training.task]]

# Per-position accumulators: entry j collects the true / predicted values at
# prompt position j across all batches.
actual_points_base = [[] for _ in range(prompt_length)]
predicted_points_base = [[] for _ in range(prompt_length)]
all_errors_base = []

# Reload the prompts saved earlier and convert each batch to a tensor, in place.
with open(save_path+'/data.pkl', 'rb') as file:
    data = pickle.load(file)
xs_list, ys_list = data['x'], data['y']
for batch_no in tqdm(range(n_batches)):
    xs_list[batch_no] = torch.from_numpy(xs_list[batch_no])
    ys_list[batch_no] = torch.from_numpy(ys_list[batch_no])

for batch_no in tqdm(range(n_batches)):
    xs, ys = xs_list[batch_no], ys_list[batch_no]
    pred = baseline[0](xs, ys)

    # Distribute this batch's columns over the per-position accumulators.
    for j, (actual_bucket, predicted_bucket) in enumerate(
        zip(actual_points_base, predicted_points_base)
    ):
        actual_bucket.extend(ys[:, j])
        predicted_bucket.extend(pred[:, j])

# R^2 per prompt position, pooled over every sequence of every batch.
base_error = [
    Square_Error(torch.tensor(actual_col), torch.tensor(predicted_col))
    for actual_col, predicted_col in zip(actual_points_base, predicted_points_base)
]

# One score per line, in prompt-position order.
with open(save_path+'/baseline.txt', 'w') as f:
    f.writelines(f"{value}\n" for value in base_error)
