import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from supplementary.util import clt_confidence_interval
from supplementary.util import (
    conformal_interval_quantile,
    power_interval_quantile,
    ppipp_interval_quantile,
    fab_interval_quantile,
    classical_interval_quantile,
)
import xgboost as xgb
import matplotlib.pyplot as plt
import os


def generate_mock_data(n_samples, noise_std, seed=42):
    """Draw a reproducible noisy-sine regression sample.

    Features are uniform on [0, 1]; targets are ``sin(pi * x)`` plus
    zero-mean Gaussian noise.

    Args:
        n_samples: Number of rows to draw.
        noise_std: Standard deviation of the additive Gaussian noise.
        seed: Seed of the private random stream. The default (42) reproduces
            the values previously obtained via ``np.random.seed(42)``.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, 1)`` and ``Y``
        has shape ``(n_samples,)``.
    """
    # A private RandomState yields exactly the stream that
    # ``np.random.seed(seed)`` would, but leaves NumPy's global generator
    # untouched, so calling this function has no hidden side effect.
    rng = np.random.RandomState(seed)
    X = rng.uniform(0, 1, size=(n_samples, 1))
    Y = np.sin(np.pi * X).ravel() + rng.normal(0, noise_std, size=n_samples)
    return X, Y

# --- Experiment configuration ---------------------------------------------
# Scalars used by the estimating function and the interval routine.
q = 0.5     # level subtracted from the indicator inside `psi`
M = 1.0     # forwarded as `M=` to conformal_interval_quantile

# Confidence-interval settings.
ALPHA = 0.05                              # confidence-interval level
CI_CONSTRUCTOR = clt_confidence_interval  # forwarded as `ci_constructor=`

# NOTE(review): not referenced anywhere in the visible part of this script.
BIG_M = 1e99

def ind(y, theta):
    """Return the 0/1 indicator of ``y <= theta``, evaluated elementwise.

    Args:
        y: Scalar or array-like supporting ``y - theta``.
        theta: Scalar or array broadcastable against ``y``.

    Returns:
        Integer NumPy value(s): 1 where ``y - theta <= 0``, otherwise 0.
    """
    # np.less_equal always returns a NumPy object, so ``.astype`` also works
    # when both arguments are plain Python numbers (a bare ``<=`` would give
    # a built-in bool, which has no ``.astype``).
    return np.less_equal(y - theta, 0).astype(int)


def psi(y, theta):
    """Return ``ind(y, theta) - q``, using the module-level level ``q``."""
    return ind(y, theta) - q


# Row counts of the three disjoint subsets carved out of each synthetic dataset.
n_train = 10000
n_cal = 1000
n_test = 10000
n_samples = n_train + n_cal + n_test

# Sweep axes: 15 noise levels in [0.01, 1.0] and 60 err values in [0.001, 0.499].
noise_stds = np.linspace(0.01, 1.0, num=15)
errs = np.linspace(0.001, 0.499, num=60)

# Result table (rows: noise level, columns: err); NaN until a cell is filled.
widths_grid = np.full((noise_stds.size, errs.size), np.nan, dtype=float)


# Sweep over noise levels (rows) and err values (columns), recording the width
# of the interval returned by conformal_interval_quantile in widths_grid.
for i, ns in enumerate(tqdm(noise_stds, desc="noise_std")):
    # Synthetic dataset for the current noise level.
    X, Y = generate_mock_data(n_samples=n_samples, noise_std=ns)

    # Two-stage split: carve out the training rows first, then divide the
    # remainder into calibration and test (test_size is an absolute count).
    X_train, X_temp, Y_train, Y_temp = train_test_split(
        X, Y, train_size=n_train, random_state=0
    )
    X_cal, X_test, Y_cal, Y_test = train_test_split(
        X_temp, Y_temp, test_size=n_test, random_state=0
    )

    # Point predictor fitted on the training rows only.
    model = xgb.XGBRegressor()
    model.fit(X_train, Y_train)
    Yhat_cal = model.predict(X_cal)
    Yhat_test = model.predict(X_test)

    # Absolute residuals on the calibration rows.
    scores_cal = np.abs(Y_cal - Yhat_cal)

    # Candidate theta values: 200 evenly spaced points spanning the targets
    # of the non-training rows.
    theta_min, theta_max = float(Y_temp.min()), float(Y_temp.max())
    thetas = np.linspace(theta_min, theta_max, 200)

    # Every argument except `err` is constant across the inner sweep.
    fixed_kwargs = dict(
        psi=psi,
        scores_cal=scores_cal,
        Yhat_test=Yhat_test,
        thetas=thetas,
        ci_constructor=CI_CONSTRUCTOR,
        alpha=ALPHA,
        M=M,
    )
    for j, err in enumerate(errs):
        CPPI = conformal_interval_quantile(err=err, **fixed_kwargs)
        has_endpoints = CPPI is not None and len(CPPI) >= 2
        # Width is last entry minus first entry; NaN marks a result that is
        # missing or has fewer than two entries.
        widths_grid[i, j] = float(CPPI[-1] - CPPI[0]) if has_endpoints else np.nan



# Coordinate matrices for the filled contour plot:
# E carries err (x-axis), N carries noise_std (y-axis).
E, N = np.meshgrid(errs, noise_stds)

fig, ax = plt.subplots(figsize=(8, 5))
cf = ax.contourf(E, N, widths_grid, levels=20)

# Colour bar attached to the contour axes.
cbar = fig.colorbar(cf, ax=ax)
cbar.set_label('Largura do IC (sup C - inf C)')

ax.set_xlabel('err')
ax.set_ylabel('noise_std')
ax.set_title('Largura do intervalo CPPI em função de noise_std e err')
fig.tight_layout()
plt.show()

# Noise levels to display as individual width-vs-err curves.
noise_std_escolhidos = [0.01, 0.1, 0.5, 1.0]

fig, ax = plt.subplots(figsize=(8, 5))
for val in noise_std_escolhidos:
    # Snap the requested value to the closest entry of the noise grid.
    idx = int(np.abs(noise_stds - val).argmin())
    ax.plot(
        errs,
        widths_grid[idx, :],
        marker='o',
        linewidth=1.2,
        label=f'noise_std≈{noise_stds[idx]:.2f}',
    )

ax.set_xlabel('err')
ax.set_ylabel('Largura do IC (sup C - inf C)')
ax.set_title('CPPI: largura vs err para ruídos selecionados')
ax.grid(True)
ax.legend(title='Séries (σ)')
fig.tight_layout()
plt.show()