import numpy as np
from sklearn.linear_model import Lasso

import pandas as pd

import numpy as np

def ls_gradient_descent(X, y, learning_rate=0.05, n_iterations=100, *, return_cost_history=False):
    """
    Perform least squares regression using batch gradient descent.

    No intercept column is added: the model is ``y ~ X @ theta``. Weights
    start at zero and are updated ``n_iterations`` times using the gradient
    of the cost ``(1 / (2 * n_samples)) * sum((X @ theta - y) ** 2)``.

    Parameters:
        X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
        y (numpy.ndarray): Target vector of shape (n_samples,).
        learning_rate (float): Step size of each gradient descent update.
        n_iterations (int): Number of gradient descent updates to run.
        return_cost_history (bool): Keyword-only. When True, also return the
            cost recorded at every iteration. Defaults to False so that
            callers expecting only ``theta`` keep working.

    Returns:
        numpy.ndarray: Coefficients ``theta`` of shape (n_features,) when
            ``return_cost_history`` is False (the default).
        tuple: ``(theta, cost_history)`` when ``return_cost_history`` is True.
            ``cost_history`` is a list with one entry per iteration; each
            entry is the cost evaluated *before* that iteration's update.
    """
    n_samples, n_features = X.shape

    # Start from the all-zero weight vector.
    theta = np.zeros(n_features)

    cost_history = []

    for _ in range(n_iterations):
        # Residuals of the current model on the training data.
        residuals = np.dot(X, theta) - y

        if return_cost_history:
            # Only pay for the cost evaluation when the caller asked for it.
            cost = (1 / (2 * n_samples)) * np.sum(residuals ** 2)
            cost_history.append(cost)

        # Gradient of the cost with respect to theta, then the descent step.
        gradient = (1 / n_samples) * np.dot(X.T, residuals)
        theta -= learning_rate * gradient

    if return_cost_history:
        return theta, cost_history
    return theta

def least_squares_regression(X, y):
    """
    Fit an ordinary least squares model with an intercept term.

    A column of ones is prepended to ``X`` so the first returned coefficient
    is the intercept and the remaining ones are the per-feature weights.

    Parameters:
        X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
        y (numpy.ndarray): Target vector of shape (n_samples,).

    Returns:
        numpy.ndarray: Coefficients of shape (n_features + 1,), ordered as
            ``[intercept, w_1, ..., w_n_features]``.
    """
    # Add bias term to X
    design = np.hstack([np.ones((X.shape[0], 1)), X])

    # Solve the least squares problem directly on the design matrix rather
    # than forming X.T @ X, which would square the condition number. Like
    # the pseudo-inverse, lstsq returns the minimum-norm solution when the
    # design matrix is rank deficient.
    coeffs, _residuals, _rank, _singular_values = np.linalg.lstsq(design, y, rcond=None)
    return coeffs

def lasso_regression(X, y, test_x, alpha=1.0):
    """
    Fit a Lasso regression on (X, y) and predict the targets for test_x.

    Parameters:
        X (numpy.ndarray): Training feature matrix of shape (n_samples, n_features).
        y (numpy.ndarray): Training target vector of shape (n_samples,).
        test_x (array-like): Feature rows to predict for, of shape
            (n_test_samples, n_features).
        alpha (float): Regularization strength (default: 1.0).

    Returns:
        numpy.ndarray: Predictions of the fitted model for ``test_x``
            (not the model coefficients).
    """
    # Fit Lasso regression (intercept included, generous iteration budget).
    lasso = Lasso(alpha=alpha, fit_intercept=True, max_iter=10000)
    lasso.fit(X, y)

    # Predict once; the result is what the caller gets back.
    return lasso.predict(test_x)

def prepare_ar_samples(data, lag):
    """
    Prepare samples for fitting an AR model on a multivariate time series.

    For every time step ``t`` in ``[lag, T)`` one sample is produced: the
    features are the previous ``lag`` values of all ``d`` variables and the
    target is the value of the first variable (row 0) at time ``t``.

    Parameters:
        data (numpy.ndarray): Multivariate time series of shape (d, T).
        lag (int): The lag order of the AR model; must satisfy 1 <= lag < T.

    Returns:
        tuple: Tuple containing:
            - X (numpy.ndarray): Feature matrix of shape ((T-lag), d*lag).
              Each row is the (d, lag) window flattened row by row, i.e. the
              ``lag`` past values of variable 0, then those of variable 1, ...
            - y (numpy.ndarray): Target vector of shape ((T-lag),) holding
              the first variable's value at each predicted time step.

    Raises:
        ValueError: If ``lag`` is smaller than 1 or not smaller than T.
    """
    d, T = data.shape
    if lag < 1:
        # A non-positive lag would produce empty or meaningless feature rows.
        raise ValueError("Lag must be at least 1.")
    if lag >= T:
        raise ValueError("Lag must be smaller than the time series length T.")

    # One flattened (d, lag) window per target time step.
    X = np.array([data[:, t - lag:t].flatten() for t in range(lag, T)])

    # Targets: first variable at times lag..T-1, copied so the result does
    # not alias the input array.
    y = data[0, lag:].copy()

    return X, y

def least_squares_predict(coeffs, X):
    """
    Predict using coefficients from a least squares model.

    Two coefficient layouts are supported:

    * ``coeffs`` has as many entries as ``X`` has columns (model without an
      intercept, e.g. gradient-descent weights): returns ``X @ coeffs``.
    * ``coeffs`` has exactly one more entry than ``X`` has columns (model
      with a leading intercept, as produced by prepending a column of ones
      before fitting): a column of ones is prepended to ``X`` first.

    Parameters:
        coeffs (numpy.ndarray): Coefficients of the regression model.
        X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).

    Returns:
        numpy.ndarray: Predicted values of shape (n_samples,).
    """
    x_shape = np.shape(X)
    coeff_shape = np.shape(coeffs)

    # Only add the bias column when the shapes say the coefficients carry a
    # leading intercept; otherwise X is used exactly as given.
    if len(x_shape) == 2 and coeff_shape and coeff_shape[0] == x_shape[1] + 1:
        X = np.hstack([np.ones((x_shape[0], 1)), X])

    return X @ coeffs


def run_baselines(test_csv_path="csv_datasets/test/data_0.csv", CTX=100, iters=(100, 200), lag=5):
    """
    Evaluate gradient-descent least squares AR baselines on a CSV time series.

    The last 2000 rows of the CSV are cut into overlapping windows of
    ``CTX + 1`` consecutive rows. Inside every window an AR model of order
    ``lag`` is fitted with gradient descent on all samples but the last one,
    and the squared error of its prediction for that last sample is recorded.
    This is done once with ``iters[0]`` and once with ``iters[1]`` iterations.

    Parameters:
        test_csv_path (str): Path of the CSV file; the first column is used
            as the index and the remaining columns as the variables.
        CTX (int): Context length; every window holds CTX + 1 time steps.
        iters (sequence of int): Two gradient descent iteration counts.
        lag (int): Lag order of the AR model (default: 5).

    Returns:
        tuple: ``(mse_for_iters_1, mse_for_iters_0)`` -- the mean squared
            error obtained with ``iters[1]`` iterations first, then the one
            obtained with ``iters[0]`` iterations.

    Raises:
        ValueError: If the data is too short to build a single window.
    """
    # Rows are time steps, columns are variables: shape (n_rows, d).
    df = pd.read_csv(test_csv_path, index_col=0, parse_dates=True)
    raw_data = df.tail(2000).to_numpy()

    # NOTE(review): this window count leaves the final row of the data
    # unused; kept as in the original evaluation protocol -- confirm.
    n_windows = len(raw_data) - CTX - 1
    if n_windows <= 0:
        raise ValueError(
            "Not enough rows to build a window of CTX + 1 time steps."
        )

    errors_first = []   # squared errors when training for iters[0] steps
    errors_second = []  # squared errors when training for iters[1] steps

    for start in range(n_windows):
        # Window as a (d, CTX + 1) series, the layout prepare_ar_samples expects.
        window = raw_data[start:start + CTX + 1].T
        Xs, Ys = prepare_ar_samples(window, lag)

        # Train on everything but the last sample; predict the last one.
        train_X, train_y = Xs[:-1], Ys[:-1]
        query = np.array([Xs[-1]])
        target = Ys[-1]

        theta = ls_gradient_descent(train_X, train_y, n_iterations=iters[0])
        pred = least_squares_predict(theta, query)
        errors_first.append((pred - target) ** 2)

        theta = ls_gradient_descent(train_X, train_y, n_iterations=iters[1])
        pred = least_squares_predict(theta, query)
        errors_second.append((pred - target) ** 2)

    return np.mean(errors_second), np.mean(errors_first)

# Example usage: sweep the context length and record the baseline MSEs.
if __name__ == "__main__":

    data = {
        "Model": [],
        "MSE": [],
        "Input Length": [],
    }

    d = 2 # o2d2 # o5d2  -- NOTE(review): unused here; looks like an experiment tag
    iters = [10, 5]

    # NOTE(review): every repetition recomputes the same values (nothing in
    # the pipeline is random), so the 5 passes yield duplicated rows.
    for _ in range(5):
        for i in range(10, 200, 10):
            l1, l2 = run_baselines(
                test_csv_path="csv_datasets/test/strong_data_99.csv",
                CTX=i,
                iters=iters,
            )

            # run_baselines returns the MSE for iters[1] first and the MSE
            # for iters[0] second; label every row with the iteration count
            # that actually produced it.
            for n_iter, mse in ((iters[1], l1), (iters[0], l2)):
                data["Model"].append(f"LS (iter={n_iter})")
                data["MSE"].append(mse)
                data["Input Length"].append(i)

    df = pd.DataFrame(data)
    df.to_csv("test_lsr_strong_5.csv")