import numpy as np
import cv2 as cv
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import os
import random
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's global RNG and the standard-library RNG with a fixed value so
# that any code drawing from these generators behaves the same on every run.
np.random.seed(42)
random.seed(42)

def process_data(path):
    """Load a folder-per-class image dataset as flattened grayscale vectors.

    Every sub-directory of ``path`` is treated as one class. Sub-directories
    are visited in sorted name order and the position in that order becomes
    the integer label, so two dataset roots containing the same class folder
    names (e.g. a train and a test split) get the same label mapping.
    Entries directly under ``path`` that are not directories are ignored.

    Args:
        path: Root directory containing one sub-directory per class.

    Returns:
        A ``(data, label)`` tuple of NumPy arrays: ``data`` holds one
        flattened grayscale image per row, ``label`` holds the class index
        of the corresponding row. Both are empty when no images are found.

    Raises:
        ValueError: If a file inside a class folder cannot be decoded as an
            image by OpenCV.
    """
    data = []
    label = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # folder -> label mapping deterministic across calls and machines.
    # Filtering to directories avoids crashing on stray files in the root.
    all_folders = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    for idx, folder in enumerate(all_folders):
        folder_path = os.path.join(path, folder)
        for number in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, number)
            img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
            if img is None:
                # cv.imread signals failure by returning None rather than
                # raising; surface it with the offending path.
                raise ValueError(f'Could not read image: {img_path}')
            data.append(img.reshape(-1))
            label.append(idx)
    return np.array(data), np.array(label)

def train_and_test():
    """Train an L1-regularized logistic regression and report test results.

    Loads the train and test splits from the hard-coded
    ``../MNIST/transformed/`` directories via ``process_data``, fits the
    model, prints the test accuracy and the number of fitted parameters,
    and writes a confusion-matrix heatmap to ``confusion_matrix.png`` in the
    current working directory.
    """
    train_data, train_label = process_data('../MNIST/transformed/TRAIN/')
    test_data, test_label = process_data('../MNIST/transformed/TEST/')

    # Train Logistic Regression model with L1 (lasso) regularization
    model = LogisticRegression(penalty='l1', solver='liblinear', C=1.0)
    model.fit(train_data, train_label)

    predictions = model.predict(test_data)
    accuracy = accuracy_score(test_label, predictions)
    print(f'Test accuracy: {accuracy:.4f}')

    # Count parameters from the fitted model itself (weights + biases).
    # Deriving the count from n_features * n_classes over-counts in the
    # two-class case, where only a single coefficient row is fitted.
    total_params = model.coef_.size + model.intercept_.size
    print(f'Total number of parameters: {total_params}')

    # Confusion matrix. Pin rows/columns to the classes the model was
    # trained on so the matrix and the tick labels always line up, even if
    # the test split is missing a class.
    class_names = model.classes_
    cm = confusion_matrix(test_label, predictions, labels=class_names)

    fig = plt.figure(figsize=(10, 7))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        cbar=False,
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig('confusion_matrix.png')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Run the full train/evaluate pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    train_and_test()
