import numpy as np
import pandas as pd
import scipy.sparse as sp
from math import exp, log, sqrt
import cupy as cp
import accelib1 as accelib


def recall_at_k(predictions: np.ndarray, ground_truth: np.ndarray, k: int) -> float:
    """Return recall@k for a single ranking.

    Recall@k is the fraction of all relevant items (entries of
    ``ground_truth`` greater than zero) that appear among the ``k``
    highest-scored entries of ``predictions``.

    Args:
        predictions: 1-D array of scores, one per item.
        ground_truth: 1-D array of the same shape; values > 0 mark relevant
            items.
        k: Cutoff. Values larger than the number of items select every item;
            non-positive values select none.

    Returns:
        Recall in ``[0, 1]``; ``0.0`` when there are no relevant items or
        when ``k`` is not positive.

    Raises:
        AssertionError: If the two arrays differ in shape.
    """
    assert predictions.shape == ground_truth.shape, "Arrays must be of the same size"

    # Guard explicitly: with k == 0 the slice [-k:] is [-0:], which would
    # select *every* index and report full recall; a negative k would
    # silently drop the lowest-scored items instead of taking the top ones.
    if k <= 0:
        return 0.0

    relevant_mask = ground_truth > 0
    total_relevant = int(np.count_nonzero(relevant_mask))
    if total_relevant == 0:
        return 0.0

    # argsort is ascending, so the last k positions hold the k best items.
    # Their order is irrelevant for recall, and the indices are unique, so a
    # mask lookup counts the hits directly.
    top_k_indices = np.argsort(predictions)[-k:]
    relevant_at_k = int(np.count_nonzero(relevant_mask[top_k_indices]))

    return relevant_at_k / total_relevant


def averaged_recall_at_k(predictions: np.ndarray, ground_truth: np.ndarray, k: int) -> float:
    """Return the mean recall@k over the rows of two equally shaped matrices.

    Each row index ``i`` is scored independently with
    ``recall_at_k(predictions[i], ground_truth[i], k)`` and the per-row
    values are averaged with ``np.mean``.

    Args:
        predictions: Score matrix, one row per ranking.
        ground_truth: Relevance matrix of the same shape.
        k: Cutoff forwarded unchanged to ``recall_at_k``.

    Returns:
        The arithmetic mean of the per-row recall values.

    Raises:
        AssertionError: If the two matrices differ in shape.
    """
    assert predictions.shape == ground_truth.shape, "Matrices must be of the same size"

    row_count = predictions.shape[0]
    per_row_recall = [
        recall_at_k(predictions[row], ground_truth[row], k)
        for row in range(row_count)
    ]

    return np.mean(per_row_recall)


def ndcg_at_k(predictions: np.ndarray, ground_truth: np.ndarray, k: int) -> float:
    """Return NDCG@k for a single ranking.

    DCG sums ``relevance / log2(rank + 1)`` over the ``k`` highest-scored
    items (rank starting at 1); the result is normalised by the DCG of the
    ideal ordering, i.e. ``ground_truth`` sorted in descending order.

    Args:
        predictions: 1-D array of scores, one per item.
        ground_truth: 1-D array of the same shape holding relevance values.
        k: Cutoff. It is clamped to the number of items; a non-positive
            cutoff yields ``0.0``.

    Returns:
        ``DCG@k / IDCG@k``, or ``0.0`` when the ideal DCG is not positive or
        nothing is selected.

    Raises:
        AssertionError: If the two arrays differ in shape.
    """
    assert predictions.shape == ground_truth.shape, "Arrays must be of the same size"

    # Clamp the cutoff: the discount vector must have exactly as many
    # entries as items are selected. Without this, k larger than the number
    # of items (or k == 0, where [-0:] selects everything) makes the
    # division below fail with a shape mismatch.
    cutoff = min(k, len(predictions))
    if cutoff <= 0:
        return 0.0

    # log2(rank + 1) for ranks 1..cutoff, shared by DCG and IDCG.
    discounts = np.log2(np.arange(2, cutoff + 2))

    # argsort is ascending: take the last `cutoff` entries and reverse them
    # so position 0 is the best-scored item.
    top_k_indices = np.argsort(predictions)[-cutoff:][::-1]
    dcg_at_k = np.sum(ground_truth[top_k_indices] / discounts)

    ideal_relevance_at_k = np.sort(ground_truth)[-cutoff:][::-1]
    idcg_at_k = np.sum(ideal_relevance_at_k / discounts)

    return float(dcg_at_k / idcg_at_k) if idcg_at_k > 0 else 0.0


def averaged_ndcg_at_k(predictions: np.ndarray, ground_truth: np.ndarray, k: int) -> float:
    """Return the mean NDCG@k over the rows of two equally shaped matrices.

    Each row index ``i`` is scored independently with
    ``ndcg_at_k(predictions[i], ground_truth[i], k)`` and the per-row values
    are averaged with ``np.mean``.

    Args:
        predictions: Score matrix, one row per ranking.
        ground_truth: Relevance matrix of the same shape.
        k: Cutoff forwarded unchanged to ``ndcg_at_k``.

    Returns:
        The arithmetic mean of the per-row NDCG values.

    Raises:
        AssertionError: If the two matrices differ in shape.
    """
    assert predictions.shape == ground_truth.shape, "Matrices must be of the same size"

    row_count = predictions.shape[0]
    per_row_ndcg = [
        ndcg_at_k(predictions[row], ground_truth[row], k)
        for row in range(row_count)
    ]

    return np.mean(per_row_ndcg)


# ---------------------------------------------------------------------------
# Evaluation script: score a saved weight matrix on the ML20M1 test files.
# Runs at import time; reads two CSV files and one .npy file from disk.
# ---------------------------------------------------------------------------

# The first file is used to build the input matrix X, the second to build the
# ground-truth matrix Y.
# NOTE(review): presumably "test1" is the fold-in part and "test2" the
# held-out part of each test user's interactions -- confirm against the
# preprocessing step.
df_X = pd.read_csv("../data/ML20M1/ratings_processed_test1.csv")
df_Y = pd.read_csv("../data/ML20M1/ratings_processed_test2.csv")

# Plain arrays so the columns can be addressed by position below.
arr_X = df_X.to_numpy()
arr_Y = df_Y.to_numpy()

# Build X from coordinate triplets: column 0 -> row index, column 1 -> column
# index, column 2 -> stored value.
# NOTE(review): presumably (user, item, rating) -- confirm the CSV schema.
# The shape is hard-coded per dataset; the commented-out lines are the
# alternatives for the other datasets.
# .todense() materialises the full matrix and returns a numpy.matrix, not an
# ndarray.
X = sp.csr_matrix((np.squeeze(arr_X[:, 2]), (np.squeeze(arr_X[:, 0].astype(int)),
                    np.squeeze(arr_X[:, 1].astype(int)))),
                    shape = (41549, 26744), dtype = "float32").todense() # ML20M1
                    #shape = (144058, 17770), dtype = "float32").todense() # NETFLIX
                    #shape = (769365, 40000), dtype = "float32").todense() # YELP2018
                    #shape = (305396, 40000), dtype = "float32").todense() # MSD

# Same construction for the ground-truth matrix; the shape must match X's so
# that the metric helpers' shape assertions can hold.
Y = sp.csr_matrix((np.squeeze(arr_Y[:, 2]), (np.squeeze(arr_Y[:, 0].astype(int)),
                    np.squeeze(arr_Y[:, 1].astype(int)))),
                    shape = (41549, 26744), dtype = "float32").todense() # ML20M1
                    #shape = (144058, 17770), dtype = "float32").todense() # NETFLIX
                    #shape = (769365, 40000), dtype = "float32").todense() # YELP2018
                    #shape = (305396, 40000), dtype = "float32").todense() # MSD

# Pre-trained weight matrix loaded from disk.
# NOTE(review): the file name suggests an EASE item-item matrix -- confirm.
W = np.load("../model2/ease_train_ML20M1_50.npy")

# NOTE(review): accelib is a project-local module not visible here; presumably
# this computes the product X @ W in blocks on the GPU and returns a host
# ndarray -- confirm, since the metric helpers below index XW row by row and
# compare its shape with Y's.
XW = accelib.gpu_block_matmulxy1(X, W)

#print(type(XW))
#print(type(Y))
#print(XW)

# Convert Y from numpy.matrix to ndarray: indexing a row of a numpy.matrix
# yields a 2-D (1, n) matrix, whereas the metric helpers expect 1-D rows.
Y = np.asarray(Y)

# Debug output: shows the type of Y after the conversion.
print(type(Y))
#print(Y.shape)

# Recall@50 averaged over all rows.
k = 50
average_recall = averaged_recall_at_k(XW, Y, k)
print(average_recall)

# NDCG@100 averaged over all rows.
k = 100
average_ndcg = averaged_ndcg_at_k(XW, Y, k)
print(average_ndcg)
