import numpy as np
import pandas as pd
import scipy.sparse as sp
import accelib1 as accelib

# Calculate training error and test error.
#
# Read the pre-split train/test rating tables and expose them as plain NumPy
# arrays (one row per CSV record, columns in CSV order) for the matrix
# construction further down.  The DataFrames themselves are kept because their
# "userId" column is used later to normalise the errors.
df_train = pd.read_csv("./data/YELP2018/ratings_processed_train.csv")
df_test = pd.read_csv("./data/YELP2018/ratings_processed_test.csv")

arr1 = df_train.to_numpy()  # training records
arr2 = df_test.to_numpy()   # test records

# Build dense matrices from the coordinate triples stored in the first three
# columns of arr1 / arr2: column 0 -> row index, column 1 -> column index,
# column 2 -> stored value (presumably userId, itemId, rating -- verify against
# the CSV header).
#
# The column slices are already 1-D, so they are passed straight to the
# constructor; squeezing them would turn a single-record input into 0-d arrays,
# which are not valid 1-D coordinate arrays.
#
# The shapes are hard-coded per dataset split.  Other shapes previously used
# here for the training matrix: (117718, 26744), (408160, 17770),
# (865284, 40000); for the test matrix: (152698, 40000).
#
# NOTE(review): .todense() materialises every cell (769365 x 40000 float32 is
# roughly 123 GB; the test matrix roughly 22 GB) -- confirm that
# accelib.gpu_block_norm really requires dense input.
s1 = sp.csr_matrix(
    (arr1[:, 2], (arr1[:, 0].astype(int), arr1[:, 1].astype(int))),
    shape=(769365, 40000),
    dtype="float32",
).todense()
s2 = sp.csr_matrix(
    (arr2[:, 2], (arr2[:, 0].astype(int), arr2[:, 1].astype(int))),
    shape=(135771, 40000),
    dtype="float32",
).todense()

# Load the stored weight matrix (presumably the trained EASE item-item weights,
# judging by the file name -- confirm).
W = np.load("./model/ease_train_YELP2018.npy")

# Sanity output: dimensions of both rating matrices and of the weights,
# followed by the weights' element type.
for matrix in (s1, s2, W):
    print(matrix.shape)
print(W.dtype)

# Residual operator (I - W).  It is identical for the training and the test
# evaluation, so it is computed once and shared instead of being rebuilt for
# each call (assumes accelib.gpu_block_norm does not modify its arguments --
# TODO confirm).
# NOTE(review): np.eye defaults to float64, so the residual is float64 even when
# W is stored as float32 -- confirm this precision/memory cost is intended.
residual = np.eye(W.shape[0]) - W

# Each error is the block norm returned by accelib divided by
# (largest userId + 1) of the corresponding split.
tre = accelib.gpu_block_norm(s1, residual) / (df_train["userId"].max() + 1)
tee = accelib.gpu_block_norm(s2, residual) / (df_test["userId"].max() + 1)
print("training error: ", tre)
print("test error: ", tee)
print("Difference: ", tee - tre)
