import os
import sys
import time
from math import exp, log, sqrt

import cupy as cp
import numpy as np
import pandas as pd
import scipy.sparse as sp

sys.path.append('../../../')    # Change to the path where accelib1.py locates
import accelib1 as accelib

# Wall-clock reference taken before any data is loaded.
# NOTE(review): nothing in the visible script reads `start` — confirm whether
# an elapsed-time report was intended.
start = time.time()


def _load_dense_ratings(csv_path, shape, dtype="float64"):
    """Read a ratings CSV and return it as a dense matrix.

    The CSV is converted to a single array; column 0 is used as the row
    index, column 1 as the column index and column 2 as the stored value.

    The column slices are already one-dimensional, so they are passed to the
    sparse constructor as-is. (Squeezing them, as the script used to do, is a
    no-op except for a single-row file, where it collapses each slice to a
    0-d array.)

    Building the matrix inside a function also lets the DataFrame and the
    triplet array be released as soon as the dense result is returned.

    Args:
        csv_path: Path of the CSV file to read.
        shape: ``(n_rows, n_cols)`` of the resulting matrix.
        dtype: Element type of the resulting matrix.

    Returns:
        The dense matrix produced by ``csr_matrix(...).todense()``.
    """
    triplets = pd.read_csv(csv_path).to_numpy()
    row_idx = triplets[:, 0].astype(int)
    col_idx = triplets[:, 1].astype(int)
    values = triplets[:, 2]
    sparse = sp.csr_matrix((values, (row_idx, col_idx)), shape=shape, dtype=dtype)
    return sparse.todense()


# Active dataset: ML20M1, stored as float64.
# Shapes for the other datasets (their disabled variants used dtype "float32"):
#   NETFLIX : X / Y shape = (408160, 17770)
#   YELP2018: X / Y shape = (769365, 40000)
#   MSD     : X / Y shape = (865284, 40000)
# Alternative shapes for R (unlabelled in the original; presumably in the same
# dataset order — confirm):
#   (72029, 17770), (135771, 40000), (152698, 40000)
X = _load_dense_ratings("./ratings_processed_test1.csv", (41549, 26744))
Y = _load_dense_ratings("./ratings_processed_test2.csv", (41549, 26744))
R = _load_dense_ratings("./ratings_processed.csv", (138493, 26744))

#Y2 = sp.csr_matrix((np.squeeze(arr_Y2[:, 2]), (np.squeeze(arr_Y2[:, 0].astype(int)),
#                    np.squeeze(arr_Y2[:, 1].astype(int)))),
#                    shape = (20775, 26744) , dtype = "float64").todense()
                    #shape = (72029, 17770) , dtype = "float32").todense()
                    #shape = (135771, 40000) , dtype = "float32").todense()
                    #shape = (152698, 40000) , dtype = "float32").todense()

#print(X1.shape)


#R = R[0:int(R.shape[0] * 0.8), :]

# Base value p and the three products derived from it:
#   p2 = p^2,  p3 = p * (1 - p),  p4 = (1 - p)^2
# They are the scalar factors applied to RR when Sxx, Syx and Syy are built.
p = 0.5
p2, p3, p4 = p * p, p * (1 - p), (1 - p) * (1 - p)

# np.save does not create missing parent directories. Make sure the output
# folder exists before the GPU products are computed, so a missing folder
# cannot discard finished work.
os.makedirs("./matrix1", exist_ok=True)

# Result of accelib.gpu_block_matmul for X, divided by the number of rows of X.
# NOTE(review): presumably the Gram matrix X^T X — confirm against accelib1.
XX = accelib.gpu_block_matmul(X) / X.shape[0]
np.save("./matrix1/XX.npy", XX)
print("XX Finished")

# Result of accelib.gpu_block_matmulxy1 for (Y^T, X), divided by the number of
# rows of X.
# NOTE(review): presumably the cross product Y^T X — confirm against accelib1.
YX = accelib.gpu_block_matmulxy1(Y.T, X) / X.shape[0]
np.save("./matrix1/YX.npy", YX)
print("YX Finished")

# Result of accelib.gpu_block_matmul for R, divided by the number of rows of R,
# together with its main diagonal.
RR = accelib.gpu_block_matmul(R) / R.shape[0]
diag_RR = np.diag(RR)

# Sxx: RR scaled by p^2, with the diagonal overwritten by p * diag(RR).
Sxx = RR * p2
np.fill_diagonal(Sxx, diag_RR * p)
np.save("./matrix1/Sxx.npy", Sxx)
print("Sxx Finished")

# Syy: RR scaled by (1 - p)^2, with the diagonal overwritten by
# (1 - p) * diag(RR).
Syy = RR * p4
np.fill_diagonal(Syy, diag_RR * (1 - p))
np.save("./matrix1/Syy.npy", Syy)
print("Syy Finished")

# Syx: RR scaled by p * (1 - p), with the diagonal overwritten by 0 * diag(RR).
# Unlike Sxx and Syy it is not written to disk here.
Syx = RR * p3
np.fill_diagonal(Syx, diag_RR * 0)

# Dump the matrices to stdout, one after the other, then drop the two products
# that are not needed any more.
print(Sxx, Syx, Syy, XX, YX, sep="\n")
del XX, YX

# Symmetric eigendecomposition of Sxx on the GPU: L_gpu receives the
# eigenvalues and the columns of S_gpu the matching eigenvectors.
Sxx_gpu = cp.asarray(Sxx)
L_gpu, S_gpu = cp.linalg.eigh(Sxx_gpu)

# Replace every negative eigenvalue with a small positive value so that the
# square root and the reciprocal below stay real. This is one masked
# assignment on the device instead of a Python loop that indexes and compares
# the GPU array once per eigenvalue.
# NOTE(review): eigenvalues that are exactly zero are left as they are, so
# 1 / L_h_gpu is inf for them — confirm whether `<=` was intended.
L_gpu[L_gpu < 0] = 0.00001

L_h_gpu = cp.sqrt(L_gpu)        # eigenvalues ** (1/2)
L_mh_gpu = 1 / L_h_gpu          # eigenvalues ** (-1/2)
del Sxx_gpu

# Multiplying S_gpu by a 1-D vector scales its columns, so each product below
# is S * diag(.) * S^T.
Q_h_gpu = (S_gpu * L_h_gpu) @ S_gpu.T
Q_h = cp.asnumpy(Q_h_gpu)      # Sxx^{1/2}
#del Q_h_gpu
Q_mh_gpu = (S_gpu * L_mh_gpu) @ S_gpu.T      # Sxx^{-1/2}
np.save("./matrix1/Q_h.npy", Q_h)
print("Q_h Finished")

# Move Syx to the GPU and right-multiply it by Sxx^{-1/2} once (B) and twice
# (B1); each result is copied back to the host and printed as soon as it is
# available.
syx_dev = cp.asarray(Syx)

b_dev = syx_dev @ Q_mh_gpu          # Syx @ Sxx^{-1/2}
B = cp.asnumpy(b_dev)
print(B)

b1_dev = b_dev @ Q_mh_gpu           # Syx @ Sxx^{-1/2} @ Sxx^{-1/2}
B1 = cp.asnumpy(b1_dev)
print(B1)

# Drop the device-side arrays before the host copies are written to disk.
del syx_dev, b_dev, Q_mh_gpu, b1_dev

np.save("./matrix1/B.npy", B)
print("B Finished")
np.save("./matrix1/B1.npy", B1)
print("B1 Finished")


