# -*- coding: utf-8 -*-
"""1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1o61PJX_-lYejQnAkdQ-4Mza3QskMq8WK
"""

import math
import io
import os
from random import sample
import multiprocessing as mp
from multiprocessing import Pool, freeze_support
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import pandas as pd
import time
from sklearn.metrics import ndcg_score
from multiprocessing import Pool, cpu_count
import numba
import random
from numba import njit
# Fix both RNGs so that factor initialisation (numpy) and virtual-item
# sampling (random) are repeatable between runs.
np.random.seed(0)
random.seed(10)

# ---- Training data ----

#Read data from file (columns used below: userid, movieid, rating)
ratings_df = pd.read_csv("/home/kiran/Downloads/MTP/Experiment1/ml-1m/Age/Train/ml1m_age_train.csv")
ratings_df.reset_index(drop=True,inplace=True)

#Latent dimensions
K = 20

#Pivot the dataset into a dense user x item matrix; unrated cells become 0
ratings_df = ratings_df.pivot(index =  'userid' , columns = 'movieid' , values = 'rating').fillna(0)
R = np.array(ratings_df)

#create users list
# Taken from the pivot's own index so that users[i] is guaranteed to be the
# id of row R[i]. Building it from a set() would tie that alignment to the
# set's iteration order, which is not guaranteed to match the pivot's rows.
users = np.array(ratings_df.index)

#store user and item count (rows / columns of the pivoted matrix)
N, M = R.shape
print("No. of users: ",N)
print("No. of items: ",M)

# ---- For sampling ----
# tau_f is the percentage of users whose gradients are aggregated per round.
tau_f = 5
tau = int((N*tau_f)/100)
print("No. of users permitted for 1 communication round: ",tau)

# ---- Demographic split ----
#
# pd.read_csv already consumes the header line of each file (the 'userid'
# column is selected by name right after loading), so every remaining row is
# data. The rows are therefore used as-is; promoting the first row to a
# header would silently discard one real record per file.

#under 18
u_df = pd.read_csv('/home/kiran/Downloads/MTP/Experiment1/ml-1m/Age/Train/ml1m_age_train_under18.csv')
u_df = u_df[['userid']]
v = list(u_df['userid'].values)
u_list = list(set(v))

#above 18
a_df = pd.read_csv('/home/kiran/Downloads/MTP/Experiment1/ml-1m/Age/Train/ml1m_age_train_above18.csv')
a_df = a_df[['userid']]
vm = list(a_df['userid'].values)
a_list = list(set(vm))


#store the number of distinct users in each group
u_count = len(u_list)
print("Under 18: ",u_count)
a_count = len(a_list)
print("Above 18: ",a_count)

# Group flag for ids 1..N: 0 if the id is in the under-18 list, else 1.
# A set makes each membership test O(1) instead of scanning u_list.
_u_lookup = set(u_list)
l = [0 if i in _u_lookup else 1 for i in range(1,N+1)]

# U maps user id -> latest user-factor vector (filled during training).
U = {}
# gamma: learning rate; lamda: L2 regularisation weight.
gamma = 0.08
lamda = 0.01

def ClientFilling(Q,Uu,u,t):
    """Return the mean of the non-zero entries of a rating row.

    The result is used by the caller as the "virtual" rating assigned to
    sampled unrated items.

    Args:
        Q: Unused; kept so existing call sites stay valid.
        Uu: Unused; kept so existing call sites stay valid.
        u: Iterable of ratings for one user, where ``0`` marks "not rated".
        t: Unused; kept so existing call sites stay valid.

    Returns:
        The average of all entries of ``u`` that are not ``0``, or ``0.0``
        when ``u`` contains no such entry (instead of dividing by zero).
    """
    rated_total = 0.0
    rated_count = 0
    for rating in u:
        if rating != 0:
            rated_total += rating
            rated_count += 1

    # A row without any rating has no meaningful mean; avoid 0/0.
    if rated_count == 0:
        return 0.0

    return rated_total / rated_count

'''ClientBatch'''

def ClientBatch(V, u, t, Uu, i, i_d,gamma):
    """Run one local training step for a single client (user).

    The user's real ratings are augmented with up to two randomly chosen
    unrated items that receive a "virtual" rating (the value returned by
    ``ClientFilling``). One averaged gradient step updates the user vector,
    after which per-item gradients are computed with the *updated* user
    vector so the caller can aggregate them.

    Reads the module-level names ``M``, ``K``, ``lamda``, ``u_list`` and
    writes the updated vector into the module-level dict ``U``. When this
    function runs inside a pool worker that write only affects the worker's
    copy of ``U``; the caller in this file stores the returned vector itself.

    Args:
        V: Item-factor matrix; ``V[j]`` is the factor vector of item ``j``.
        u: Rating row of the user (length ``M``); ``0`` marks "not rated".
        t: Communication round index. Per-item predictions are produced
            only when ``t == 19``.
        Uu: Current user-factor vector; the caller passes shape ``(1, K)``.
        i: Row index of the user. Not used by the computation; kept so the
            call signature stays unchanged.
        i_d: User id, used as key into ``U`` and for the ``u_list`` lookup.
        gamma: Learning rate of the user-vector update.

    Returns:
        A tuple ``(RMSE, V_UA_EF, Uu, demographic, mse, pred_v)`` where
        ``RMSE`` is ``sqrt(mse)``, ``V_UA_EF`` maps item index -> item
        gradient for every really or virtually rated item, ``Uu`` is the
        updated user vector, ``demographic`` is ``0`` if ``i_d`` is in
        ``u_list`` and ``1`` otherwise, ``mse`` is the summed squared error
        divided by the number of (real + virtual) rated items, and
        ``pred_v`` is the list of predicted scores for all ``M`` items
        (empty unless ``t == 19``).
    """
    # ---- Assign virtual ratings ----
    # Positions the user has not rated.
    unrated = [j for j in range(M) if u[j] == 0]

    # Never request more virtual items than exist; sample() would raise.
    no_samples   = min(2, len(unrated))
    n_itemsrated = np.count_nonzero(u) + no_samples

    #Randomly sample some of these items
    virtual_items = set(sample(unrated, no_samples))

    #get virtual rating
    rui_prime = ClientFilling(V, Uu, u, t)

    # ---- Update user gradient ----
    grad      = np.zeros((1,K))
    error_sum = 0
    for j in range(M):
        if u[j] != 0:
            target = u[j]            # real rating
        elif j in virtual_items:
            target = rui_prime       # virtual rating
        else:
            continue
        error      = np.dot(Uu,V[j]) - target
        error_sum += pow(error,2)
        grad       = np.add(grad, (error * V[j])+(lamda * Uu))

    # ---- Update user vector ----
    grad   = grad / n_itemsrated
    Uu     = Uu - (gamma * grad)
    U[i_d] = Uu

    # ---- Update item gradient (uses the updated user vector) ----
    V_UA_EF = {}
    for j, rating in enumerate(u):
        if rating != 0:
            target = rating          # real rating
        elif j in virtual_items:
            target = rui_prime       # virtual rating
        else:
            continue
        pred       = np.dot(Uu,V[j])
        V_UA_EF[j] = ((pred - target)*Uu) + (lamda * V[j])

    # ---- RMSE of 1 user ----
    RMSE = np.sqrt(error_sum/n_itemsrated)

    # ---- Store final predicted matrix ----
    pred_v = []
    if(t==19):
        for j in range(M):
            # Index by the loop variable so each item gets its own score.
            pred_v.append(np.dot(Uu , V[j])[0])

    # ---- Group-wise RMSE ----
    # 0 = disadvantaged group (listed in u_list), 1 = advantaged group.
    demographic = 0 if i_d in u_list else 1
    return RMSE, V_UA_EF, Uu, demographic , error_sum/n_itemsrated , pred_v

# ---- Batch FedRec ----
#
# Federated training loop: every round each client trains locally in a
# process pool, the per-user losses are logged, and the server updates the
# item factors from the gradients of a random tau-sized subset of clients.
#
# The loop is guarded because worker processes started with the "spawn" or
# "forkserver" methods re-import this module; without the guard each worker
# would try to start the training loop (and its own pool) again.
if __name__ == "__main__":
    V = np.random.uniform(low=0.1, high=0.9, size=(M,K))
    localloss_list = []
    dloss_list     = []
    aloss_list     = []
    bias_list      = []
    for t in range(20):
        st = time.time()
        local_loss = 0
        local_data = []
        i_grad     = {}
        d_loss     = 0
        d_sqloss   = 0
        a_loss     = 0
        a_sqloss   = 0
        pred_v     = {}

        # ---- Send item vectors to users ----
        for i in range(N):
            u   = R[i,:]
            i_d = users[i]

            # First round: random user factors; afterwards reuse the stored ones.
            if(t==0):
                Uu = np.random.uniform(low=0.1, high=0.9, size=(1,K))
            else:
                Uu = U[i_d]
            local_data.append(tuple((V, u, t, Uu, i, i_d,gamma)))

        # ---- Train Clients Locally ----
        with mp.Pool() as pool:
            ret = pool.starmap(ClientBatch, local_data)

        # ---- Retrieve item gradients and losses of each local client ----
        for i in range(N):
            i_d         = users[i]
            RMSE        = ret[i][0]
            grad        = ret[i][1]
            U[i_d]      = ret[i][2]
            i_grad[i_d] = grad
            local_loss += RMSE
            demographic = ret[i][3]

            #retrieve predicted vector (only produced in the last round)
            if(t==19):
                pred = ret[i][5]
                pred_v[i] = pred

            if(demographic == 0):
                # disadvantaged group
                d_loss   += RMSE
                d_sqloss += ret[i][4]
            elif(demographic == 1):
                # advantaged group
                a_loss   += RMSE
                a_sqloss += ret[i][4]

        # ---- Store the final losses ----

        #into lists
        localloss_list.append(local_loss / N)
        dloss_list.append(d_loss / u_count)
        aloss_list.append(a_loss / a_count)
        # Bias = mean squared loss of disadvantaged minus that of advantaged.
        bias_list.append(((d_sqloss/u_count)-(a_sqloss/a_count)))

        #into df
        output_df = pd.DataFrame(list(zip(localloss_list , dloss_list , aloss_list , bias_list)),
                   columns =['  RMSE  ', '  RMSE_Disadv  ' , '  RMSE_adv. ' , '  Demographic bias  '])
        #into csv (rewritten every round so partial results survive an abort)
        output_df.to_csv('1.csv')

        #store predicted matrix
        if(t==19):
            with open("1_pred.txt" , "w") as f:
                for key,value in pred_v.items():
                    f.write("%s:%s\n"%(key,value))

        # ---- Randomly sub-sample tau% of total clients ----
        sample_users = np.random.choice(users, size=tau, replace=False)

        # ---- Gradient aggregation at server ----
        for i in range(M):
            # Gradients for item i from the sampled users that reported one;
            # a dict lookup replaces scanning every entry of every user's dict.
            grads_item   = [i_grad[j][i] for j in sample_users if i in i_grad[j]]
            n_usersrated = len(grads_item)
            if(n_usersrated >=1):
                sum_gradi = np.zeros((1,K))
                for grad in grads_item:
                    sum_gradi = np.add(sum_gradi, np.array(grad))

                grad_vi = sum_gradi / n_usersrated
                V[i]    = V[i] - (gamma*grad_vi)

        et = time.time()
        print('Execution Time : ',et - st)
        # Decay the learning rate by 10% after every round.
        gamma = 0.9*gamma



