import pandas as pd
from collections import defaultdict

# Specify the file paths for the dataset files
# (relative paths: resolved against the current working directory at run time)
users_path = "../datasets/ml-1m/users.dat"
ratings_path = "../datasets/ml-1m/ratings.dat"
movies_path = "../datasets/ml-1m/movies.dat"


# Define column names for each dataset (the files are read with header=None)
users_cols = ['user_id', 'gender', 'age', 'occupation', 'zip_code']
ratings_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
movies_cols = ['movie_id', 'title', 'genres']

# Load data into Pandas DataFrames
# The '::' separator is longer than one character, hence engine='python'.
users_df = pd.read_csv(users_path, sep='::', header=None, names=users_cols, encoding='latin-1', engine='python')
ratings_df = pd.read_csv(ratings_path, sep='::', header=None, names=ratings_cols, encoding='latin-1', engine='python')
movies_df = pd.read_csv(movies_path, sep='::', header=None, names=movies_cols, encoding='latin-1', engine='python')

# Optionally, convert DataFrames to NumPy arrays/matrices
# (snapshots of the raw frames taken before ratings_df is re-bound below)
users_array = users_df.values
ratings_array = ratings_df.values
movies_array = movies_df.values

# Join ratings with movies. No `on=` is given, so pandas joins on the columns
# the two frames share, which is only 'movie_id' here. The original movie_id
# column is then dropped in favour of the title.
ratings_df = pd.merge(ratings_df, movies_df)[['user_id', 'title', 'rating', 'timestamp']]
ratings_df["user_id"] = ratings_df["user_id"].astype(str)
# 1-based integer ids assigned in order of first appearance in ratings_df.
user_lookup = {v: i+1 for i, v in enumerate(ratings_df['user_id'].unique())}
movie_lookup = {v: i+1 for i, v in enumerate(ratings_df['title'].unique())}
# 'movie_id' is re-created from the title, so it is NOT the id from movies.dat;
# movies that share a title would share an id.
ratings_df['movie_id'] = ratings_df['title'].map(movie_lookup)
ratings_df['user_int'] = ratings_df['user_id'].map(user_lookup)
# Number of ratings per (string) user id and per re-mapped movie id.
ratings_per_user = ratings_df.groupby('user_id').rating.count()
ratings_per_item = ratings_df.groupby('movie_id').rating.count()
# Most-rated movies first; indexed by the re-mapped, 1-based movie_id.
sorted_ratings_per_item = ratings_per_item.sort_values(ascending=False)
# One [user_int, movie_id, rating] list per rating row.
user_item_rating_tuples = ratings_df[['user_int', 'movie_id', 'rating']].values.tolist()

from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Build a surprise dataset from all ratings, using the Reader's default settings.
reader = Reader()
data = Dataset.load_from_df(ratings_df[['user_int', 'movie_id', 'rating']], reader)

# Retrieve the trainset
# (every rating is used; nothing is held out for this model)
trainset = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_full.dump'

# Load the saved model
# The first element of the pair returned by dump.load is discarded.
# NOTE(review): surprise dump files are pickle-based as far as I know, so only
# load files from a trusted source.
_, model = dump.load(file_path)
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Return each user's chronologically last n ratings.

    :param df: a DataFrame containing user item ratings
    :param n: how many of the latest rows to keep per user
    :param min_ratings_per_user: users with fewer rows than this are dropped
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: a new DataFrame (original index labels kept) sorted by user
    """
    # Drop users that do not have enough ratings.
    enough = df.groupby(user_colname).filter(
        lambda rows: min_ratings_per_user <= len(rows)
    )
    # Oldest first, so that tail(n) picks the most recent rows of every user.
    chronological = enough.sort_values(timestamp_colname)
    latest = chronological.groupby(user_colname).tail(n)
    return latest.sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Flag every user's chronologically last n ratings as validation rows.

    The frame is modified in place: an 'is_valid' column is (re)created,
    set to False everywhere and then to True on the selected rows.

    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to put in the validation set
    :param min_ratings: only users with at least this many ratings contribute
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df object with the 'is_valid' column added
    """
    # Reset first so that a previous call with a different n leaves no trace.
    df["is_valid"] = False
    held_out = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    )
    # Rows are matched through their index labels.
    df.loc[held_out.index, "is_valid"] = True
    return df
# Leave-last-one-out split: adds/overwrites ratings_df['is_valid'] in place.
mark_last_n_ratings_as_validation_set(ratings_df, 1)
# Boolean indexing yields new frames, so these are not affected when
# ratings_df['is_valid'] is overwritten again further down.
train_df = ratings_df[ratings_df.is_valid==False]
valid_df = ratings_df[ratings_df.is_valid==True]
# These names were already imported earlier in the file; the repeats are harmless.
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# `reader` and `data` are re-bound here; the full-data `trainset` built above
# is a separate object and is left untouched.
reader = Reader()
data = Dataset.load_from_df(train_df[['user_int', 'movie_id', 'rating']], reader)
# Retrieve the trainset
# (built from the training rows of the n=1 split only)
trainset1 = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_last_item.dump'

# Load the saved model
# NOTE(review): presumably this model was fitted on the n=1 training split;
# nothing in this script verifies that.
_, model_last_item = dump.load(file_path)
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Select the n most recent ratings of every sufficiently active user.

    This re-declares the function of the same name defined earlier in the
    file, with the same behaviour.

    :param df: a DataFrame containing user item ratings
    :param n: number of most recent rows to keep for each user
    :param min_ratings_per_user: minimum number of rows a user needs to be kept
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: a new DataFrame holding the selected rows, sorted by user
    """

    def has_enough_ratings(user_rows):
        return len(user_rows) >= min_ratings_per_user

    kept = df.groupby(user_colname).filter(has_enough_ratings)
    by_time = kept.sort_values(timestamp_colname)
    return by_time.groupby(user_colname).tail(n).sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Mark the chronologically last n ratings of each user as validation data.

    This re-declares the function of the same name defined earlier in the
    file, with the same behaviour. The 'is_valid' column of df is overwritten
    in place.

    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only users with at least this many ratings are included
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    df["is_valid"] = False
    validation_index = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    ).index
    df.loc[validation_index, "is_valid"] = True

    return df
# Leave-last-five-out split. This resets ratings_df['is_valid'] and re-marks it,
# replacing the flags written by the earlier n=1 call.
mark_last_n_ratings_as_validation_set(ratings_df, 5)
train_df_5 = ratings_df[ratings_df.is_valid==False]
valid_df_5 = ratings_df[ratings_df.is_valid==True]
# These names were already imported earlier in the file; the repeats are harmless.
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# `reader` and `data` are re-bound once more; earlier trainsets are unaffected.
reader = Reader()
data = Dataset.load_from_df(train_df_5[['user_int', 'movie_id', 'rating']], reader)

# Retrieve the trainset
# (built from the training rows of the n=5 split only)
trainset5 = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_last_five.dump'

# Load the saved model
# NOTE(review): presumably this model was fitted on the n=5 training split;
# nothing in this script verifies that.
_, model_last_five = dump.load(file_path)
import torch


def _ratings_to_sequence_tensors(adjacency):
    """
    Convert a ``{key: [(other_id, rating), ...]}`` mapping into tensors.

    :param adjacency: mapping from an id to its list of (id, rating) pairs,
        e.g. the ``ur`` attribute of a surprise trainset
    :return: a pair ``(rating_dicts, tensors)`` where ``rating_dicts`` is a
        defaultdict mapping each key to an ``{other_id: rating}`` dict and
        ``tensors`` maps each key to a float32 tensor of shape
        ``(len(rating_dict), 1, 2)`` whose rows are ``[other_id, rating]``
    """
    rating_dicts = defaultdict(dict)
    for key, pairs in adjacency.items():
        # Going through a dict keeps one entry per id.
        rating_dicts[key] = dict(pairs)

    # One tensor construction per key instead of one per rating followed by
    # torch.stack; unsqueeze(1) restores the middle dimension that stacking
    # the original (1, 2) tensors produced.
    # NOTE: an empty rating list would give shape (0, 1) here; the per-rating
    # stack it replaces raised in that case.
    tensors = {
        key: torch.tensor(list(rating_dict.items()), dtype=torch.float).unsqueeze(1)
        for key, rating_dict in rating_dicts.items()
    }
    return rating_dicts, tensors


# Per-user rating lists of the three trainsets (full, n=1 split, n=5 split).
user_item_matrix = trainset.ur
user_item_matrix_1 = trainset1.ur
user_item_matrix_5 = trainset5.ur

# Dictionary-of-dictionaries and tensor views of each user's rating history.
user_item_rating_dict, user_item_rating_tensor = _ratings_to_sequence_tensors(user_item_matrix)
user_item_rating_dict1, user_item_rating_tensor1 = _ratings_to_sequence_tensors(user_item_matrix_1)
user_item_rating_dict5, user_item_rating_tensor5 = _ratings_to_sequence_tensors(user_item_matrix_5)

    
import numpy as np
import torch
from tqdm import tqdm

def update_user_tensor_single(user_vector, items, ratings):
    """
    Solve the least-squares normal equations for a user vector.

    Uses the item factors of ``model_last_item``.

    :param user_vector: accepted for interface compatibility; not used
    :param items: indices into ``model_last_item.qi``
    :param ratings: tensor multiplied with the float64 factor matrix
        (presumably one float64 rating per entry of ``items`` - TODO confirm)
    :return: ``inverse(Q^T Q) @ Q^T @ ratings`` where Q stacks the item factors
    """
    item_factors = torch.tensor(
        [model_last_item.qi[idx] for idx in items], dtype=torch.float64
    )
    factors_t = item_factors.t()
    gram_inverse = torch.inverse(factors_t @ item_factors)
    return gram_inverse @ factors_t @ ratings
def update_user_tensor(user_vector, items, ratings):
    """
    Solve the least-squares normal equations for a user vector.

    Uses the item factors of the full-data ``model``.

    :param user_vector: accepted for interface compatibility; not used
    :param items: indices into ``model.qi``
    :param ratings: tensor multiplied with the float64 factor matrix
        (presumably one float64 rating per entry of ``items`` - TODO confirm)
    :return: ``inverse(Q^T Q) @ Q^T @ ratings`` where Q stacks the item factors
    """
    rows = []
    for idx in items:
        rows.append(model.qi[idx])
    q_matrix = torch.tensor(rows, dtype=torch.float64)
    q_transposed = q_matrix.t()
    solution = torch.inverse(q_transposed @ q_matrix) @ q_transposed @ ratings
    return solution
def update_user_tensor_five(user_vector, items, ratings):
    """
    Solve the least-squares normal equations for a user vector.

    Uses the item factors of ``model_last_five``.

    :param user_vector: accepted for interface compatibility; not used
    :param items: indices into ``model_last_five.qi``
    :param ratings: tensor multiplied with the float64 factor matrix
        (presumably one float64 rating per entry of ``items`` - TODO confirm)
    :return: ``inverse(Q^T Q) @ Q^T @ ratings`` where Q stacks the item factors
    """
    factor_rows = [model_last_five.qi[idx] for idx in items]
    q_mat = torch.tensor(factor_rows, dtype=torch.float64)
    normal_matrix = q_mat.t() @ q_mat
    return torch.inverse(normal_matrix) @ q_mat.t() @ ratings

import torch


def _item_histories_to_tensors(adjacency):
    """
    Convert a ``{item: [(user, rating), ...]}`` mapping into tensors.

    :param adjacency: mapping from an item id to its list of (user, rating)
        pairs, e.g. the ``ir`` attribute of a surprise trainset
    :return: a pair ``(rating_dicts, tensors)`` where ``rating_dicts`` is a
        defaultdict mapping each item to a ``{user: rating}`` dict and
        ``tensors`` maps each item to a float32 tensor of shape
        ``(len(rating_dict), 1, 2)`` whose rows are ``[user, rating]``
    """
    rating_dicts = defaultdict(dict)
    for item, pairs in adjacency.items():
        # Going through a dict keeps one entry per user.
        rating_dicts[item] = dict(pairs)

    # One tensor construction per item instead of one per rating followed by
    # torch.stack; unsqueeze(1) restores the middle dimension that stacking
    # the original (1, 2) tensors produced.
    # NOTE: an empty rating list would give shape (0, 1) here; the per-rating
    # stack it replaces raised in that case.
    tensors = {
        item: torch.tensor(list(rating_dict.items()), dtype=torch.float).unsqueeze(1)
        for item, rating_dict in rating_dicts.items()
    }
    return rating_dicts, tensors


# Per-item rating lists of the three trainsets (full, n=1 split, n=5 split).
item_user_matrix = trainset.ir
item_user_matrix1 = trainset1.ir
item_user_matrix5 = trainset5.ir

# Dictionary-of-dictionaries and tensor views of each item's rating history.
item_user_rating_dict, item_user_rating_tensor = _item_histories_to_tensors(item_user_matrix)
item_user_rating_dict1, item_user_rating_tensor1 = _item_histories_to_tensors(item_user_matrix1)
item_user_rating_dict5, item_user_rating_tensor5 = _item_histories_to_tensors(item_user_matrix5)


#Finalized
#What I compare with is the first value
torch.set_printoptions(precision=7)
print("Starting now...")

# Both LSTMs take 2 features per step (an id and a rating) and have a
# 100-dimensional hidden state.
user_lstm = torch.nn.LSTM(2, 100)
item_lstm = torch.nn.LSTM(2, 100)
# map_location='cpu' lets the checkpoints load on any host, including one
# without the GPU they were saved from; the modules are moved to the compute
# device later, where they are used.
user_lstm.load_state_dict(torch.load('user_model.pth', map_location='cpu'))
item_lstm.load_state_dict(torch.load('item_model.pth', map_location='cpu'))
# Select GPU 6 only when it exists. Without this guard the script aborted on
# machines with fewer than seven CUDA devices (or none at all); in that case
# the default device is used instead.
if torch.cuda.is_available() and torch.cuda.device_count() > 6:
    torch.cuda.set_device(6)

import torch.nn.functional as F
def past_user_item_reachability_lstm(user_id, item_id, past_time, n_epochs=4, lr=8):
    """
    Perturb a user's recent ratings by gradient steps and track a target item.

    The ratings of the user's last ``past_time`` entries in ``trainset.ur``
    become a trainable vector. On every epoch those (item, clamped rating)
    pairs are appended to the user's history from ``user_item_rating_tensor5``
    and passed through ``user_lstm``. Every item is then scored as
    0.8 * <user state, item state>, and the log-softmax score of ``item_id``
    is back-propagated into the trainable ratings.

    NOTE(review): ``backward()`` is called on the log-probability itself and
    followed by ``Adam.step()``, which moves the ratings so as to *lower* that
    log-probability. Confirm whether minimising (rather than maximising) the
    target score is intended.
    NOTE(review): ``user_id`` indexes both ``trainset.ur`` and
    ``user_item_rating_tensor5`` (built from ``trainset5``); confirm the two
    trainsets use the same ids.

    :param user_id: key into ``trainset.ur`` and ``user_item_rating_tensor5``
    :param item_id: position of the target item in the score vector, which is
        ordered by ``range(len(model_last_five.qi))``
    :param past_time: number of most recent ratings that may be changed; if
        the user has fewer, all of them are used
    :param n_epochs: number of forward/backward/step rounds (default 4)
    :param lr: Adam learning rate (default 8)
    :return: ``(ratio, last_score)``. ``last_score`` is the target log-softmax
        score from the final forward pass (taken before the last optimizer
        step). ``ratio`` is ``exp(first_score - last_score)``, or 0 if the
        first score was exactly 0.
    :raises ValueError: if ``past_time`` is negative or ``n_epochs`` < 1
    """
    if past_time < 0:
        raise ValueError("past_time must be non-negative")
    if n_epochs < 1:
        raise ValueError("n_epochs must be at least 1")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    item_lstm.to(device)
    user_lstm.to(device)

    recent = trainset.ur[user_id][-past_time:]
    chosen_items = [item for item, _ in recent]
    chosen_ratings = [rating for _, rating in recent]
    # Guard against users with fewer than `past_time` ratings, which used to
    # raise IndexError while filling the padded sequence.
    n_appended = min(past_time, len(chosen_items))

    # Trainable ratings: a float64 leaf tensor living on the compute device.
    user_action = torch.tensor(
        chosen_ratings, dtype=torch.float64, device=device, requires_grad=True
    )
    optimizer = torch.optim.Adam([user_action], lr=lr)

    # The stored history and the appended item ids never change between
    # epochs, so they are prepared once.
    history = user_item_rating_tensor5[user_id].detach().clone().to(device)
    appended_items = torch.tensor(
        chosen_items[:n_appended], dtype=history.dtype, device=device
    )

    # Item states do not depend on `user_action`, so they are computed once
    # and without autograd bookkeeping instead of once per epoch.
    with torch.no_grad():
        item_vectors = torch.stack(
            [
                item_lstm(item_user_rating_tensor5[item_].to(device))[1][0][-1].view(-1)
                for item_ in range(len(model_last_five.qi))
            ]
        )

    init_val = 0
    for epoch in range(n_epochs):
        # Ratings are kept inside [1, 5] for the forward pass.
        clamped = user_action.clamp(1, 5)[:n_appended].to(history.dtype)
        appended = torch.stack((appended_items, clamped), dim=1).unsqueeze(1)
        sequence = torch.cat((history, appended), dim=0)

        _, (user_vector, _) = user_lstm(sequence)
        user_vector = user_vector[-1].view(-1)

        item_scores = 0.8 * (item_vectors @ user_vector)
        log_softmax_scores = F.log_softmax(item_scores, dim=0)
        target_item_score = log_softmax_scores[item_id]
        if epoch == 0:
            init_val = target_item_score.item()

        target_item_score.backward()
        optimizer.step()
        optimizer.zero_grad()

    final_rating = 0
    if init_val != 0:
        final_rating = torch.exp(init_val - target_item_score).item()
    return final_rating, target_item_score.item()


# Sizes are read from the `pu` and `qi` attributes of model_last_five
# (presumably its user and item factor arrays - TODO confirm).
n_users = len(model_last_five.pu)
n_items = len(model_last_five.qi)

import random

def generate_random_lists(n_items, n_users, num_samples=30):
    """
    Draw random item and user indices with replacement.

    All item indices are drawn before any user index.

    :param n_items: item indices are drawn from 0 .. n_items - 1
    :param n_users: user indices are drawn from 0 .. n_users - 1
    :param num_samples: how many indices to draw for each list
    :return: ``(item_list, user_list)``, two lists of length ``num_samples``
    """
    last_item = n_items - 1
    last_user = n_users - 1

    item_list = []
    for _ in range(num_samples):
        item_list.append(random.randint(0, last_item))

    user_list = []
    for _ in range(num_samples):
        user_list.append(random.randint(0, last_user))

    return item_list, user_list

#ok time to create influential and non-influential users
ratings_count = {}
for user_id, ratings1 in trainset.ur.items():
    # Count the number of ratings for the user and store it in ratings_count
    ratings_count[user_id] = len(ratings1)

# Sort the ratings_count dictionary by the number of ratings in descending order
sorted_ratings_count = sorted(ratings_count.items(), key=lambda x: x[1], reverse=True)

# Get the 30 most active users (highest number of ratings); ids are keys of trainset.ur
top_influential_users = [user_id for user_id, _ in sorted_ratings_count[:30]]

# Users ranked 201st to 230th by number of ratings.
mid_users = [i for i, _ in sorted_ratings_count[200:230]]

# The 30 most-rated items, then the items ranked 201st to 230th.
# NOTE(review): these keys are values of the 1-based `movie_id` column of
# ratings_df, whereas past_user_item_reachability_lstm uses its item argument
# as a 0-based position into a score vector - confirm the id spaces match.
top_influential_items = [i for i in sorted_ratings_per_item[:30].keys()]

mid_items = [i for i in sorted_ratings_per_item[200:230].keys()]


# Fixed samples of 20 items and 20 users; the random draw below is disabled.
#items, users = generate_random_lists(n_items, n_users)
items = [3417, 2001, 239, 1642, 2584, 3400, 2172, 3699, 535, 6, 1087, 561, 698, 2531, 3440, 3164, 1326, 447, 3176, 451]# 276, 3208, 1109, 2385, 2675, 1212, 2654, 3051, 6, 305]
users = [485, 920, 196, 5, 773, 714, 537, 665, 192, 920, 964, 709, 766, 173, 97, 119, 947, 270, 623, 569]# 3, 733, 532, 632, 173, 211, 467, 285, 82, 175]
# Number of most recent ratings per user that the reachability search may change.
time_till = 5

def save_list_to_file(lst, filename):
    """
    Write every element of ``lst`` to ``filename``, one ``str(item)`` per line.

    Missing parent directories are created first, so results are not lost to a
    FileNotFoundError after a long computation. An existing file is
    overwritten.

    :param lst: iterable of values to write
    :param filename: path of the output file
    """
    import os

    parent = os.path.dirname(filename)
    if parent:  # empty when the file goes into the current directory
        os.makedirs(parent, exist_ok=True)

    with open(filename, 'w') as file:
        file.writelines(str(item) + '\n' for item in lst)

def _collect_reachabilities(user_ids, item_ids):
    """
    Run the reachability search for every (user, item) combination.

    :param user_ids: users to iterate over (outer loop, with a progress bar)
    :param item_ids: target items tried for every user
    :return: ``(ratios, final_scores)``; pairs whose ratio is 0 are skipped
    """
    ratios = []
    final_scores = []
    for p_user in tqdm(user_ids):
        for p_item in item_ids:
            ratio_val, reach_val = past_user_item_reachability_lstm(p_user, p_item, time_till)
            if ratio_val != 0:
                ratios.append(ratio_val)
                final_scores.append(reach_val)
    return ratios, final_scores


# 1: most active users x sampled items
all_reachabilities1, final_reachabilities1 = _collect_reachabilities(top_influential_users, items)

# 2: mid-ranked users x sampled items
all_reachabilities2, final_reachabilities2 = _collect_reachabilities(mid_users, items)

# 3: sampled users x most-rated items
all_reachabilities3, final_reachabilities3 = _collect_reachabilities(users, top_influential_items)

# 4: sampled users x mid-ranked items
all_reachabilities4, final_reachabilities4 = _collect_reachabilities(users, mid_items)

import statistics
import math

def calculate_mean_std(lst):
    """
    Print the mean and standard deviation of ``lst``, computed two ways.

    The ``statistics`` module figure is the *sample* standard deviation
    (divides by n - 1). The manually calculated figure is the *population*
    standard deviation (divides by n), so the two printed values differ.

    Empty input prints a short notice instead of raising. For a single value
    the sample standard deviation is undefined and is printed as ``nan``.

    :param lst: sequence of numbers
    :return: None; results are printed
    """
    count = len(lst)
    if count == 0:
        print("No data: mean and standard deviation are undefined.")
        return

    # Using the statistics module
    mean = statistics.mean(lst)
    # statistics.stdev requires at least two data points.
    std_dev = statistics.stdev(lst) if count > 1 else float("nan")

    # Calculating manually
    manual_mean = sum(lst) / count
    manual_std_dev = math.sqrt(sum((x - manual_mean) ** 2 for x in lst) / count)

    print(f"Mean (using statistics module): {mean}")
    print(f"Standard Deviation (using statistics module): {std_dev}")
    print(f"Mean (calculated manually): {manual_mean}")
    print(f"Standard Deviation (calculated manually): {manual_std_dev}")

# One row per experiment: (ratios, final scores, ratios file, final scores file).
_REACHABILITY_REPORTS = (
    (
        all_reachabilities1,
        final_reachabilities1,
        'aggregate/past_reachability_lstm_top_users_all_items.txt',
        'aggregate/past_freachability_lstm_top_users_all_items.txt',
    ),
    (
        all_reachabilities2,
        final_reachabilities2,
        'aggregate/past_reachability_lstm_mid_users_all_items.txt',
        'aggregate/past_freachability_lstm_mid_users_all_items.txt',
    ),
    (
        all_reachabilities3,
        final_reachabilities3,
        'aggregate/past_reachability_lstm_all_users_top_items.txt',
        'aggregate/past_freachability_lstm_all_users_top_items.txt',
    ),
    (
        all_reachabilities4,
        final_reachabilities4,
        'aggregate/past_reachability_lstm_all_users_mid_items.txt',
        'aggregate/past_freachability_lstm_all_users_mid_items.txt',
    ),
)

# For every experiment: print summary statistics of the ratios, then store
# the ratios and the final scores.
for _ratios, _finals, _ratios_path, _finals_path in _REACHABILITY_REPORTS:
    calculate_mean_std(_ratios)
    save_list_to_file(_ratios, _ratios_path)
    save_list_to_file(_finals, _finals_path)
