import pandas as pd
import math
from collections import defaultdict

# Locations of the three '::'-delimited MovieLens-1M data files
users_path = "../datasets/ml-1m/users.dat"
ratings_path = "../datasets/ml-1m/ratings.dat"
movies_path = "../datasets/ml-1m/movies.dat"


# Column names assigned to each file (every file is read with header=None)
users_cols = ['user_id', 'gender', 'age', 'occupation', 'zip_code']
ratings_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
movies_cols = ['movie_id', 'title', 'genres']


def _read_dat(path, column_names):
    """Read one '::'-separated, header-less, latin-1 encoded file into a DataFrame."""
    return pd.read_csv(
        path,
        sep='::',
        header=None,
        names=column_names,
        encoding='latin-1',
        engine='python',
    )


users_df = _read_dat(users_path, users_cols)
ratings_df = _read_dat(ratings_path, ratings_cols)
movies_df = _read_dat(movies_path, movies_cols)

# NumPy array form of the three tables
users_array = users_df.values
ratings_array = ratings_df.values
movies_array = movies_df.values

# Attach the movie title to every rating. 'movie_id' is the only column the two
# frames share, so it is the join key; the original 'movie_id' is then dropped.
ratings_df = pd.merge(ratings_df, movies_df, on='movie_id')[['user_id', 'title', 'rating', 'timestamp']]
ratings_df["user_id"] = ratings_df["user_id"].astype(str)

# 1-based integer codes for users and titles, numbered in order of first appearance
user_lookup = {raw: code for code, raw in enumerate(ratings_df['user_id'].unique(), start=1)}
movie_lookup = {raw: code for code, raw in enumerate(ratings_df['title'].unique(), start=1)}
ratings_df['movie_id'] = ratings_df['title'].map(movie_lookup)
ratings_df['user_int'] = ratings_df['user_id'].map(user_lookup)

# Rating counts per user and per (re-coded) movie, the latter also sorted most-rated first
ratings_per_user = ratings_df.groupby('user_id')['rating'].count()
ratings_per_item = ratings_df.groupby('movie_id')['rating'].count()
sorted_ratings_per_item = ratings_per_item.sort_values(ascending=False)
user_item_rating_tuples = ratings_df[['user_int', 'movie_id', 'rating']].values.tolist()

from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Wrap the (user_int, movie_id, rating) columns of the complete ratings table
# as a Surprise dataset, parsed by a Reader created with its default settings.
reader = Reader()
data = Dataset.load_from_df(ratings_df[['user_int', 'movie_id', 'rating']], reader)

# Retrieve the trainset built from every rating (nothing is held out here);
# its `ur` mapping is read later in this file.
trainset = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_full.dump'

# Load the saved model. The first element of the pair returned by dump.load is
# discarded and the second is bound to `model`.
# NOTE(review): surprise's dump module is pickle-based as far as I know, so only
# load dump files that come from a trusted source — confirm.
_, model = dump.load(file_path)
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Return each user's n chronologically last ratings.

    :param df: a DataFrame containing user item ratings
    :param n: the number of most recent rows to keep per user
    :param min_ratings_per_user: users with fewer rows than this are dropped entirely
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the selected rows (original index labels kept), sorted by user
    """

    def _has_enough_ratings(user_rows):
        return len(user_rows) >= min_ratings_per_user

    eligible = df.groupby(user_colname).filter(_has_enough_ratings)
    # Oldest first, so that tail(n) picks the newest rows of every user
    chronological = eligible.sort_values(timestamp_colname)
    newest = chronological.groupby(user_colname).tail(n)
    return newest.sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Flag each user's chronologically last n ratings as validation rows.

    The frame is modified in place: an 'is_valid' column is created (or
    overwritten) with False everywhere and True on the selected rows.
    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only users with at least this many ratings contribute rows
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    df["is_valid"] = False
    holdout_rows = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    )
    # Rows are matched through their index labels
    df.loc[holdout_rows.index, "is_valid"] = True
    return df
# Hold out each user's single most recent rating. The call adds (or overwrites)
# the boolean 'is_valid' column on ratings_df in place.
mark_last_n_ratings_as_validation_set(ratings_df, n=1)
_is_holdout_1 = ratings_df["is_valid"]
train_df = ratings_df[~_is_holdout_1]
valid_df = ratings_df[_is_holdout_1]
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise trainset over the non-validation rows only
reader = Reader()
_train_columns_1 = train_df[['user_int', 'movie_id', 'rating']]
data = Dataset.load_from_df(_train_columns_1, reader)
trainset1 = data.build_full_trainset()

from surprise import dump

# Saved model on disk; only the second element of the loaded pair is kept
file_path = 'surprise_model_last_item.dump'
_, model_last_item = dump.load(file_path)
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Return each user's n chronologically last ratings.

    :param df: a DataFrame containing user item ratings
    :param n: the number of most recent rows to keep per user
    :param min_ratings_per_user: users with fewer rows than this are dropped entirely
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the selected rows (original index labels kept), sorted by user
    """

    def _has_enough_ratings(user_rows):
        return len(user_rows) >= min_ratings_per_user

    eligible = df.groupby(user_colname).filter(_has_enough_ratings)
    # Oldest first, so that tail(n) picks the newest rows of every user
    chronological = eligible.sort_values(timestamp_colname)
    newest = chronological.groupby(user_colname).tail(n)
    return newest.sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Flag each user's chronologically last n ratings as validation rows.

    The frame is modified in place: an 'is_valid' column is created (or
    overwritten) with False everywhere and True on the selected rows.
    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only users with at least this many ratings contribute rows
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    df["is_valid"] = False
    holdout_rows = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    )
    # Rows are matched through their index labels
    df.loc[holdout_rows.index, "is_valid"] = True
    return df
# Hold out each user's five most recent ratings. The call overwrites the
# 'is_valid' column of ratings_df in place.
mark_last_n_ratings_as_validation_set(ratings_df, n=5)
_is_holdout_5 = ratings_df["is_valid"]
train_df_5 = ratings_df[~_is_holdout_5]
valid_df_5 = ratings_df[_is_holdout_5]
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise trainset over the non-validation rows only
reader = Reader()
_train_columns_5 = train_df_5[['user_int', 'movie_id', 'rating']]
data = Dataset.load_from_df(_train_columns_5, reader)

trainset5 = data.build_full_trainset()

from surprise import dump

# Saved model on disk; only the second element of the loaded pair is kept
file_path = 'surprise_model_last_five.dump'
_, model_last_five = dump.load(file_path)

# Per-user rating lists (the `ur` attribute) of the three trainsets
user_item_matrix = trainset.ur
user_item_matrix_1 = trainset1.ur
user_item_matrix_5 = trainset5.ur
# Per-user {item: rating} lookup built from the (item, rating) pairs of
# `user_item_matrix`. It stays a defaultdict so that looking up an unknown
# user yields an empty dict, as before.
user_item_rating_dict = defaultdict(dict)
for user, items_ratings in user_item_matrix.items():
    user_item_rating_dict[user] = dict(items_ratings)
import torch

# Per-user float32 tensor of shape (n_rated, 1, 2) whose rows are
# [item_id, rating]. Each user's tensor is built in a single call instead of
# allocating one tiny tensor per rating and stacking them; reshape keeps the
# (n, 1, 2) layout even for a user without ratings.
user_item_rating_tensor = {
    user_id: torch.tensor(list(item_rating_dict.items()), dtype=torch.float).reshape(-1, 1, 2)
    for user_id, item_rating_dict in user_item_rating_dict.items()
}

# Per-user {item: rating} lookup built from the (item, rating) pairs of
# `user_item_matrix_1`. It stays a defaultdict so that looking up an unknown
# user yields an empty dict, as before.
user_item_rating_dict1 = defaultdict(dict)
for user, items_ratings in user_item_matrix_1.items():
    user_item_rating_dict1[user] = dict(items_ratings)

import torch

# Per-user float32 tensor of shape (n_rated, 1, 2) whose rows are
# [item_id, rating]. Each user's tensor is built in a single call instead of
# allocating one tiny tensor per rating and stacking them; reshape keeps the
# (n, 1, 2) layout even for a user without ratings.
user_item_rating_tensor1 = {
    user_id: torch.tensor(list(item_rating_dict.items()), dtype=torch.float).reshape(-1, 1, 2)
    for user_id, item_rating_dict in user_item_rating_dict1.items()
}

# Per-user {item: rating} lookup built from the (item, rating) pairs of
# `user_item_matrix_5`. It stays a defaultdict so that looking up an unknown
# user yields an empty dict, as before.
user_item_rating_dict5 = defaultdict(dict)
for user, items_ratings in user_item_matrix_5.items():
    user_item_rating_dict5[user] = dict(items_ratings)

import torch

# Per-user float32 tensor of shape (n_rated, 1, 2) whose rows are
# [item_id, rating]. Each user's tensor is built in a single call instead of
# allocating one tiny tensor per rating and stacking them; reshape keeps the
# (n, 1, 2) layout even for a user without ratings.
user_item_rating_tensor5 = {
    user_id: torch.tensor(list(item_rating_dict.items()), dtype=torch.float).reshape(-1, 1, 2)
    for user_id, item_rating_dict in user_item_rating_dict5.items()
}
    
import numpy as np
import torch
from tqdm import tqdm

def _least_squares_user_vector(item_factors, items, ratings):
    """
    Fit a user vector p by ordinary least squares, min_p ||Q p - ratings||,
    where the rows of Q are the factor vectors of ``items``.

    :param item_factors: indexable table of item factor vectors (e.g. ``model.qi``)
    :param items: indices into ``item_factors``, one per entry of ``ratings``
    :param ratings: 1-D float64 tensor of ratings aligned with ``items``
        (float64 because it is multiplied with the float64 matrix Q)
    :return: 1-D float64 tensor holding the fitted user vector
    :raises RuntimeError: from torch when Q^T Q is not invertible, e.g. when
        there are fewer rated items than latent factors
    """
    # Convert through a single NumPy array: building a tensor directly from a
    # Python list of per-item arrays is far slower.
    Q = torch.tensor(np.asarray([item_factors[item] for item in items]), dtype=torch.float64)
    # Normal equations (Q^T Q) p = Q^T r, solved directly instead of forming
    # the explicit inverse of Q^T Q.
    return torch.linalg.solve(Q.t() @ Q, Q.t() @ ratings)


def update_user_tensor_single(user_vector, items, ratings):
    """
    Refit a user vector against the item factors of ``model_last_item``.

    :param user_vector: unused; kept so existing callers keep working
    :param items: item indices into ``model_last_item.qi``
    :param ratings: 1-D float64 tensor of ratings aligned with ``items``
    :return: the least-squares user vector
    """
    return _least_squares_user_vector(model_last_item.qi, items, ratings)


def update_user_tensor(user_vector, items, ratings):
    """
    Refit a user vector against the item factors of ``model``.

    :param user_vector: unused; kept so existing callers keep working
    :param items: item indices into ``model.qi``
    :param ratings: 1-D float64 tensor of ratings aligned with ``items``
    :return: the least-squares user vector
    """
    return _least_squares_user_vector(model.qi, items, ratings)


def update_user_tensor_five(user_vector, items, ratings):
    """
    Refit a user vector against the item factors of ``model_last_five``.

    :param user_vector: unused; kept so existing callers keep working
    :param items: item indices into ``model_last_five.qi``
    :param ratings: 1-D float64 tensor of ratings aligned with ``items``
    :return: the least-squares user vector
    """
    return _least_squares_user_vector(model_last_five.qi, items, ratings)

def get_all_recommendation_scores_deterministic(user_vector, sample_size, type_, beta=0.8):
    """
    Return the single item with the highest predicted score for a user vector.

    The score of an item is the dot product of ``user_vector`` with that
    item's factor vector. Every item of the chosen model takes part; items the
    user has already rated are not excluded.

    :param user_vector: 1-D tensor with one entry per latent factor
    :param sample_size: unused; kept for interface compatibility
    :param type_: which model's item factors to score against:
        'keepall' (``model``), 'single' (``model_last_item``) or
        'five' (``model_last_five``)
    :param beta: unused; kept for interface compatibility
    :return: ``([best_item_index], [best_item_score])`` where the index is a
        Python int and the score a 0-dim tensor
    :raises ValueError: if ``type_`` is not one of the three known values
    """
    if type_ == 'keepall':
        item_factors = model.qi
    elif type_ == 'single':
        item_factors = model_last_item.qi
    elif type_ == 'five':
        item_factors = model_last_five.qi
    else:
        # Previously an unknown value surfaced as a NameError further down
        raise ValueError(
            f"unknown type_ {type_!r}; expected 'keepall', 'single' or 'five'"
        )

    # Score all items with one matrix-vector product instead of a Python loop
    item_matrix = torch.as_tensor(
        np.asarray(item_factors), dtype=user_vector.dtype, device=user_vector.device
    )
    scores = item_matrix @ user_vector

    # argmax returns the first maximal index, like max() over the old score dict
    max_item_index = int(torch.argmax(scores))
    max_item_score = scores[max_item_index]

    return [max_item_index], [max_item_score]
# Finalized
# What I compare with is the first value
# NOTE(review): presumably "the first value" is the first element returned by
# future_user_item_reachability below — confirm with the author.
# Print tensors with 7 digits of precision.
torch.set_printoptions(precision=7)
print("Starting now...")

def _rollout_reach_score(user_id, item_id, future_time, base_ratings, item_matrix, user_action=None):
    """
    Simulate ``future_time`` recommend -> rate -> refit steps for one user and
    return minus the softmax share (temperature 0.8) of ``item_id`` under the
    final user vector.

    :param base_ratings: 1-D float64 tensor of the user's existing ratings
    :param item_matrix: float64 tensor with one row of item factors per item
    :param user_action: ``None`` to rate each recommended item with the
        model's own prediction clamped to [1, 5]; otherwise a 1-D tensor of
        ratings laid out as ``future_time * item + timestep``
    """
    user_vector = torch.tensor(model.pu[user_id])
    already_rated = list(user_item_rating_dict[user_id].keys())
    n = len(base_ratings)
    # Fresh buffer per rollout: existing ratings followed by the simulated ones
    ratings = torch.cat(
        (base_ratings, torch.zeros(future_time, dtype=base_ratings.dtype)), dim=0
    )
    for timestep in range(future_time):
        recommendation, _ = get_all_recommendation_scores_deterministic(
            user_vector, sample_size=1, type_="keepall"
        )
        chosen = recommendation[0]
        if user_action is None:
            ratings[n + timestep] = (user_vector @ item_matrix[chosen]).clamp(1, 5)
        else:
            # The stride must equal future_time (the action vector holds
            # future_time slots per item); it used to be hard-coded to 5.
            ratings[n + timestep] = user_action[future_time * chosen + timestep]
        already_rated.append(chosen)
        user_vector = update_user_tensor(user_vector, already_rated, ratings[:n + timestep + 1])
    # softmax equals exp(0.8 * score) / sum(exp(0.8 * score)) but cannot overflow
    return -torch.softmax(0.8 * (item_matrix @ user_vector), dim=0)[item_id]


def future_user_item_reachability(user_id, item_id, future_time):
    """
    Estimate how far a user can push an item's recommendation share by
    choosing their next ``future_time`` ratings.

    A baseline share is computed with the user rating each recommended item
    as the model predicts. The ratings are then optimised with Adam for 7
    epochs to make the (negated) share of ``item_id`` as small as possible.

    :param user_id: user index into ``model.pu`` / ``user_item_rating_dict``
    :param item_id: index into ``model.qi`` of the item to be reached
    :param future_time: number of simulated recommendation steps
    :return: ``(ratio, value)``. ``value`` is the objective (minus the softmax
        share) evaluated in the last epoch, before that epoch's optimiser
        step. ``ratio`` is ``value`` divided by the baseline objective, or 0
        when the baseline is exactly 0.
    """
    n_repeats = 8  # evaluations averaged per objective value
    n_epochs = 7

    item_matrix = torch.tensor(np.asarray(model.qi))
    n_items = item_matrix.shape[0]

    # One free rating per (item, timestep) pair, all starting at 5
    user_action = torch.full(
        (future_time * n_items,), 5.0, dtype=torch.float64, requires_grad=True
    )
    optimizer = torch.optim.Adam([user_action], lr=1.2)

    # Copy the stored ratings into a tensor. The shared user_item_rating_dict
    # is left untouched; float() also accepts values already stored as 0-dim
    # tensors.
    base_ratings = torch.tensor(
        [float(rating) for rating in user_item_rating_dict[user_id].values()],
        dtype=torch.float64,
    )

    # NOTE(review): every step of the rollout is deterministic, so the
    # n_repeats evaluations below give identical values; the repetition is
    # kept to preserve the existing float32 averaging.
    baseline_vals = torch.zeros(n_repeats)
    for repeat in range(n_repeats):
        baseline_vals[repeat] = _rollout_reach_score(
            user_id, item_id, future_time, base_ratings, item_matrix
        )
    init_val = torch.mean(baseline_vals)

    for _ in range(n_epochs):
        user_action_clamped = user_action.clamp(1, 5)
        rating_vals = torch.zeros(n_repeats)
        for repeat in range(n_repeats):
            rating_vals[repeat] = _rollout_reach_score(
                user_id, item_id, future_time, base_ratings, item_matrix,
                user_action=user_action_clamped,
            )
        item_rating = sum(rating_vals) / len(rating_vals)
        item_rating.backward()
        optimizer.step()
        optimizer.zero_grad()

    final_rating = 0
    if init_val.item() != 0:
        final_rating = item_rating.item() / init_val.item()
    return final_rating, item_rating.item()

# Number of rows in the user (`pu`) and item (`qi`) factor tables of
# `model_last_five`. In this file they are only passed to the commented-out
# generate_random_lists call further down.
n_users = len(model_last_five.pu)
n_items = len(model_last_five.qi)

import random

def generate_random_lists(n_items, n_users, num_samples=30):
    """
    Draw random item and user indices with replacement.

    :param n_items: item indices are drawn uniformly from 0 .. n_items - 1
    :param n_users: user indices are drawn uniformly from 0 .. n_users - 1
    :param num_samples: how many indices to draw for each list
    :return: ``(item_list, user_list)``; all item draws happen before the user draws
    """
    sampled_items = []
    for _ in range(num_samples):
        sampled_items.append(random.randrange(n_items))

    sampled_users = []
    for _ in range(num_samples):
        sampled_users.append(random.randrange(n_users))

    return sampled_items, sampled_users

# Build the "influential" and "mid" user groups from how many ratings each
# user has in the full trainset (keys of trainset.ur are trainset user ids).
ratings_count = {user_id: len(ratings1) for user_id, ratings1 in trainset.ur.items()}

# Sort the ratings_count dictionary by the number of ratings in descending order
sorted_ratings_count = sorted(ratings_count.items(), key=lambda x: x[1], reverse=True)

# The 30 users with the most ratings
top_influential_users = [user_id for user_id, _ in sorted_ratings_count[:30]]

# Users ranked 201-220 by number of ratings
mid_users = [i for i, _ in sorted_ratings_count[200:220]]

# sorted_ratings_per_item is indexed by the raw 1-based 'movie_id' codes, but
# the reachability code indexes model.qi with the trainset's inner item ids.
# Translate raw -> inner so the selected rows really are the most-rated items.
# NOTE(review): assumes `model` was fitted on a trainset with the same id
# mapping as `trainset` (the user groups above rely on that too) — confirm.
top_influential_items = [
    trainset.to_inner_iid(raw_iid) for raw_iid in sorted_ratings_per_item.index[:20]
]

mid_items = [
    trainset.to_inner_iid(raw_iid) for raw_iid in sorted_ratings_per_item.index[200:220]
]

# Fixed samples used directly as indices into model.qi / model.pu
items = [3417, 2001, 239, 1642, 2584, 3400, 2172, 3699, 535, 2447, 1087, 561, 698, 2531, 3440, 3164, 1326, 447, 3176, 451]#, 276, 3208, 1109, 2385, 2675, 1212, 2654, 3051, 6, 305]
users = [485, 920, 196, 5, 773, 714, 537, 665, 192, 920, 964, 709, 766, 173, 97, 119, 947, 270, 623, 569]#, 3, 733, 532, 632, 173, 211, 467, 285, 82, 175]
# items, users = generate_random_lists(n_items, n_users)
time_till = 5

def save_list_to_file(lst, filename):
    """
    Write every element of ``lst`` to ``filename``, one ``str(item)`` per line.

    Missing parent directories are created first, so a long computation is
    not lost just because the output folder does not exist yet. An existing
    file is overwritten.

    :param lst: iterable of values to write
    :param filename: path of the text file to create
    """
    import os

    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, 'w') as file:
        file.writelines(str(item) + '\n' for item in lst)

def _collect_reachabilities(user_ids, item_ids):
    """
    Evaluate future_user_item_reachability for every (user, item) pair.

    Pairs whose ratio is exactly 0 are skipped. Returns two parallel lists:
    the ratios and the matching final objective values.
    """
    ratios = []
    finals = []
    for p_user in tqdm(user_ids):
        for p_item in item_ids:
            ratio_val, reach_val = future_user_item_reachability(p_user, p_item, time_till)
            if ratio_val != 0:
                ratios.append(ratio_val)
                finals.append(reach_val)
    return ratios, finals


# 1: most active users x fixed item sample
all_reachabilities1, final_reachabilities1 = _collect_reachabilities(top_influential_users, items)

# 2: mid-ranked users x fixed item sample
all_reachabilities2, final_reachabilities2 = _collect_reachabilities(mid_users, items)

# 3: fixed user sample x most-rated items
all_reachabilities3, final_reachabilities3 = _collect_reachabilities(users, top_influential_items)

# 4: fixed user sample x mid-ranked items
all_reachabilities4, final_reachabilities4 = _collect_reachabilities(users, mid_items)
        
import statistics
import math

def calculate_mean_std(lst):
    """
    Print the mean and standard deviation of ``lst``, each computed two ways.

    The ``statistics`` figures use the sample standard deviation (divides by
    n - 1). The "manual" figure is the population standard deviation (divides
    by n), so the two values differ by design.

    Short inputs no longer raise: an empty list prints a notice and returns,
    and a single value reports its sample standard deviation as nan.

    :param lst: sized collection of numbers
    :return: None
    """
    count = len(lst)
    if count == 0:
        print("No values to summarise.")
        return

    # Using the statistics module
    mean = statistics.mean(lst)
    # The sample standard deviation is undefined for fewer than two values
    std_dev = statistics.stdev(lst) if count > 1 else float("nan")

    # Calculating manually
    manual_mean = sum(lst) / count
    manual_std_dev = math.sqrt(sum((x - manual_mean) ** 2 for x in lst) / count)

    print(f"Mean (using statistics module): {mean}")
    print(f"Standard Deviation (using statistics module): {std_dev}")
    print(f"Mean (calculated manually): {manual_mean}")
    print(f"Standard Deviation (calculated manually): {manual_std_dev}")

# One entry per experiment:
# (ratio list, final-value list, ratio output file, final-value output file)
_reachability_reports = [
    (
        all_reachabilities1,
        final_reachabilities1,
        'aggregate/future_reachability_top_users_all_items.txt',
        'aggregate/future_freachability_top_users_all_items.txt',
    ),
    (
        all_reachabilities2,
        final_reachabilities2,
        'aggregate/future_reachability_mid_users_all_items.txt',
        'aggregate/future_freachability_mid_users_all_items.txt',
    ),
    (
        all_reachabilities3,
        final_reachabilities3,
        'aggregate/future_reachability_all_users_top_items.txt',
        'aggregate/future_freachability_all_users_top_items.txt',
    ),
    (
        all_reachabilities4,
        final_reachabilities4,
        'aggregate/future_reachability_all_users_mid_items.txt',
        'aggregate/future_freachability_all_users_mid_items.txt',
    ),
]

# For each experiment: print summary statistics of the ratios, then write both lists
for _ratios, _finals, _ratio_file, _final_file in _reachability_reports:
    calculate_mean_std(_ratios)
    save_list_to_file(_ratios, _ratio_file)
    save_list_to_file(_finals, _final_file)
