import pandas as pd
import math
from collections import defaultdict

# Locations of the three ml-1m source files
users_path = "../datasets/ml-1m/users.dat"
ratings_path = "../datasets/ml-1m/ratings.dat"
movies_path = "../datasets/ml-1m/movies.dat"


# Column names assigned on load (the files are read with header=None)
users_cols = ['user_id', 'gender', 'age', 'occupation', 'zip_code']
ratings_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
movies_cols = ['movie_id', 'title', 'genres']

# Read every '::'-separated file into its own DataFrame with identical parser settings
users_df, ratings_df, movies_df = (
    pd.read_csv(path, sep='::', header=None, names=cols, encoding='latin-1', engine='python')
    for path, cols in (
        (users_path, users_cols),
        (ratings_path, ratings_cols),
        (movies_path, movies_cols),
    )
)

# Raw NumPy views of the freshly loaded tables (taken before ratings_df is reshaped below)
users_array, ratings_array, movies_array = (
    frame.values for frame in (users_df, ratings_df, movies_df)
)

# Attach movie titles to the ratings and keep only the columns used downstream
ratings_df = ratings_df.merge(movies_df)[['user_id', 'title', 'rating', 'timestamp']]
ratings_df["user_id"] = ratings_df["user_id"].astype(str)

# 1-based integer codes, numbered in order of first appearance
user_lookup = {user: code for code, user in enumerate(ratings_df['user_id'].unique(), start=1)}
movie_lookup = {title: code for code, title in enumerate(ratings_df['title'].unique(), start=1)}
ratings_df['movie_id'] = ratings_df['title'].map(movie_lookup)
ratings_df['user_int'] = ratings_df['user_id'].map(user_lookup)

# Rating counts per user and per item, plus the item counts in descending order
ratings_per_user = ratings_df.groupby('user_id')['rating'].count()
ratings_per_item = ratings_df.groupby('movie_id')['rating'].count()
sorted_ratings_per_item = ratings_per_item.sort_values(ascending=False)

# Plain-Python [user_int, movie_id, rating] rows
user_item_rating_tuples = ratings_df[['user_int', 'movie_id', 'rating']].values.tolist()

from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise dataset built from every rating (no hold-out)
reader = Reader()
data = Dataset.load_from_df(
    ratings_df.loc[:, ['user_int', 'movie_id', 'rating']],
    reader,
)

# Trainset that contains all ratings of the dataset
trainset = data.build_full_trainset()

from surprise import dump

# Dump file of the model associated with the full data
file_path = 'surprise_model_full.dump'

# The loaded dump is a pair; only its second element (the model) is kept
model = dump.load(file_path)[1]
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Return the chronologically last n ratings of every eligible user.
    :param df: a DataFrame containing user item ratings
    :param n: the number of most recent ratings to keep per user
    :param min_ratings_per_user: users with fewer ratings than this are dropped
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the selected rows (original index preserved), sorted by user
    """

    def has_enough_ratings(user_rows):
        return len(user_rows) >= min_ratings_per_user

    eligible = df.groupby(user_colname).filter(has_enough_ratings)
    chronological = eligible.sort_values(timestamp_colname)
    # tail(n) on the time-ordered frame keeps each user's newest n rows
    newest = chronological.groupby(user_colname).tail(n)
    return newest.sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Flag the chronologically last n ratings of each user as validation rows.
    The frame is modified in place: an 'is_valid' column is (re)created,
    set to False everywhere and then to True on the selected rows.
    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only include users with at least this many ratings
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    # Reset first so repeated calls start from a clean flag column
    df["is_valid"] = False

    validation_rows = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    )
    # Rows are matched through their index labels
    df.loc[validation_rows.index, "is_valid"] = True

    return df
# Flag each user's single most recent rating as validation data, then split on the flag
mark_last_n_ratings_as_validation_set(ratings_df, 1)
train_df = ratings_df[~ratings_df["is_valid"]]
valid_df = ratings_df[ratings_df["is_valid"]]
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise dataset restricted to the training rows
reader = Reader()
data = Dataset.load_from_df(
    train_df.loc[:, ['user_int', 'movie_id', 'rating']],
    reader,
)
# Trainset that contains every training row
trainset1 = data.build_full_trainset()

from surprise import dump

# Dump file of the model associated with the last-1 split
file_path = 'surprise_model_last_item.dump'

# The loaded dump is a pair; only its second element (the model) is kept
model_last_item = dump.load(file_path)[1]
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Return the chronologically last n ratings of every eligible user.
    :param df: a DataFrame containing user item ratings
    :param n: the number of most recent ratings to keep per user
    :param min_ratings_per_user: users with fewer ratings than this are dropped
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the selected rows (original index preserved), sorted by user
    """

    def has_enough_ratings(user_rows):
        return len(user_rows) >= min_ratings_per_user

    eligible = df.groupby(user_colname).filter(has_enough_ratings)
    chronological = eligible.sort_values(timestamp_colname)
    # tail(n) on the time-ordered frame keeps each user's newest n rows
    newest = chronological.groupby(user_colname).tail(n)
    return newest.sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Flag the chronologically last n ratings of each user as validation rows.
    The frame is modified in place: an 'is_valid' column is (re)created,
    set to False everywhere and then to True on the selected rows.
    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only include users with at least this many ratings
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    # Reset first so repeated calls start from a clean flag column
    df["is_valid"] = False

    validation_rows = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    )
    # Rows are matched through their index labels
    df.loc[validation_rows.index, "is_valid"] = True

    return df
# Re-flag ratings_df so that each user's five most recent ratings are validation data
mark_last_n_ratings_as_validation_set(ratings_df, 5)
train_df_5 = ratings_df[~ratings_df["is_valid"]]
valid_df_5 = ratings_df[ratings_df["is_valid"]]
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise dataset restricted to the training rows
reader = Reader()
data = Dataset.load_from_df(
    train_df_5.loc[:, ['user_int', 'movie_id', 'rating']],
    reader,
)

# Trainset that contains every training row
trainset5 = data.build_full_trainset()

from surprise import dump

# Dump file of the model associated with the last-5 split
file_path = 'surprise_model_last_five.dump'

# The loaded dump is a pair; only its second element (the model) is kept
model_last_five = dump.load(file_path)[1]
import torch


def _user_rating_dicts(adjacency):
    """Convert ``{user: [(item, rating), ...]}`` into ``{user: {item: rating}}``.

    The result is a ``defaultdict(dict)`` so that looking up an unknown user
    yields an empty dict. If an item occurs more than once for a user, the last
    rating wins.
    """
    rating_dicts = defaultdict(dict)
    for user, items_ratings in adjacency.items():
        rating_dicts[user] = dict(items_ratings)
    return rating_dicts


def _user_history_tensors(rating_dicts):
    """Convert ``{user: {item: rating}}`` into ``{user: float tensor}``.

    Each tensor has shape ``(n_ratings, 1, 2)`` with rows ``[item_id, rating]``.
    It is built with a single ``torch.tensor`` call per user instead of one
    tiny tensor per rating followed by ``torch.stack``.
    """
    return {
        user_id: torch.tensor(list(item_ratings.items()), dtype=torch.float).reshape(-1, 1, 2)
        for user_id, item_ratings in rating_dicts.items()
    }


# Per-user rating lists of the three trainsets (full data, last-1 split, last-5 split)
user_item_matrix = trainset.ur
user_item_matrix_1 = trainset1.ur
user_item_matrix_5 = trainset5.ur

# Full data
user_item_rating_dict = _user_rating_dicts(user_item_matrix)
user_item_rating_tensor = _user_history_tensors(user_item_rating_dict)

# Last-1 split
user_item_rating_dict1 = _user_rating_dicts(user_item_matrix_1)
user_item_rating_tensor1 = _user_history_tensors(user_item_rating_dict1)

# Last-5 split
user_item_rating_dict5 = _user_rating_dicts(user_item_matrix_5)
user_item_rating_tensor5 = _user_history_tensors(user_item_rating_dict5)
    
import numpy as np
import torch
from tqdm import tqdm

def _least_squares_user_vector(item_factors, items, ratings):
    """Return the least-squares user vector for the given items and ratings.

    Solves the normal equations ``p = (Q^T Q)^-1 Q^T r`` where the rows of
    ``Q`` are ``item_factors[item]`` for every entry of ``items``.

    :param item_factors: indexable collection of item factor vectors
    :param items: indices into ``item_factors``
    :param ratings: one rating per item; a tensor, array or plain sequence
    :return: float64 tensor holding the solved user vector
    :raises RuntimeError: if ``Q^T Q`` is singular
    """
    # Build Q through one NumPy array; torch.tensor on a list of ndarrays is very slow
    Q = torch.as_tensor(np.asarray([item_factors[item] for item in items], dtype=np.float64))
    # Coerce ratings so lists / float32 inputs do not fail in the matmul below
    r = torch.as_tensor(ratings, dtype=torch.float64)
    return torch.inverse(Q.t() @ Q) @ Q.t() @ r


def update_user_tensor_single(user_vector, items, ratings):
    """Least-squares user vector using the item factors of ``model_last_item``.

    ``user_vector`` is accepted for interface compatibility and is not used.
    """
    return _least_squares_user_vector(model_last_item.qi, items, ratings)


def update_user_tensor(user_vector, items, ratings):
    """Least-squares user vector using the item factors of ``model``.

    ``user_vector`` is accepted for interface compatibility and is not used.
    """
    return _least_squares_user_vector(model.qi, items, ratings)


def update_user_tensor_five(user_vector, items, ratings):
    """Least-squares user vector using the item factors of ``model_last_five``.

    ``user_vector`` is accepted for interface compatibility and is not used.
    """
    return _least_squares_user_vector(model_last_five.qi, items, ratings)

import torch


def _item_rating_dicts(adjacency):
    """Convert ``{item: [(user, rating), ...]}`` into ``{item: {user: rating}}``.

    The result is a ``defaultdict(dict)`` so that looking up an unknown item
    yields an empty dict. If a user occurs more than once for an item, the last
    rating wins.
    """
    rating_dicts = defaultdict(dict)
    for item, users_ratings in adjacency.items():
        rating_dicts[item] = dict(users_ratings)
    return rating_dicts


def _item_history_tensors(rating_dicts):
    """Convert ``{item: {user: rating}}`` into ``{item: float tensor}``.

    Each tensor has shape ``(n_ratings, 1, 2)`` with rows ``[user_id, rating]``.
    It is built with a single ``torch.tensor`` call per item instead of one
    tiny tensor per rating followed by ``torch.stack``.
    """
    return {
        item_id: torch.tensor(list(user_ratings.items()), dtype=torch.float).reshape(-1, 1, 2)
        for item_id, user_ratings in rating_dicts.items()
    }


# Per-item rating lists of the three trainsets (full data, last-1 split, last-5 split)
item_user_matrix = trainset.ir
item_user_matrix1 = trainset1.ir
item_user_matrix5 = trainset5.ir

# Full data
item_user_rating_dict = _item_rating_dicts(item_user_matrix)
item_user_rating_tensor = _item_history_tensors(item_user_rating_dict)

# Last-1 split
item_user_rating_dict1 = _item_rating_dicts(item_user_matrix1)
item_user_rating_tensor1 = _item_history_tensors(item_user_rating_dict1)

# Last-5 split
item_user_rating_dict5 = _item_rating_dicts(item_user_matrix5)
item_user_rating_tensor5 = _item_history_tensors(item_user_rating_dict5)

def hellinger_distance(p, q):
    """Return the Hellinger distance between tensors ``p`` and ``q``.

    Computed as the Euclidean norm of ``sqrt(p) - sqrt(q)`` divided by
    ``sqrt(2)``; the result is a zero-dimensional tensor.
    """
    root_gap = p.sqrt() - q.sqrt()
    euclidean = root_gap.pow(2).sum().sqrt()
    return euclidean / torch.tensor(2.0).sqrt()

# Finalized
# Author's note: "What I compare with is the first value" (TODO clarify which value is meant)
# Print tensors with 7 digits of precision.
torch.set_printoptions(precision=7)
# Progress marker written to stdout before the LSTM-based steps below.
print("Starting now...")

def get_top_recommendations_lstm(user_vector, sample_size, beta=0.8):
    """Return ``(item, score)`` for the item with the highest predicted score.

    The score of an item is the dot product of the flattened ``user_vector``
    with the flattened entry of the module-level ``all_item_vectors``; items
    are the indices ``0 .. len(item_user_rating_tensor) - 1``.
    ``sample_size`` and ``beta`` are accepted but not used.
    """
    flat_user = user_vector.view(-1)
    scored_items = [
        (item, torch.dot(flat_user, all_item_vectors[item].view(-1)))
        for item in range(len(item_user_rating_tensor))
    ]
    # Descending by score; the leading pair is the top recommendation
    best_item, best_score = sorted(scored_items, key=lambda pair: pair[1], reverse=True)[0]
    return best_item, best_score

def get_top_recommendations_lstm(user_vector, sample_size, rated_already=None, beta=0.8):
    """Return the best-scoring item that is not in ``rated_already``.

    The score of an item is the dot product of the flattened ``user_vector``
    with the flattened entry of the module-level ``all_item_vectors``; items
    are the indices ``0 .. len(item_user_rating_tensor) - 1``.

    :param user_vector: tensor holding the user's latent vector
    :param sample_size: accepted for interface compatibility; not used
    :param rated_already: container of item indices to skip; ``None`` (the
        default) skips nothing, so callers that pass only ``user_vector`` and
        ``sample_size`` keep working
    :param beta: accepted for interface compatibility; not used
    :return: ``(item, score)`` of the first eligible item in descending score
        order, or ``(None, None)`` when every item is excluded
    """
    excluded = () if rated_already is None else rated_already
    n_items = len(item_user_rating_tensor)
    flat_user = user_vector.view(-1)

    scored_items = [
        (item, torch.dot(flat_user, all_item_vectors[item].view(-1)))
        for item in range(n_items)
    ]
    scored_items.sort(key=lambda pair: pair[1], reverse=True)

    # Find the first item that hasn't been rated
    for item, rating in scored_items:
        if item not in excluded:
            return item, rating

    # If all items have been rated, return None
    return None, None

# Two LSTMs with input size 2 and hidden size 100, one for user histories and one for item histories
user_lstm = torch.nn.LSTM(2, 100)
item_lstm = torch.nn.LSTM(2, 100)
# Load the weights onto the CPU first so checkpoints saved from a GPU also load on
# CPU-only hosts; the modules are moved to the selected device further below.
user_lstm.load_state_dict(torch.load('user_model.pth', map_location='cpu'))
item_lstm.load_state_dict(torch.load('item_model.pth', map_location='cpu'))
# Prefer the second GPU, but only when it exists: an unconditional set_device(1)
# fails on CPU-only and single-GPU machines.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    torch.cuda.set_device(1)

def get_all_preferences_lstm(user_vector_1, item_vector_copy_h):
    """Score every item vector against a detached copy of the user vector.

    ``item_vector_copy_h`` is a list of equally shaped tensors that are stacked
    along a new leading dimension and matrix-multiplied with the transposed
    user vector. Gradients flow through the item vectors only, because the
    user vector is detached and cloned first. Dimension 1 of the product is
    squeezed before returning.
    """
    stacked_items = torch.stack(item_vector_copy_h)
    frozen_user = user_vector_1.detach().clone()
    scores = stacked_items @ frozen_user.transpose(0, 1)
    return scores.squeeze(1)

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
item_lstm.to(device)
user_lstm.to(device)

# Final hidden state of item_lstm for every item's rating history, indexed by item id
all_item_vectors = []
n_items = len(item_user_rating_tensor)
# These vectors are only read, or detached/cloned, by the code that uses them, so no
# autograd graph needs to be recorded here; skipping it avoids keeping one LSTM graph
# alive per item.
with torch.no_grad():
    for i in tqdm(range(n_items)):
        item_history = item_user_rating_tensor[i].to(device)
        _, (item_vector, _) = item_lstm(item_history)
        item_vector = item_vector[-1]
        all_item_vectors.append(item_vector)

import torch.nn.functional as F
def future_k_stability_lstm(adversary, curr_user, k):
    """Optimise an adversary's ratings to shift ``curr_user``'s item scores.

    A vector of candidate ratings (``k * len(model.qi)`` entries, all starting
    at 5.0) is optimised with Adam for a fixed number of epochs. In each epoch
    the adversary's top recommended item gets one extra row appended to its
    rating history, the item LSTM is re-run on that history, and the loss is
    the negative Hellinger distance between the softmax of ``curr_user``'s
    scores after and before the change.

    :param adversary: key into ``user_item_rating_tensor`` for the adversary
    :param curr_user: key into ``user_item_rating_tensor`` for the observed user
    :param k: number of injected ratings per epoch
    :return: the loss of the last epoch as a Python float, i.e. the negated
        Hellinger distance
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Encode the adversary's rating history; keep the last layer's final hidden state
    adversary_history = user_item_rating_tensor[adversary].to(device)
    _, (adversary_vector, _) = user_lstm(adversary_history)
    adversary_vector = adversary_vector[-1]
    # Trainable ratings, initialised to 5.0
    chosen_ratings = [5.0] * k * len(model.qi)
    user_action = torch.tensor(chosen_ratings, dtype=torch.float64).to(device).clone().detach().requires_grad_(True)
    optimizer = torch.optim.Adam([user_action], lr=2)

    item_vector_copy = [vec.to(device) for vec in all_item_vectors]  # Move item vectors to `device`
    # Encode the observed user's history the same way as the adversary's
    user_history = user_item_rating_tensor[curr_user].to(device)
    _, (user_vector, _) = user_lstm(user_history)
    user_vector = user_vector[-1]
    # Baseline scores of the observed user before any injected rating
    initial_rec_list = get_all_preferences_lstm(user_vector, item_vector_copy).detach()
    initial_rec_list_softmax = F.softmax(initial_rec_list, dim=0)
    
    n_epochs = 10
    distance_metric = None

    for epoch in tqdm(range(0, n_epochs)):
        # Fresh detached copies so each epoch starts from the unmodified item vectors
        item_vector_copy_tensor = [vec.clone().detach().requires_grad_(True) for vec in item_vector_copy]
        # Restrict the trainable ratings to the range [1, 5]
        user_action_clamped = user_action.clamp(1, 5)
        for loop_var in range(0, k):
            # NOTE(review): this call passes no `rated_already` argument; verify it matches
            # the signature of the get_top_recommendations_lstm definition in effect.
            # NOTE(review): adversary_vector is not updated inside this loop, so the same
            # item appears to be selected on every iteration — confirm this is intended.
            curr_item, _ = get_top_recommendations_lstm(adversary_vector, sample_size=1)
            user_list = item_user_rating_tensor[curr_item].detach().clone().to(device)  # Move the item's rating history to `device`
            n = len(user_list)
            zeros_to_add = torch.zeros(1, 1, 2).to(device)  # One empty row, created on `device`
            # Append the injected rating as a new last row of the item's history
            ratings = torch.cat((user_list, zeros_to_add), dim=0)
            # NOTE(review): the existing rows hold [user_id, rating], but the id slot of the
            # new row is filled with the item id — presumably the adversary's id was meant; verify.
            ratings[n][0][0] = curr_item
            # NOTE(review): the index is curr_item + loop_var; confirm this is the intended
            # layout of the k * len(model.qi) rating vector.
            ratings[n][0][1] = user_action_clamped[curr_item+loop_var]
            # ratings has n + 1 rows, so this slice is the whole extended history
            _, (item_vector_, _) = item_lstm(ratings[:n+1])
            item_vector_copy_tensor[curr_item] = item_vector_[-1]
        
        # Scores of the observed user after the injected ratings
        final_rec_list = get_all_preferences_lstm(user_vector, item_vector_copy_tensor)
        final_rec_list_softmax = F.softmax(final_rec_list, dim=0)
        # Negated so that minimising the loss maximises the Hellinger distance
        distance_metric = -hellinger_distance(final_rec_list_softmax, initial_rec_list_softmax)
        distance_metric.backward()
        optimizer.step()
        optimizer.zero_grad()

    return distance_metric.item()


# Sizes of the user and item factor tables of the last-5 model; only referenced by the
# commented-out generate_random_lists call below.
n_users = len(model_last_five.pu)
n_items = len(model_last_five.qi)

import random

def generate_random_lists(n_items, n_users, num_samples=30):
    """Draw ``num_samples`` random item indices and as many random user indices.

    Item indices are drawn first, from ``0 .. n_items - 1``; user indices are
    drawn afterwards, from ``0 .. n_users - 1``. Both draws use the ``random``
    module and may contain repeats.

    :return: the tuple ``(item_list, user_list)``
    """

    def draw(upper_bound):
        # randint is inclusive on both ends, hence the -1
        return [random.randint(0, upper_bound - 1) for _ in range(num_samples)]

    item_list = draw(n_items)
    user_list = draw(n_users)
    return item_list, user_list

# Disabled alternative: draw random ids instead of using the fixed lists below.
#items, users = generate_random_lists(n_items, n_users)
# Ids passed as the `adversary` argument of future_k_stability_lstm in the loop below.
advs = [88, 657, 811, 998, 24, 180, 411, 839, 466, 704, 70, 84, 842, 967, 254, 567, 506, 302, 658, 490, 844, 54, 323, 625, 119, 566, 923, 432, 29, 934]
# Ids passed as the `curr_user` argument of future_k_stability_lstm in the loop below.
users = [595, 843, 451, 674, 762, 732, 636, 788, 989, 16, 39, 525, 33, 151, 19, 777, 255, 405, 277, 410, 590, 776, 846, 207, 439, 103, 706, 36, 650, 878]
# Passed as the `k` argument of future_k_stability_lstm.
time_till = 1

def save_list_to_file(lst, filename):
    """Write every element of ``lst`` to ``filename``, one ``str(item)`` per line.

    The parent directory of ``filename`` is created when it does not exist, so
    results are not lost to a missing output folder. An existing file is
    overwritten.

    :param lst: iterable of values to write
    :param filename: path of the output file
    """
    import os

    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'w') as file:
        file.writelines(str(item) + '\n' for item in lst)

# One stability value per (user, adversary) pair; NaN results are left out
all_stabilities = []
for p_user in tqdm(users):
    for p_item in advs:
        dist_val = future_k_stability_lstm(p_item, p_user, time_till)
        if math.isnan(dist_val):
            continue
        all_stabilities.append(dist_val)
        
import statistics

import math

def calculate_mean_std(lst):
    """Print the mean and standard deviation of ``lst`` computed two ways.

    The ``statistics`` figures use ``statistics.mean`` and ``statistics.stdev``;
    the manual figures divide by ``len(lst)`` for both the mean and the
    variance. Nothing is returned.
    """
    # Library figures
    library_mean = statistics.mean(lst)
    library_std = statistics.stdev(lst)

    # Hand-computed figures
    hand_mean = sum(lst) / len(lst)
    hand_variance = sum((x - hand_mean) ** 2 for x in lst) / len(lst)
    hand_std = math.sqrt(hand_variance)

    report = (
        f"Mean (using statistics module): {library_mean}",
        f"Standard Deviation (using statistics module): {library_std}",
        f"Mean (calculated manually): {hand_mean}",
        f"Standard Deviation (calculated manually): {hand_std}",
    )
    print(*report, sep="\n")

# Print summary statistics of the collected stability values.
calculate_mean_std(all_stabilities)

# Persist the values, one per line.
save_list_to_file(all_stabilities, 'more_files/future_stability_lstm_1.txt')        